Home | History | Annotate | Download | only in regex
      1 /* Extended regular expression matching and search library, version
      2    0.12.  (Implements POSIX draft P10003.2/D11.2, except for
      3    internationalization features.)
      4 
      5    Copyright (C) 1993, 1994, 1995, 1996 Free Software Foundation, Inc.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 2, or (at your option)
     10    any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
     20    USA.	 */
     21 
     22 /* AIX requires this to be the first thing in the file. */
     23 #if defined (_AIX) && !defined (REGEX_MALLOC)
     24   #pragma alloca
     25 #endif
     26 
     27 #undef	_GNU_SOURCE
     28 #define _GNU_SOURCE
     29 
     30 #include "cs_config.h"
     31 #include "util/osdep.h"
     32 
     33 #ifdef HAVE_CONFIG_H
     34 #include <config.h>
     35 #endif
     36 
     37 /* We need this for `regex.h', and perhaps for the Emacs include files.	 */
     38 #include <sys/types.h>
     39 
     40 /* This is for other GNU distributions with internationalized messages.	 */
     41 #if HAVE_LIBINTL_H || defined (_LIBC)
     42 # include <libintl.h>
     43 #else
     44 # define gettext(msgid) (msgid)
     45 #endif
     46 
     47 #ifndef gettext_noop
     48 /* This define is so xgettext can find the internationalizable
     49    strings.  */
     50 #define gettext_noop(String) String
     51 #endif
     52 
     53 /* The `emacs' switch turns on certain matching commands
     54    that make sense only in Emacs. */
     55 #ifdef emacs
     56 
     57 #include "lisp.h"
     58 #include "buffer.h"
     59 #include "syntax.h"
     60 
     61 #else  /* not emacs */
     62 
     63 /* If we are not linking with Emacs proper,
     64    we can't use the relocating allocator
     65    even if config.h says that we can.  */
     66 #undef REL_ALLOC
     67 
     68 #if defined (STDC_HEADERS) || defined (_LIBC)
     69 #include <stdlib.h>
     70 #else
     71 char *malloc ();
     72 char *realloc ();
     73 #endif
     74 
     75 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
     76    If nothing else has been done, use the method below.	 */
     77 #ifdef INHIBIT_STRING_HEADER
     78 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
     79 #if !defined (bzero) && !defined (bcopy)
     80 #undef INHIBIT_STRING_HEADER
     81 #endif
     82 #endif
     83 #endif
     84 
     85 /* This is the normal way of making sure we have a bcopy and a bzero.
     86    This is used in most programs--a few other programs avoid this
     87    by defining INHIBIT_STRING_HEADER.  */
     88 #ifndef INHIBIT_STRING_HEADER
     89 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
     90 #include <string.h>
     91 #ifndef bcmp
     92 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
     93 #endif
     94 #ifndef bcopy
     95 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
     96 #endif
     97 #ifndef bzero
     98 #define bzero(s, n)	memset ((s), 0, (n))
     99 #endif
    100 #else
    101 #include <strings.h>
    102 #endif
    103 #endif
    104 
    105 /* Define the syntax stuff for \<, \>, etc.  */
    106 
    107 /* This must be nonzero for the wordchar and notwordchar pattern
    108    commands in re_match_2.  */
    109 #ifndef Sword
    110 #define Sword 1
    111 #endif
    112 
    113 #ifdef SWITCH_ENUM_BUG
    114 #define SWITCH_ENUM_CAST(x) ((int)(x))
    115 #else
    116 #define SWITCH_ENUM_CAST(x) (x)
    117 #endif
    118 
    119 #ifdef SYNTAX_TABLE
    120 
    121 extern char *re_syntax_table;
    122 
    123 #else /* not SYNTAX_TABLE */
    124 
    125 /* How many characters in the character set.  */
    126 #define CHAR_SET_SIZE 256
    127 
    128 static char re_syntax_table[CHAR_SET_SIZE];
    129 
    130 static void
    131 init_syntax_once ()
    132 {
    133    register int c;
    134    static int done = 0;
    135 
    136    if (done)
    137      return;
    138 
    139    bzero (re_syntax_table, sizeof re_syntax_table);
    140 
    141    for (c = 'a'; c <= 'z'; c++)
    142      re_syntax_table[c] = Sword;
    143 
    144    for (c = 'A'; c <= 'Z'; c++)
    145      re_syntax_table[c] = Sword;
    146 
    147    for (c = '0'; c <= '9'; c++)
    148      re_syntax_table[c] = Sword;
    149 
    150    re_syntax_table['_'] = Sword;
    151 
    152    done = 1;
    153 }
    154 
    155 #endif /* not SYNTAX_TABLE */
    156 
    157 #define SYNTAX(c) re_syntax_table[c]
    158 
    159 #endif /* not emacs */
    160 
    161 /* Get the interface, including the syntax bits.  */
    163 #include "regex.h"
    164 
    165 /* isalpha etc. are used for the character classes.  */
    166 #include <ctype.h>
    167 
    168 /* Jim Meyering writes:
    169 
    170    "... Some ctype macros are valid only for character codes that
    171    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
    172    using /bin/cc or gcc but without giving an ansi option).  So, all
    173    ctype uses should be through macros like ISPRINT...	If
    174    STDC_HEADERS is defined, then autoconf has verified that the ctype
    175    macros don't need to be guarded with references to isascii. ...
    176    Defining IN_CTYPE_DOMAIN to 1 should let any compiler worth its salt
    177    eliminate the && through constant folding."	*/
    178 
    179 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
    180 #define IN_CTYPE_DOMAIN(c) 1
    181 #else
    182 #define IN_CTYPE_DOMAIN(c) isascii(c)
    183 #endif
    184 
    185 #ifdef isblank
    186 #define ISBLANK(c) (IN_CTYPE_DOMAIN (c) && isblank (c))
    187 #else
    188 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
    189 #endif
    190 #ifdef isgraph
    191 #define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isgraph (c))
    192 #else
    193 #define ISGRAPH(c) (IN_CTYPE_DOMAIN (c) && isprint (c) && !isspace (c))
    194 #endif
    195 
    196 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
    197 #define ISDIGIT(c) (IN_CTYPE_DOMAIN (c) && isdigit (c))
    198 #define ISALNUM(c) (IN_CTYPE_DOMAIN (c) && isalnum (c))
    199 #define ISALPHA(c) (IN_CTYPE_DOMAIN (c) && isalpha (c))
    200 #define ISCNTRL(c) (IN_CTYPE_DOMAIN (c) && iscntrl (c))
    201 #define ISLOWER(c) (IN_CTYPE_DOMAIN (c) && islower (c))
    202 #define ISPUNCT(c) (IN_CTYPE_DOMAIN (c) && ispunct (c))
    203 #define ISSPACE(c) (IN_CTYPE_DOMAIN (c) && isspace (c))
    204 #define ISUPPER(c) (IN_CTYPE_DOMAIN (c) && isupper (c))
    205 #define ISXDIGIT(c) (IN_CTYPE_DOMAIN (c) && isxdigit (c))
    206 
    207 #ifndef NULL
    208 #define NULL (void *)0
    209 #endif
    210 
    211 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
    212    since ours (we hope) works properly with all combinations of
    213    machines, compilers, `char' and `unsigned char' argument types.
    214    (Per Bothner suggested the basic approach.)	*/
    215 #undef SIGN_EXTEND_CHAR
    216 #if __STDC__
    217 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
    218 #else  /* not __STDC__ */
    219 /* As in Harbison and Steele.  */
    220 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
    221 #endif
    222 
    223 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
    225    use `alloca' instead of `malloc'.  This is because using malloc in
    226    re_search* or re_match* could cause memory leaks when C-g is used in
    227    Emacs; also, malloc is slower and causes storage fragmentation.  On
    228    the other hand, malloc is more portable, and easier to debug.
    229 
    230    Because we sometimes use alloca, some routines have to be macros,
    231    not functions -- `alloca'-allocated space disappears at the end of the
    232    function it is called in.  */
    233 
    234 #ifdef REGEX_MALLOC
    235 
    236 #define REGEX_ALLOCATE malloc
    237 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
    238 #define REGEX_FREE free
    239 
    240 #else /* not REGEX_MALLOC  */
    241 
    242 /* Emacs already defines alloca, sometimes.  */
    243 #ifndef alloca
    244 
    245 /* Make alloca work the best possible way.  */
    246 #ifdef __GNUC__
    247 #define alloca __builtin_alloca
    248 #else /* not __GNUC__ */
    249 #if HAVE_ALLOCA_H
    250 #include <alloca.h>
    251 #else /* not __GNUC__ or HAVE_ALLOCA_H */
    252 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
    253 #ifndef _AIX /* Already did AIX, up at the top.	 */
    254 char *alloca ();
    255 #endif /* not _AIX */
    256 #endif
    257 #endif /* not HAVE_ALLOCA_H */
    258 #endif /* not __GNUC__ */
    259 
    260 #endif /* not alloca */
    261 
    262 #define REGEX_ALLOCATE alloca
    263 
    264 /* Assumes a `char *destination' variable.  */
    265 #define REGEX_REALLOCATE(source, osize, nsize)				\
    266   (destination = (char *) alloca (nsize),				\
    267    bcopy (source, destination, osize),					\
    268    destination)
    269 
    270 /* No need to do anything to free, after alloca.  */
    271 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
    272 
    273 #endif /* not REGEX_MALLOC */
    274 
    275 /* Define how to allocate the failure stack.  */
    276 
    277 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
    278 
    279 #define REGEX_ALLOCATE_STACK(size)				\
    280   r_alloc (&failure_stack_ptr, (size))
    281 #define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
    282   r_re_alloc (&failure_stack_ptr, (nsize))
    283 #define REGEX_FREE_STACK(ptr)					\
    284   r_alloc_free (&failure_stack_ptr)
    285 
    286 #else /* not using relocating allocator */
    287 
    288 #ifdef REGEX_MALLOC
    289 
    290 #define REGEX_ALLOCATE_STACK malloc
    291 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
    292 #define REGEX_FREE_STACK free
    293 
    294 #else /* not REGEX_MALLOC */
    295 
    296 #define REGEX_ALLOCATE_STACK alloca
    297 
    298 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
    299    REGEX_REALLOCATE (source, osize, nsize)
    300 /* No need to explicitly free anything.	 */
    301 #define REGEX_FREE_STACK(arg)
    302 
    303 #endif /* not REGEX_MALLOC */
    304 #endif /* not using relocating allocator */
    305 
    306 
    307 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
    308    `string1' or just past its end.  This works if PTR is NULL, which is
    309    a good thing.  */
    310 #define FIRST_STRING_P(ptr)					\
    311   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
    312 
    313 /* (Re)Allocate N items of type T using malloc, or fail.  */
    314 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
    315 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
    316 #define RETALLOC_IF(addr, n, t) \
    317   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
    318 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
    319 
    320 #define BYTEWIDTH 8 /* In bits.	 */
    321 
    322 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
    323 
    324 #undef MAX
    325 #undef MIN
    326 #define MAX(a, b) ((a) > (b) ? (a) : (b))
    327 #define MIN(a, b) ((a) < (b) ? (a) : (b))
    328 
    329 typedef char boolean;
    330 #define false 0
    331 #define true 1
    332 
    333 static int re_match_2_internal ();
    334 
    335 /* These are the command codes that appear in compiled regular
    337    expressions.	 Some opcodes are followed by argument bytes.  A
    338    command code can specify any interpretation whatsoever for its
    339    arguments.  Zero bytes may appear in the compiled regular expression.  */
    340 
    341 typedef enum
    342 {
    343   no_op = 0,
    344 
    345   /* Succeed right away--no more backtracking.	*/
    346   succeed,
    347 
    348 	/* Followed by one byte giving n, then by n literal bytes.  */
    349   exactn,
    350 
    351 	/* Matches any (more or less) character.  */
    352   anychar,
    353 
    354 	/* Matches any one char belonging to specified set.  First
    355 	   following byte is number of bitmap bytes.  Then come bytes
    356 	   for a bitmap saying which chars are in.  Bits in each byte
    357 	   are ordered low-bit-first.  A character is in the set if its
    358 	   bit is 1.  A character too large to have a bit in the map is
    359 	   automatically not in the set.  */
    360   charset,
    361 
    362 	/* Same parameters as charset, but match any character that is
    363 	   not one of those specified.	*/
    364   charset_not,
    365 
    366 	/* Start remembering the text that is matched, for storing in a
    367 	   register.  Followed by one byte with the register number, in
    368 	   the range 0 to one less than the pattern buffer's re_nsub
    369 	   field.  Then followed by one byte with the number of groups
    370 	   inner to this one.  (This last has to be part of the
    371 	   start_memory only because we need it in the on_failure_jump
    372 	   of re_match_2.)  */
    373   start_memory,
    374 
    375 	/* Stop remembering the text that is matched and store it in a
    376 	   memory register.  Followed by one byte with the register
    377 	   number, in the range 0 to one less than `re_nsub' in the
    378 	   pattern buffer, and one byte with the number of inner groups,
    379 	   just like `start_memory'.  (We need the number of inner
    380 	   groups here because we don't have any easy way of finding the
    381 	   corresponding start_memory when we're at a stop_memory.)  */
    382   stop_memory,
    383 
    384 	/* Match a duplicate of something remembered. Followed by one
    385 	   byte containing the register number.	 */
    386   duplicate,
    387 
    388 	/* Fail unless at beginning of line.  */
    389   begline,
    390 
    391 	/* Fail unless at end of line.	*/
    392   endline,
    393 
    394 	/* Succeeds if at beginning of buffer (if emacs) or at beginning
    395 	   of string to be matched (if not).  */
    396   begbuf,
    397 
    398 	/* Analogously, for end of buffer/string.  */
    399   endbuf,
    400 
    401 	/* Followed by two byte relative address to which to jump.  */
    402   jump,
    403 
    404 	/* Same as jump, but marks the end of an alternative.  */
    405   jump_past_alt,
    406 
    407 	/* Followed by two-byte relative address of place to resume at
    408 	   in case of failure.	*/
    409   on_failure_jump,
    410 
    411 	/* Like on_failure_jump, but pushes a placeholder instead of the
    412 	   current string position when executed.  */
    413   on_failure_keep_string_jump,
    414 
    415 	/* Throw away latest failure point and then jump to following
    416 	   two-byte relative address.  */
    417   pop_failure_jump,
    418 
    419 	/* Change to pop_failure_jump if know won't have to backtrack to
    420 	   match; otherwise change to jump.  This is used to jump
    421 	   back to the beginning of a repeat.  If what follows this jump
    422 	   clearly won't match what the repeat does, such that we can be
    423 	   sure that there is no use backtracking out of repetitions
    424 	   already matched, then we change it to a pop_failure_jump.
    425 	   Followed by two-byte address.  */
    426   maybe_pop_jump,
    427 
    428 	/* Jump to following two-byte address, and push a dummy failure
    429 	   point. This failure point will be thrown away if an attempt
    430 	   is made to use it for a failure.  A `+' construct makes this
    431 	   before the first repeat.  Also used as an intermediary kind
    432 	   of jump when compiling an alternative.  */
    433   dummy_failure_jump,
    434 
    435 	/* Push a dummy failure point and continue.  Used at the end of
    436 	   alternatives.  */
    437   push_dummy_failure,
    438 
    439 	/* Followed by two-byte relative address and two-byte number n.
    440 	   After matching N times, jump to the address upon failure.  */
    441   succeed_n,
    442 
    443 	/* Followed by two-byte relative address, and two-byte number n.
    444 	   Jump to the address N times, then fail.  */
    445   jump_n,
    446 
    447 	/* Set the following two-byte relative address to the
    448 	   subsequent two-byte number.	The address *includes* the two
    449 	   bytes of number.  */
    450   set_number_at,
    451 
    452   wordchar,	/* Matches any word-constituent character.  */
    453   notwordchar,	/* Matches any char that is not a word-constituent.  */
    454 
    455   wordbeg,	/* Succeeds if at word beginning.  */
    456   wordend,	/* Succeeds if at word end.  */
    457 
    458   wordbound,	/* Succeeds if at a word boundary.  */
    459   notwordbound	/* Succeeds if not at a word boundary.	*/
    460 
    461 #ifdef emacs
    462   ,before_dot,	/* Succeeds if before point.  */
    463   at_dot,	/* Succeeds if at point.  */
    464   after_dot,	/* Succeeds if after point.  */
    465 
    466 	/* Matches any character whose syntax is specified.  Followed by
    467 	   a byte which contains a syntax code, e.g., Sword.  */
    468   syntaxspec,
    469 
    470 	/* Matches any character whose syntax is not that specified.  */
    471   notsyntaxspec
    472 #endif /* emacs */
    473 } re_opcode_t;
    474 
    475 /* Common operations on the compiled pattern.  */
    477 
    478 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
    479 
    480 #define STORE_NUMBER(destination, number)				\
    481   do {									\
    482     (destination)[0] = (number) & 0377;					\
    483     (destination)[1] = (number) >> 8;					\
    484   } while (0)
    485 
    486 /* Same as STORE_NUMBER, except increment DESTINATION to
    487    the byte after where the number is stored.  Therefore, DESTINATION
    488    must be an lvalue.  */
    489 
    490 #define STORE_NUMBER_AND_INCR(destination, number)			\
    491   do {									\
    492     STORE_NUMBER (destination, number);					\
    493     (destination) += 2;							\
    494   } while (0)
    495 
    496 /* Put into DESTINATION a number stored in two contiguous bytes starting
    497    at SOURCE.  */
    498 
    499 #define EXTRACT_NUMBER(destination, source)				\
    500   do {									\
    501     (destination) = *(source) & 0377;					\
    502     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
    503   } while (0)
    504 
    505 #ifdef DEBUG
    506 static void
    507 extract_number (dest, source)
    508     int *dest;
    509     unsigned char *source;
    510 {
    511   int temp = SIGN_EXTEND_CHAR (*(source + 1));
    512   *dest = *source & 0377;
    513   *dest += temp << 8;
    514 }
    515 
    516 #ifndef EXTRACT_MACROS /* To debug the macros.	*/
    517 #undef EXTRACT_NUMBER
    518 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
    519 #endif /* not EXTRACT_MACROS */
    520 
    521 #endif /* DEBUG */
    522 
    523 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
    524    SOURCE must be an lvalue.  */
    525 
    526 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
    527   do {									\
    528     EXTRACT_NUMBER (destination, source);				\
    529     (source) += 2;							\
    530   } while (0)
    531 
    532 #ifdef DEBUG
    533 static void
    534 extract_number_and_incr (destination, source)
    535     int *destination;
    536     unsigned char **source;
    537 {
    538   extract_number (destination, *source);
    539   *source += 2;
    540 }
    541 
    542 #ifndef EXTRACT_MACROS
    543 #undef EXTRACT_NUMBER_AND_INCR
    544 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
    545   extract_number_and_incr (&dest, &src)
    546 #endif /* not EXTRACT_MACROS */
    547 
    548 #endif /* DEBUG */
    549 
    550 /* If DEBUG is defined, Regex prints many voluminous messages about what
    552    it is doing (if the variable `debug' is nonzero).  If linked with the
    553    main program in `iregex.c', you can enter patterns and strings
    554    interactively.  And if linked with the main program in `main.c' and
    555    the other test files, you can run the already-written tests.	 */
    556 
    557 #ifdef DEBUG
    558 
    559 /* We use standard I/O for debugging.  */
    560 #include <stdio.h>
    561 
    562 /* It is useful to test things that ``must'' be true when debugging.  */
    563 #include <assert.h>
    564 
    565 static int debug = 0;
    566 
    567 #define DEBUG_STATEMENT(e) e
    568 #define DEBUG_PRINT1(x) if (debug) printf (x)
    569 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
    570 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
    571 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
    572 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
    573   if (debug) print_partial_compiled_pattern (s, e)
    574 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
    575   if (debug) print_double_string (w, s1, sz1, s2, sz2)
    576 
    577 
    578 /* Print the fastmap in human-readable form.  */
    579 
    580 void
    581 print_fastmap (fastmap)
    582     char *fastmap;
    583 {
    584   unsigned was_a_range = 0;
    585   unsigned i = 0;
    586 
    587   while (i < (1 << BYTEWIDTH))
    588     {
    589       if (fastmap[i++])
    590 	{
    591 	  was_a_range = 0;
    592 	  putchar (i - 1);
    593 	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
    594 	    {
    595 	      was_a_range = 1;
    596 	      i++;
    597 	    }
    598 	  if (was_a_range)
    599 	    {
    600 	      printf ("-");
    601 	      putchar (i - 1);
    602 	    }
    603 	}
    604     }
    605   putchar ('\n');
    606 }
    607 
    608 
    609 /* Print a compiled pattern string in human-readable form, starting at
    610    the START pointer into it and ending just before the pointer END.  */
    611 
    612 void
    613 print_partial_compiled_pattern (start, end)
    614     unsigned char *start;
    615     unsigned char *end;
    616 {
    617   int mcnt, mcnt2;
    618   unsigned char *p = start;
    619   unsigned char *pend = end;
    620 
    621   if (start == NULL)
    622     {
    623       printf ("(null)\n");
    624       return;
    625     }
    626 
    627   /* Loop over pattern commands.  */
    628   while (p < pend)
    629     {
    630       printf ("%d:\t", p - start);
    631 
    632       switch ((re_opcode_t) *p++)
    633 	{
    634 	case no_op:
    635 	  printf ("/no_op");
    636 	  break;
    637 
    638 	case exactn:
    639 	  mcnt = *p++;
    640 	  printf ("/exactn/%d", mcnt);
    641 	  do
    642 	    {
    643 	      putchar ('/');
    644 	      putchar (*p++);
    645 	    }
    646 	  while (--mcnt);
    647 	  break;
    648 
    649 	case start_memory:
    650 	  mcnt = *p++;
    651 	  printf ("/start_memory/%d/%d", mcnt, *p++);
    652 	  break;
    653 
    654 	case stop_memory:
    655 	  mcnt = *p++;
    656 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
    657 	  break;
    658 
    659 	case duplicate:
    660 	  printf ("/duplicate/%d", *p++);
    661 	  break;
    662 
    663 	case anychar:
    664 	  printf ("/anychar");
    665 	  break;
    666 
    667 	case charset:
    668 	case charset_not:
    669 	  {
    670 	    register int c, last = -100;
    671 	    register int in_range = 0;
    672 
    673 	    printf ("/charset [%s",
    674 		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
    675 
    676 	    assert (p + *p < pend);
    677 
    678 	    for (c = 0; c < 256; c++)
    679 	      if (c / 8 < *p
    680 		  && (p[1 + (c/8)] & (1 << (c % 8))))
    681 		{
    682 		  /* Are we starting a range?  */
    683 		  if (last + 1 == c && ! in_range)
    684 		    {
    685 		      putchar ('-');
    686 		      in_range = 1;
    687 		    }
    688 		  /* Have we broken a range?  */
    689 		  else if (last + 1 != c && in_range)
    690 	      {
    691 		      putchar (last);
    692 		      in_range = 0;
    693 		    }
    694 
    695 		  if (! in_range)
    696 		    putchar (c);
    697 
    698 		  last = c;
    699 	      }
    700 
    701 	    if (in_range)
    702 	      putchar (last);
    703 
    704 	    putchar (']');
    705 
    706 	    p += 1 + *p;
    707 	  }
    708 	  break;
    709 
    710 	case begline:
    711 	  printf ("/begline");
    712 	  break;
    713 
    714 	case endline:
    715 	  printf ("/endline");
    716 	  break;
    717 
    718 	case on_failure_jump:
    719 	  extract_number_and_incr (&mcnt, &p);
    720 	  printf ("/on_failure_jump to %d", p + mcnt - start);
    721 	  break;
    722 
    723 	case on_failure_keep_string_jump:
    724 	  extract_number_and_incr (&mcnt, &p);
    725 	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
    726 	  break;
    727 
    728 	case dummy_failure_jump:
    729 	  extract_number_and_incr (&mcnt, &p);
    730 	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
    731 	  break;
    732 
    733 	case push_dummy_failure:
    734 	  printf ("/push_dummy_failure");
    735 	  break;
    736 
    737 	case maybe_pop_jump:
    738 	  extract_number_and_incr (&mcnt, &p);
    739 	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
    740 	  break;
    741 
    742 	case pop_failure_jump:
    743 	  extract_number_and_incr (&mcnt, &p);
    744 	  printf ("/pop_failure_jump to %d", p + mcnt - start);
    745 	  break;
    746 
    747 	case jump_past_alt:
    748 	  extract_number_and_incr (&mcnt, &p);
    749 	  printf ("/jump_past_alt to %d", p + mcnt - start);
    750 	  break;
    751 
    752 	case jump:
    753 	  extract_number_and_incr (&mcnt, &p);
    754 	  printf ("/jump to %d", p + mcnt - start);
    755 	  break;
    756 
    757 	case succeed_n:
    758 	  extract_number_and_incr (&mcnt, &p);
    759 	  extract_number_and_incr (&mcnt2, &p);
    760 	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
    761 	  break;
    762 
    763 	case jump_n:
    764 	  extract_number_and_incr (&mcnt, &p);
    765 	  extract_number_and_incr (&mcnt2, &p);
    766 	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
    767 	  break;
    768 
    769 	case set_number_at:
    770 	  extract_number_and_incr (&mcnt, &p);
    771 	  extract_number_and_incr (&mcnt2, &p);
    772 	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
    773 	  break;
    774 
    775 	case wordbound:
    776 	  printf ("/wordbound");
    777 	  break;
    778 
    779 	case notwordbound:
    780 	  printf ("/notwordbound");
    781 	  break;
    782 
    783 	case wordbeg:
    784 	  printf ("/wordbeg");
    785 	  break;
    786 
    787 	case wordend:
    788 	  printf ("/wordend");
    789 
    790 #ifdef emacs
    791 	case before_dot:
    792 	  printf ("/before_dot");
    793 	  break;
    794 
    795 	case at_dot:
    796 	  printf ("/at_dot");
    797 	  break;
    798 
    799 	case after_dot:
    800 	  printf ("/after_dot");
    801 	  break;
    802 
    803 	case syntaxspec:
    804 	  printf ("/syntaxspec");
    805 	  mcnt = *p++;
    806 	  printf ("/%d", mcnt);
    807 	  break;
    808 
    809 	case notsyntaxspec:
    810 	  printf ("/notsyntaxspec");
    811 	  mcnt = *p++;
    812 	  printf ("/%d", mcnt);
    813 	  break;
    814 #endif /* emacs */
    815 
    816 	case wordchar:
    817 	  printf ("/wordchar");
    818 	  break;
    819 
    820 	case notwordchar:
    821 	  printf ("/notwordchar");
    822 	  break;
    823 
    824 	case begbuf:
    825 	  printf ("/begbuf");
    826 	  break;
    827 
    828 	case endbuf:
    829 	  printf ("/endbuf");
    830 	  break;
    831 
    832 	default:
    833 	  printf ("?%d", *(p-1));
    834 	}
    835 
    836       putchar ('\n');
    837     }
    838 
    839   printf ("%d:\tend of pattern.\n", p - start);
    840 }
    841 
    842 
    843 void
    844 print_compiled_pattern (bufp)
    845     struct re_pattern_buffer *bufp;
    846 {
    847   unsigned char *buffer = bufp->buffer;
    848 
    849   print_partial_compiled_pattern (buffer, buffer + bufp->used);
    850   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
    851 
    852   if (bufp->fastmap_accurate && bufp->fastmap)
    853     {
    854       printf ("fastmap: ");
    855       print_fastmap (bufp->fastmap);
    856     }
    857 
    858   printf ("re_nsub: %d\t", bufp->re_nsub);
    859   printf ("regs_alloc: %d\t", bufp->regs_allocated);
    860   printf ("can_be_null: %d\t", bufp->can_be_null);
    861   printf ("newline_anchor: %d\n", bufp->newline_anchor);
    862   printf ("no_sub: %d\t", bufp->no_sub);
    863   printf ("not_bol: %d\t", bufp->not_bol);
    864   printf ("not_eol: %d\t", bufp->not_eol);
    865   printf ("syntax: %d\n", bufp->syntax);
    866   /* Perhaps we should print the translate table?  */
    867 }
    868 
    869 
    870 void
    871 print_double_string (where, string1, size1, string2, size2)
    872     const char *where;
    873     const char *string1;
    874     const char *string2;
    875     int size1;
    876     int size2;
    877 {
    878   unsigned this_char;
    879 
    880   if (where == NULL)
    881     printf ("(null)");
    882   else
    883     {
    884       if (FIRST_STRING_P (where))
    885 	{
    886 	  for (this_char = where - string1; this_char < size1; this_char++)
    887 	    putchar (string1[this_char]);
    888 
    889 	  where = string2;
    890 	}
    891 
    892       for (this_char = where - string2; this_char < size2; this_char++)
    893 	putchar (string2[this_char]);
    894     }
    895 }
    896 
    897 #else /* not DEBUG */
    898 
    899 #undef assert
    900 #define assert(e)
    901 
    902 #define DEBUG_STATEMENT(e)
    903 #define DEBUG_PRINT1(x)
    904 #define DEBUG_PRINT2(x1, x2)
    905 #define DEBUG_PRINT3(x1, x2, x3)
    906 #define DEBUG_PRINT4(x1, x2, x3, x4)
    907 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
    908 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
    909 
    910 #endif /* not DEBUG */
    911 
    912 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
    914    also be assigned to arbitrarily: each pattern buffer stores its own
    915    syntax, so it can be changed between regex compilations.  */
    916 /* This has no initializer because initialized variables in Emacs
    917    become read-only after dumping.  */
    918 reg_syntax_t re_syntax_options;
    919 
    920 
    921 /* Specify the precise syntax of regexps for compilation.  This provides
    922    for compatibility for various utilities which historically have
    923    different, incompatible syntaxes.
    924 
    925    The argument SYNTAX is a bit mask comprised of the various bits
    926    defined in regex.h.	We return the old syntax.  */
    927 
    928 reg_syntax_t
    929 re_set_syntax (syntax)
    930     reg_syntax_t syntax;
    931 {
    932   reg_syntax_t ret = re_syntax_options;
    933 
    934   re_syntax_options = syntax;
    935   return ret;
    936 }
    937 
    938 /* This table gives an error message for each of the error codes listed
    940    in regex.h.	Obviously the order here has to be same as there.
    941    POSIX doesn't require that we do anything for REG_NOERROR,
    942    but why not be nice?	 */
    943 
    944 static const char *re_error_msgid[] =
    945   {
    946     gettext_noop ("Success"),	/* REG_NOERROR */
    947     gettext_noop ("No match"),	/* REG_NOMATCH */
    948     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
    949     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
    950     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
    951     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
    952     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
    953     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
    954     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
    955     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
    956     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
    957     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
    958     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
    959     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
    960     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
    961     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
    962     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
    963   };
    964 
    965 /* Avoiding alloca during matching, to placate r_alloc.	 */
    967 
    968 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
    969    searching and matching functions should not call alloca.  On some
    970    systems, alloca is implemented in terms of malloc, and if we're
    971    using the relocating allocator routines, then malloc could cause a
    972    relocation, which might (if the strings being searched are in the
    973    ralloc heap) shift the data out from underneath the regexp
    974    routines.
    975 
    976    Here's another reason to avoid allocation: Emacs
    977    processes input from X in a signal handler; processing X input may
    978    call malloc; if input arrives while a matching routine is calling
    979    malloc, then we're scrod.  But Emacs can't just block input while
    980    calling matching routines; then we don't notice interrupts when
    981    they come in.  So, Emacs blocks input around all regexp calls
    982    except the matching calls, which it leaves unprotected, in the
    983    faith that they will not malloc.  */
    984 
    985 /* Normally, this is fine.  */
    986 #define MATCH_MAY_ALLOCATE
    987 
    988 /* When using GNU C, we are not REALLY using the C alloca, no matter
    989    what config.h may say.  So don't take precautions for it.  */
    990 #ifdef __GNUC__
    991 #undef C_ALLOCA
    992 #endif
    993 
    994 /* The match routines may not allocate if (1) they would do it with malloc
    995    and (2) it's not safe for them to use malloc.
    996    Note that if REL_ALLOC is defined, matching would not use malloc for the
    997    failure stack, but we would still use it for the register vectors;
    998    so REL_ALLOC should not affect this.	 */
    999 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
   1000 #undef MATCH_MAY_ALLOCATE
   1001 #endif
   1002 
   1003 
   1004 /* Failure stack declarations and macros; both re_compile_fastmap and
   1006    re_match_2 use a failure stack.  These have to be macros because of
   1007    REGEX_ALLOCATE_STACK.  */
   1008 
   1009 
   1010 /* Number of failure points for which to initially allocate space
   1011    when matching.  If this number is exceeded, we allocate more
   1012    space, so it is not a hard limit.  */
   1013 #ifndef INIT_FAILURE_ALLOC
   1014 #define INIT_FAILURE_ALLOC 5
   1015 #endif
   1016 
   1017 /* Roughly the maximum number of failure points on the stack.  Would be
   1018    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
   1019    This is a variable only so users of regex can assign to it; we never
   1020    change it ourselves.	 */
   1021 #if defined (MATCH_MAY_ALLOCATE)
   1022 /* 4400 was enough to cause a crash on Alpha OSF/1,
   1023    whose default stack limit is 2mb.  */
   1024 int re_max_failures = 20000;
   1025 #else
   1026 int re_max_failures = 2000;
   1027 #endif
   1028 
   1029 union fail_stack_elt
   1030 {
   1031   unsigned char *pointer;
   1032   int integer;
   1033 };
   1034 
   1035 typedef union fail_stack_elt fail_stack_elt_t;
   1036 
   1037 typedef struct
   1038 {
   1039   fail_stack_elt_t *stack;
   1040   unsigned size;
   1041   unsigned avail;			/* Offset of next open position.  */
   1042 } fail_stack_type;
   1043 
   1044 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
   1045 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
   1046 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
   1047 
   1048 
   1049 /* Define macros to initialize and free the failure stack.
   1050    Do `return -2' if the alloc fails.  */
   1051 
   1052 #ifdef MATCH_MAY_ALLOCATE
   1053 #define INIT_FAIL_STACK()						\
   1054   do {									\
   1055     fail_stack.stack = (fail_stack_elt_t *)				\
   1056       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
   1057 									\
   1058     if (fail_stack.stack == NULL)					\
   1059       return -2;							\
   1060 									\
   1061     fail_stack.size = INIT_FAILURE_ALLOC;				\
   1062     fail_stack.avail = 0;						\
   1063   } while (0)
   1064 
   1065 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
   1066 #else
   1067 #define INIT_FAIL_STACK()						\
   1068   do {									\
   1069     fail_stack.avail = 0;						\
   1070   } while (0)
   1071 
   1072 #define RESET_FAIL_STACK()
   1073 #endif
   1074 
   1075 
   1076 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
   1077 
   1078    Return 1 if succeeds, and 0 if either ran out of memory
   1079    allocating space for it or it was already too large.
   1080 
   1081    REGEX_REALLOCATE_STACK requires `destination' be declared.	*/
   1082 
   1083 #define DOUBLE_FAIL_STACK(fail_stack)					\
   1084   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
   1085    ? 0									\
   1086    : ((fail_stack).stack = (fail_stack_elt_t *)				\
   1087 	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
   1088 	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
   1089 	  ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
   1090 									\
   1091       (fail_stack).stack == NULL					\
   1092       ? 0								\
   1093       : ((fail_stack).size <<= 1,					\
   1094 	 1)))
   1095 
   1096 
   1097 /* Push pointer POINTER on FAIL_STACK.
   1098    Return 1 if was able to do so and 0 if ran out of memory allocating
   1099    space to do so.  */
   1100 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
   1101   ((FAIL_STACK_FULL ()							\
   1102     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
   1103    ? 0									\
   1104    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
   1105       1))
   1106 
   1107 /* Push a pointer value onto the failure stack.
   1108    Assumes the variable `fail_stack'.  Probably should only
   1109    be called from within `PUSH_FAILURE_POINT'.	*/
   1110 #define PUSH_FAILURE_POINTER(item)					\
   1111   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
   1112 
   1113 /* This pushes an integer-valued item onto the failure stack.
   1114    Assumes the variable `fail_stack'.  Probably should only
   1115    be called from within `PUSH_FAILURE_POINT'.	*/
   1116 #define PUSH_FAILURE_INT(item)					\
   1117   fail_stack.stack[fail_stack.avail++].integer = (item)
   1118 
   1119 /* Push a fail_stack_elt_t value onto the failure stack.
   1120    Assumes the variable `fail_stack'.  Probably should only
   1121    be called from within `PUSH_FAILURE_POINT'.	*/
   1122 #define PUSH_FAILURE_ELT(item)					\
   1123   fail_stack.stack[fail_stack.avail++] =  (item)
   1124 
   1125 /* These three POP... operations complement the three PUSH... operations.
   1126    All assume that `fail_stack' is nonempty.  */
   1127 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   1128 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   1129 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
   1130 
   1131 /* Used to omit pushing failure point id's when we're not debugging.  */
   1132 #ifdef DEBUG
   1133 #define DEBUG_PUSH PUSH_FAILURE_INT
   1134 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
   1135 #else
   1136 #define DEBUG_PUSH(item)
   1137 #define DEBUG_POP(item_addr)
   1138 #endif
   1139 
   1140 
   1141 /* Push the information about the state we will need
   1142    if we ever fail back to it.
   1143 
   1144    Requires variables fail_stack, regstart, regend, reg_info, and
   1145    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
   1146    declared.
   1147 
   1148    Does `return FAILURE_CODE' if runs out of memory.  */
   1149 
   1150 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
   1151   do {									\
   1152     char *destination;							\
   1153     /* Must be int, so when we don't save any registers, the arithmetic	\
   1154        of 0 + -1 isn't done as unsigned.  */				\
   1155     int this_reg;							\
   1156 									\
   1157     DEBUG_STATEMENT (failure_id++);					\
   1158     DEBUG_STATEMENT (nfailure_points_pushed++);				\
   1159     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
   1160     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
   1161     DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
   1162 									\
   1163     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
   1164     DEBUG_PRINT2 ("	available: %d\n", REMAINING_AVAIL_SLOTS);	\
   1165 									\
   1166     /* Ensure we have enough space allocated for what we will push.  */	\
   1167     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
   1168       {									\
   1169 	if (!DOUBLE_FAIL_STACK (fail_stack))				\
   1170 	  return failure_code;						\
   1171 									\
   1172 	DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
   1173 		       (fail_stack).size);				\
   1174 	DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
   1175       }									\
   1176 									\
   1177     /* Push the info, starting with the registers.  */			\
   1178     DEBUG_PRINT1 ("\n");						\
   1179 									\
   1180     if (1)								\
   1181       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
   1182 	   this_reg++)							\
   1183 	{								\
   1184 	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
   1185 	  DEBUG_STATEMENT (num_regs_pushed++);				\
   1186 									\
   1187 	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
   1188 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
   1189 									\
   1190 	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
   1191 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
   1192 									\
   1193 	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
   1194 	  DEBUG_PRINT2 (" match_null=%d",				\
   1195 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
   1196 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
   1197 	  DEBUG_PRINT2 (" matched_something=%d",			\
   1198 			MATCHED_SOMETHING (reg_info[this_reg]));	\
   1199 	  DEBUG_PRINT2 (" ever_matched=%d",				\
   1200 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
   1201 	  DEBUG_PRINT1 ("\n");						\
   1202 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
   1203 	}								\
   1204 									\
   1205     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
   1206     PUSH_FAILURE_INT (lowest_active_reg);				\
   1207 									\
   1208     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
   1209     PUSH_FAILURE_INT (highest_active_reg);				\
   1210 									\
   1211     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
   1212     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
   1213     PUSH_FAILURE_POINTER (pattern_place);				\
   1214 									\
   1215     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
   1216     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,	\
   1217 				 size2);				\
   1218     DEBUG_PRINT1 ("'\n");						\
   1219     PUSH_FAILURE_POINTER (string_place);				\
   1220 									\
   1221     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
   1222     DEBUG_PUSH (failure_id);						\
   1223   } while (0)
   1224 
   1225 /* This is the number of items that are pushed and popped on the stack
   1226    for each register.  */
   1227 #define NUM_REG_ITEMS  3
   1228 
   1229 /* Individual items aside from the registers.  */
   1230 #ifdef DEBUG
   1231 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
   1232 #else
   1233 #define NUM_NONREG_ITEMS 4
   1234 #endif
   1235 
   1236 /* We push at most this many items on the stack.  */
   1237 /* We used to use (num_regs - 1), which is the number of registers
   1238    this regexp will save; but that was changed to 5
   1239    to avoid stack overflow for a regexp with lots of parens.  */
   1240 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
   1241 
   1242 /* We actually push this many items.  */
   1243 #define NUM_FAILURE_ITEMS				\
   1244   (((0							\
   1245      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
   1246     * NUM_REG_ITEMS)					\
   1247    + NUM_NONREG_ITEMS)
   1248 
   1249 /* How many items can still be added to the stack without overflowing it.  */
   1250 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
   1251 
   1252 
   1253 /* Pops what PUSH_FAIL_STACK pushes.
   1254 
   1255    We restore into the parameters, all of which should be lvalues:
   1256      STR -- the saved data position.
   1257      PAT -- the saved pattern position.
   1258      LOW_REG, HIGH_REG -- the highest and lowest active registers.
   1259      REGSTART, REGEND -- arrays of string positions.
   1260      REG_INFO -- array of information about each subexpression.
   1261 
   1262    Also assumes the variables `fail_stack' and (if debugging), `bufp',
   1263    `pend', `string1', `size1', `string2', and `size2'.	*/
   1264 
   1265 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
   1266 {									\
   1267   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
   1268   int this_reg;								\
   1269   const unsigned char *string_temp;					\
   1270 									\
   1271   assert (!FAIL_STACK_EMPTY ());					\
   1272 									\
   1273   /* Remove failure points and point to how many regs pushed.  */	\
   1274   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
   1275   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
   1276   DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
   1277 									\
   1278   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
   1279 									\
   1280   DEBUG_POP (&failure_id);						\
   1281   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
   1282 									\
   1283   /* If the saved string location is NULL, it came from an		\
   1284      on_failure_keep_string_jump opcode, and we want to throw away the	\
   1285      saved NULL, thus retaining our current position in the string.  */	\
   1286   string_temp = POP_FAILURE_POINTER ();					\
   1287   if (string_temp != NULL)						\
   1288     str = (const char *) string_temp;					\
   1289 									\
   1290   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
   1291   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
   1292   DEBUG_PRINT1 ("'\n");							\
   1293 									\
   1294   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
   1295   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
   1296   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
   1297 									\
   1298   /* Restore register info.  */						\
   1299   high_reg = (unsigned) POP_FAILURE_INT ();				\
   1300   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
   1301 									\
   1302   low_reg = (unsigned) POP_FAILURE_INT ();				\
   1303   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
   1304 									\
   1305   if (1)								\
   1306     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
   1307       {									\
   1308 	DEBUG_PRINT2 ("	   Popping reg: %d\n", this_reg);		\
   1309 									\
   1310 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
   1311 	DEBUG_PRINT2 ("	     info: 0x%x\n", reg_info[this_reg]);	\
   1312 									\
   1313 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
   1314 	DEBUG_PRINT2 ("	     end: 0x%x\n", regend[this_reg]);		\
   1315 									\
   1316 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
   1317 	DEBUG_PRINT2 ("	     start: 0x%x\n", regstart[this_reg]);	\
   1318       }									\
   1319   else									\
   1320     {									\
   1321       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
   1322 	{								\
   1323 	  reg_info[this_reg].word.integer = 0;				\
   1324 	  regend[this_reg] = 0;						\
   1325 	  regstart[this_reg] = 0;					\
   1326 	}								\
   1327       highest_active_reg = high_reg;					\
   1328     }									\
   1329 									\
   1330   set_regs_matched_done = 0;						\
   1331   DEBUG_STATEMENT (nfailure_points_popped++);				\
   1332 } /* POP_FAILURE_POINT */
   1333 
   1334 
   1335 
   1336 /* Structure for per-register (a.k.a. per-group) information.
   1338    Other register information, such as the
   1339    starting and ending positions (which are addresses), and the list of
   1340    inner groups (which is a bits list) are maintained in separate
   1341    variables.
   1342 
   1343    We are making a (strictly speaking) nonportable assumption here: that
   1344    the compiler will pack our bit fields into something that fits into
   1345    the type of `word', i.e., is something that fits into one item on the
   1346    failure stack.  */
   1347 
   1348 typedef union
   1349 {
   1350   fail_stack_elt_t word;
   1351   struct
   1352   {
   1353       /* This field is one if this group can match the empty string,
   1354 	 zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
   1355 #define MATCH_NULL_UNSET_VALUE 3
   1356     unsigned match_null_string_p : 2;
   1357     unsigned is_active : 1;
   1358     unsigned matched_something : 1;
   1359     unsigned ever_matched_something : 1;
   1360   } bits;
   1361 } register_info_type;
   1362 
   1363 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
   1364 #define IS_ACTIVE(R)  ((R).bits.is_active)
   1365 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
   1366 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
   1367 
   1368 
   1369 /* Call this when have matched a real character; it sets `matched' flags
   1370    for the subexpressions which we are currently inside.  Also records
   1371    that those subexprs have matched.  */
   1372 #define SET_REGS_MATCHED()						\
   1373   do									\
   1374     {									\
   1375       if (!set_regs_matched_done)					\
   1376 	{								\
   1377 	  unsigned r;							\
   1378 	  set_regs_matched_done = 1;					\
   1379 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
   1380 	    {								\
   1381 	      MATCHED_SOMETHING (reg_info[r])				\
   1382 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
   1383 		= 1;							\
   1384 	    }								\
   1385 	}								\
   1386     }									\
   1387   while (0)
   1388 
   1389 /* Registers are set to a sentinel when they haven't yet matched.  */
   1390 static char reg_unset_dummy;
   1391 #define REG_UNSET_VALUE (&reg_unset_dummy)
   1392 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
   1393 
   1394 /* Subroutine declarations and macros for regex_compile.  */
   1396 
   1397 static void store_op1 (), store_op2 ();
   1398 static void insert_op1 (), insert_op2 ();
   1399 static boolean at_begline_loc_p (), at_endline_loc_p ();
   1400 static boolean group_in_compile_stack ();
   1401 static reg_errcode_t compile_range ();
   1402 
   1403 /* Fetch the next character in the uncompiled pattern---translating it
   1404    if necessary.  Also cast from a signed character in the constant
   1405    string passed to us by the user to an unsigned char that we can use
   1406    as an array index (in, e.g., `translate').  */
   1407 #ifndef PATFETCH
   1408 #define PATFETCH(c)							\
   1409   do {if (p == pend) return REG_EEND;					\
   1410     c = (unsigned char) *p++;						\
   1411     if (translate) c = (unsigned char) translate[c];			\
   1412   } while (0)
   1413 #endif
   1414 
   1415 /* Fetch the next character in the uncompiled pattern, with no
   1416    translation.	 */
   1417 #define PATFETCH_RAW(c)							\
   1418   do {if (p == pend) return REG_EEND;					\
   1419     c = (unsigned char) *p++;						\
   1420   } while (0)
   1421 
   1422 /* Go backwards one character in the pattern.  */
   1423 #define PATUNFETCH p--
   1424 
   1425 
   1426 /* If `translate' is non-null, return translate[D], else just D.  We
   1427    cast the subscript to translate because some data is declared as
   1428    `char *', to avoid warnings when a string constant is passed.  But
   1429    when we use a character as a subscript we must make it unsigned.  */
   1430 #ifndef TRANSLATE
   1431 #define TRANSLATE(d) \
   1432   (translate ? (char) translate[(unsigned char) (d)] : (d))
   1433 #endif
   1434 
   1435 
   1436 /* Macros for outputting the compiled pattern into `buffer'.  */
   1437 
   1438 /* If the buffer isn't allocated when it comes in, use this.  */
   1439 #define INIT_BUF_SIZE  32
   1440 
   1441 /* Make sure we have at least N more bytes of space in buffer.	*/
   1442 #define GET_BUFFER_SPACE(n)						\
   1443     while (b - bufp->buffer + (n) > bufp->allocated)			\
   1444       EXTEND_BUFFER ()
   1445 
   1446 /* Make sure we have one more byte of buffer space and then add C to it.  */
   1447 #define BUF_PUSH(c)							\
   1448   do {									\
   1449     GET_BUFFER_SPACE (1);						\
   1450     *b++ = (unsigned char) (c);						\
   1451   } while (0)
   1452 
   1453 
   1454 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
   1455 #define BUF_PUSH_2(c1, c2)						\
   1456   do {									\
   1457     GET_BUFFER_SPACE (2);						\
   1458     *b++ = (unsigned char) (c1);					\
   1459     *b++ = (unsigned char) (c2);					\
   1460   } while (0)
   1461 
   1462 
   1463 /* As with BUF_PUSH_2, except for three bytes.	*/
   1464 #define BUF_PUSH_3(c1, c2, c3)						\
   1465   do {									\
   1466     GET_BUFFER_SPACE (3);						\
   1467     *b++ = (unsigned char) (c1);					\
   1468     *b++ = (unsigned char) (c2);					\
   1469     *b++ = (unsigned char) (c3);					\
   1470   } while (0)
   1471 
   1472 
   1473 /* Store a jump with opcode OP at LOC to location TO.  We store a
   1474    relative address offset by the three bytes the jump itself occupies.	 */
   1475 #define STORE_JUMP(op, loc, to) \
   1476   store_op1 (op, loc, (to) - (loc) - 3)
   1477 
   1478 /* Likewise, for a two-argument jump.  */
   1479 #define STORE_JUMP2(op, loc, to, arg) \
   1480   store_op2 (op, loc, (to) - (loc) - 3, arg)
   1481 
   1482 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.	 */
   1483 #define INSERT_JUMP(op, loc, to) \
   1484   insert_op1 (op, loc, (to) - (loc) - 3, b)
   1485 
   1486 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
   1487 #define INSERT_JUMP2(op, loc, to, arg) \
   1488   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
   1489 
   1490 
   1491 /* This is not an arbitrary limit: the arguments which represent offsets
   1492    into the pattern are two bytes long.	 So if 2^16 bytes turns out to
   1493    be too small, many things would have to change.  */
   1494 #define MAX_BUF_SIZE (1L << 16)
   1495 
   1496 
   1497 /* Extend the buffer by twice its current size via realloc and
   1498    reset the pointers that pointed into the old block to point to the
   1499    correct places in the new one.  If extending the buffer results in it
   1500    being larger than MAX_BUF_SIZE, then flag memory exhausted.	*/
   1501 #define EXTEND_BUFFER()							\
   1502   do {									\
   1503     unsigned char *old_buffer = bufp->buffer;				\
   1504     if (bufp->allocated == MAX_BUF_SIZE)				\
   1505       return REG_ESIZE;							\
   1506     bufp->allocated <<= 1;						\
   1507     if (bufp->allocated > MAX_BUF_SIZE)					\
   1508       bufp->allocated = MAX_BUF_SIZE;					\
   1509     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
   1510     if (bufp->buffer == NULL)						\
   1511       return REG_ESPACE;						\
   1512     /* If the buffer moved, move all the pointers into it.  */		\
   1513     if (old_buffer != bufp->buffer)					\
   1514       {									\
   1515 	b = (b - old_buffer) + bufp->buffer;				\
   1516 	begalt = (begalt - old_buffer) + bufp->buffer;			\
   1517 	if (fixup_alt_jump)						\
   1518 	  fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
   1519 	if (laststart)							\
   1520 	  laststart = (laststart - old_buffer) + bufp->buffer;		\
   1521 	if (pending_exact)						\
   1522 	  pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
   1523       }									\
   1524   } while (0)
   1525 
   1526 
   1527 /* Since we have one byte reserved for the register number argument to
   1528    {start,stop}_memory, the maximum number of groups we can report
   1529    things about is what fits in that byte.  */
   1530 #define MAX_REGNUM 255
   1531 
   1532 /* But patterns can have more than `MAX_REGNUM' registers.  We just
   1533    ignore the excess.  */
   1534 typedef unsigned regnum_t;
   1535 
   1536 
   1537 /* Macros for the compile stack.  */
   1538 
   1539 /* Since offsets can go either forwards or backwards, this type needs to
   1540    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.	 */
   1541 typedef int pattern_offset_t;
   1542 
   1543 typedef struct
   1544 {
   1545   pattern_offset_t begalt_offset;
   1546   pattern_offset_t fixup_alt_jump;
   1547   pattern_offset_t inner_group_offset;
   1548   pattern_offset_t laststart_offset;
   1549   regnum_t regnum;
   1550 } compile_stack_elt_t;
   1551 
   1552 
   1553 typedef struct
   1554 {
   1555   compile_stack_elt_t *stack;
   1556   unsigned size;
   1557   unsigned avail;			/* Offset of next open position.  */
   1558 } compile_stack_type;
   1559 
   1560 
   1561 #define INIT_COMPILE_STACK_SIZE 32
   1562 
   1563 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
   1564 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
   1565 
   1566 /* The next available element.	*/
   1567 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
   1568 
   1569 
   1570 /* Set the bit for character C in a list.  */
   1571 #define SET_LIST_BIT(c)				      \
   1572   (b[((unsigned char) (c)) / BYTEWIDTH]		      \
   1573    |= 1 << (((unsigned char) c) % BYTEWIDTH))
   1574 
   1575 
   1576 /* Get the next unsigned number in the uncompiled pattern.  */
   1577 #define GET_UNSIGNED_NUMBER(num)					\
   1578   { if (p != pend)							\
   1579      {									\
   1580        PATFETCH (c);							\
   1581        while (ISDIGIT (c))						\
   1582 	 {								\
   1583 	   if (num < 0)							\
   1584 	      num = 0;							\
   1585 	   num = num * 10 + c - '0';					\
   1586 	   if (p == pend)						\
   1587 	      break;							\
   1588 	   PATFETCH (c);						\
   1589 	 }								\
   1590        }								\
   1591     }
   1592 
   1593 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
   1594 
   1595 #define IS_CHAR_CLASS(string)						\
   1596    (STREQ (string, "alpha") || STREQ (string, "upper")			\
   1597     || STREQ (string, "lower") || STREQ (string, "digit")		\
   1598     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
   1599     || STREQ (string, "space") || STREQ (string, "print")		\
   1600     || STREQ (string, "punct") || STREQ (string, "graph")		\
   1601     || STREQ (string, "cntrl") || STREQ (string, "blank"))
   1602 
   1603 #ifndef MATCH_MAY_ALLOCATE
   1605 
   1606 /* If we cannot allocate large objects within re_match_2_internal,
   1607    we make the fail stack and register vectors global.
   1608    The fail stack, we grow to the maximum size when a regexp
   1609    is compiled.
   1610    The register vectors, we adjust in size each time we
   1611    compile a regexp, according to the number of registers it needs.  */
   1612 
   1613 static fail_stack_type fail_stack;
   1614 
   1615 /* Size with which the following vectors are currently allocated.
   1616    That is so we can make them bigger as needed,
   1617    but never make them smaller.	 */
   1618 static int regs_allocated_size;
   1619 
   1620 static const char **	 regstart, **	  regend;
   1621 static const char ** old_regstart, ** old_regend;
   1622 static const char **best_regstart, **best_regend;
   1623 static register_info_type *reg_info;
   1624 static const char **reg_dummy;
   1625 static register_info_type *reg_info_dummy;
   1626 
   1627 /* Make the register vectors big enough for NUM_REGS registers,
   1628    but don't make them smaller.	 */
   1629 
   1630 static
   1631 regex_grow_registers (num_regs)
   1632      int num_regs;
   1633 {
   1634   if (num_regs > regs_allocated_size)
   1635     {
   1636       RETALLOC_IF (regstart,	 num_regs, const char *);
   1637       RETALLOC_IF (regend,	 num_regs, const char *);
   1638       RETALLOC_IF (old_regstart, num_regs, const char *);
   1639       RETALLOC_IF (old_regend,	 num_regs, const char *);
   1640       RETALLOC_IF (best_regstart, num_regs, const char *);
   1641       RETALLOC_IF (best_regend,	 num_regs, const char *);
   1642       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
   1643       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
   1644       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
   1645 
   1646       regs_allocated_size = num_regs;
   1647     }
   1648 }
   1649 
   1650 #endif /* not MATCH_MAY_ALLOCATE */
   1651 
   1652 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
   1654    Returns one of error codes defined in `regex.h', or zero for success.
   1655 
   1656    Assumes the `allocated' (and perhaps `buffer') and `translate'
   1657    fields are set in BUFP on entry.
   1658 
   1659    If it succeeds, results are put in BUFP (if it returns an error, the
   1660    contents of BUFP are undefined):
   1661      `buffer' is the compiled pattern;
   1662      `syntax' is set to SYNTAX;
   1663      `used' is set to the length of the compiled pattern;
   1664      `fastmap_accurate' is zero;
   1665      `re_nsub' is the number of subexpressions in PATTERN;
   1666      `not_bol' and `not_eol' are zero;
   1667 
   1668    The `fastmap' and `newline_anchor' fields are neither
   1669    examined nor set.  */
   1670 
   1671 /* Return, freeing storage we allocated.  */
   1672 #define FREE_STACK_RETURN(value)		\
   1673   return (free (compile_stack.stack), value)
   1674 
   1675 static reg_errcode_t
   1676 regex_compile (pattern, size, syntax, bufp)
   1677      const char *pattern;
   1678      int size;
   1679      reg_syntax_t syntax;
   1680      struct re_pattern_buffer *bufp;
   1681 {
   1682   /* We fetch characters from PATTERN here.  Even though PATTERN is
   1683      `char *' (i.e., signed), we declare these variables as unsigned, so
   1684      they can be reliably used as array indices.  */
   1685   register unsigned char c, c1;
   1686 
   1687   /* A random temporary spot in PATTERN.  */
   1688   const char *p1;
   1689 
   1690   /* Points to the end of the buffer, where we should append.  */
   1691   register unsigned char *b;
   1692 
   1693   /* Keeps track of unclosed groups.  */
   1694   compile_stack_type compile_stack;
   1695 
   1696   /* Points to the current (ending) position in the pattern.  */
   1697   const char *p = pattern;
   1698   const char *pend = pattern + size;
   1699 
   1700   /* How to translate the characters in the pattern.  */
   1701   RE_TRANSLATE_TYPE translate = bufp->translate;
   1702 
   1703   /* Address of the count-byte of the most recently inserted `exactn'
   1704      command.  This makes it possible to tell if a new exact-match
   1705      character can be added to that command or if the character requires
   1706      a new `exactn' command.  */
   1707   unsigned char *pending_exact = 0;
   1708 
   1709   /* Address of start of the most recently finished expression.
   1710      This tells, e.g., postfix * where to find the start of its
   1711      operand.  Reset at the beginning of groups and alternatives.  */
   1712   unsigned char *laststart = 0;
   1713 
   1714   /* Address of beginning of regexp, or inside of last group.  */
   1715   unsigned char *begalt;
   1716 
   1717   /* Place in the uncompiled pattern (i.e., the {) to
   1718      which to go back if the interval is invalid.  */
   1719   const char *beg_interval;
   1720 
   1721   /* Address of the place where a forward jump should go to the end of
   1722      the containing expression.	 Each alternative of an `or' -- except the
   1723      last -- ends with a forward jump of this sort.  */
   1724   unsigned char *fixup_alt_jump = 0;
   1725 
   1726   /* Counts open-groups as they are encountered.  Remembered for the
   1727      matching close-group on the compile stack, so the same register
   1728      number is put in the stop_memory as the start_memory.  */
   1729   regnum_t regnum = 0;
   1730 
   1731 #ifdef DEBUG
   1732   DEBUG_PRINT1 ("\nCompiling pattern: ");
   1733   if (debug)
   1734     {
   1735       unsigned debug_count;
   1736 
   1737       for (debug_count = 0; debug_count < size; debug_count++)
   1738 	putchar (pattern[debug_count]);
   1739       putchar ('\n');
   1740     }
   1741 #endif /* DEBUG */
   1742 
   1743   /* Initialize the compile stack.  */
   1744   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
   1745   if (compile_stack.stack == NULL)
   1746     return REG_ESPACE;
   1747 
   1748   compile_stack.size = INIT_COMPILE_STACK_SIZE;
   1749   compile_stack.avail = 0;
   1750 
   1751   /* Initialize the pattern buffer.  */
   1752   bufp->syntax = syntax;
   1753   bufp->fastmap_accurate = 0;
   1754   bufp->not_bol = bufp->not_eol = 0;
   1755 
   1756   /* Set `used' to zero, so that if we return an error, the pattern
   1757      printer (for debugging) will think there's no pattern.  We reset it
   1758      at the end.  */
   1759   bufp->used = 0;
   1760 
   1761   /* Always count groups, whether or not bufp->no_sub is set.  */
   1762   bufp->re_nsub = 0;
   1763 
   1764 #if !defined (emacs) && !defined (SYNTAX_TABLE)
   1765   /* Initialize the syntax table.  */
   1766    init_syntax_once ();
   1767 #endif
   1768 
   1769   if (bufp->allocated == 0)
   1770     {
   1771       if (bufp->buffer)
   1772 	{ /* If zero allocated, but buffer is non-null, try to realloc
   1773 	     enough space.  This loses if buffer's address is bogus, but
   1774 	     that is the user's responsibility.	 */
   1775 	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
   1776 	}
   1777       else
   1778 	{ /* Caller did not allocate a buffer.	Do it for them.	 */
   1779 	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
   1780 	}
   1781       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
   1782 
   1783       bufp->allocated = INIT_BUF_SIZE;
   1784     }
   1785 
   1786   begalt = b = bufp->buffer;
   1787 
   1788   /* Loop through the uncompiled pattern until we're at the end.  */
   1789   while (p != pend)
   1790     {
   1791       PATFETCH (c);
   1792 
   1793       switch (c)
   1794 	{
   1795 	case '^':
   1796 	  {
   1797 	    if (   /* If at start of pattern, it's an operator.	 */
   1798 		   p == pattern + 1
   1799 		   /* If context independent, it's an operator.	 */
   1800 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
   1801 		   /* Otherwise, depends on what's come before.	 */
   1802 		|| at_begline_loc_p (pattern, p, syntax))
   1803 	      BUF_PUSH (begline);
   1804 	    else
   1805 	      goto normal_char;
   1806 	  }
   1807 	  break;
   1808 
   1809 
   1810 	case '$':
   1811 	  {
   1812 	    if (   /* If at end of pattern, it's an operator.  */
   1813 		   p == pend
   1814 		   /* If context independent, it's an operator.	 */
   1815 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
   1816 		   /* Otherwise, depends on what's next.  */
   1817 		|| at_endline_loc_p (p, pend, syntax))
   1818 	       BUF_PUSH (endline);
   1819 	     else
   1820 	       goto normal_char;
   1821 	   }
   1822 	   break;
   1823 
   1824 
   1825 	case '+':
   1826 	case '?':
   1827 	  if ((syntax & RE_BK_PLUS_QM)
   1828 	      || (syntax & RE_LIMITED_OPS))
   1829 	    goto normal_char;
   1830 	handle_plus:
   1831 	case '*':
   1832 	  /* If there is no previous pattern... */
   1833 	  if (!laststart)
   1834 	    {
   1835 	      if (syntax & RE_CONTEXT_INVALID_OPS)
   1836 		FREE_STACK_RETURN (REG_BADRPT);
   1837 	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
   1838 		goto normal_char;
   1839 	    }
   1840 
   1841 	  {
   1842 	    /* Are we optimizing this jump?  */
   1843 	    boolean keep_string_p = false;
   1844 
   1845 	    /* 1 means zero (many) matches is allowed.	*/
   1846 	    char zero_times_ok = 0, many_times_ok = 0;
   1847 
   1848 	    /* If there is a sequence of repetition chars, collapse it
   1849 	       down to just one (the right one).  We can't combine
   1850 	       interval operators with these because of, e.g., `a{2}*',
   1851 	       which should only match an even number of `a's.	*/
   1852 
   1853 	    for (;;)
   1854 	      {
   1855 		zero_times_ok |= c != '+';
   1856 		many_times_ok |= c != '?';
   1857 
   1858 		if (p == pend)
   1859 		  break;
   1860 
   1861 		PATFETCH (c);
   1862 
   1863 		if (c == '*'
   1864 		    || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
   1865 		  ;
   1866 
   1867 		else if (syntax & RE_BK_PLUS_QM	 &&  c == '\\')
   1868 		  {
   1869 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   1870 
   1871 		    PATFETCH (c1);
   1872 		    if (!(c1 == '+' || c1 == '?'))
   1873 		      {
   1874 			PATUNFETCH;
   1875 			PATUNFETCH;
   1876 			break;
   1877 		      }
   1878 
   1879 		    c = c1;
   1880 		  }
   1881 		else
   1882 		  {
   1883 		    PATUNFETCH;
   1884 		    break;
   1885 		  }
   1886 
   1887 		/* If we get here, we found another repeat character.  */
   1888 	       }
   1889 
   1890 	    /* Star, etc. applied to an empty pattern is equivalent
   1891 	       to an empty pattern.  */
   1892 	    if (!laststart)
   1893 	      break;
   1894 
   1895 	    /* Now we know whether or not zero matches is allowed
   1896 	       and also whether or not two or more matches is allowed.	*/
   1897 	    if (many_times_ok)
   1898 	      { /* More than one repetition is allowed, so put in at the
   1899 		   end a backward relative jump from `b' to before the next
   1900 		   jump we're going to put in below (which jumps from
   1901 		   laststart to after this jump).
   1902 
   1903 		   But if we are at the `*' in the exact sequence `.*\n',
   1904 		   insert an unconditional jump backwards to the .,
   1905 		   instead of the beginning of the loop.  This way we only
   1906 		   push a failure point once, instead of every time
   1907 		   through the loop.  */
   1908 		assert (p - 1 > pattern);
   1909 
   1910 		/* Allocate the space for the jump.  */
   1911 		GET_BUFFER_SPACE (3);
   1912 
   1913 		/* We know we are not at the first character of the pattern,
   1914 		   because laststart was nonzero.  And we've already
   1915 		   incremented `p', by the way, to be the character after
   1916 		   the `*'.  Do we have to do something analogous here
   1917 		   for null bytes, because of RE_DOT_NOT_NULL?	*/
   1918 		if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
   1919 		    && zero_times_ok
   1920 		    && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
   1921 		    && !(syntax & RE_DOT_NEWLINE))
   1922 		  { /* We have .*\n.  */
   1923 		    STORE_JUMP (jump, b, laststart);
   1924 		    keep_string_p = true;
   1925 		  }
   1926 		else
   1927 		  /* Anything else.  */
   1928 		  STORE_JUMP (maybe_pop_jump, b, laststart - 3);
   1929 
   1930 		/* We've added more stuff to the buffer.  */
   1931 		b += 3;
   1932 	      }
   1933 
   1934 	    /* On failure, jump from laststart to b + 3, which will be the
   1935 	       end of the buffer after this jump is inserted.  */
   1936 	    GET_BUFFER_SPACE (3);
   1937 	    INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
   1938 				       : on_failure_jump,
   1939 			 laststart, b + 3);
   1940 	    pending_exact = 0;
   1941 	    b += 3;
   1942 
   1943 	    if (!zero_times_ok)
   1944 	      {
   1945 		/* At least one repetition is required, so insert a
   1946 		   `dummy_failure_jump' before the initial
   1947 		   `on_failure_jump' instruction of the loop. This
   1948 		   effects a skip over that instruction the first time
   1949 		   we hit that loop.  */
   1950 		GET_BUFFER_SPACE (3);
   1951 		INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
   1952 		b += 3;
   1953 	      }
   1954 	    }
   1955 	  break;
   1956 
   1957 
   1958 	case '.':
   1959 	  laststart = b;
   1960 	  BUF_PUSH (anychar);
   1961 	  break;
   1962 
   1963 
   1964 	case '[':
   1965 	  {
   1966 	    boolean had_char_class = false;
   1967 
   1968 	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   1969 
   1970 	    /* Ensure that we have enough space to push a charset: the
   1971 	       opcode, the length count, and the bitset; 34 bytes in all.  */
   1972 	    GET_BUFFER_SPACE (34);
   1973 
   1974 	    laststart = b;
   1975 
   1976 	    /* We test `*p == '^' twice, instead of using an if
   1977 	       statement, so we only need one BUF_PUSH.	 */
   1978 	    BUF_PUSH (*p == '^' ? charset_not : charset);
   1979 	    if (*p == '^')
   1980 	      p++;
   1981 
   1982 	    /* Remember the first position in the bracket expression.  */
   1983 	    p1 = p;
   1984 
   1985 	    /* Push the number of bytes in the bitmap.	*/
   1986 	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
   1987 
   1988 	    /* Clear the whole map.  */
   1989 	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
   1990 
   1991 	    /* charset_not matches newline according to a syntax bit.  */
   1992 	    if ((re_opcode_t) b[-2] == charset_not
   1993 		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
   1994 	      SET_LIST_BIT ('\n');
   1995 
   1996 	    /* Read in characters and ranges, setting map bits.	 */
   1997 	    for (;;)
   1998 	      {
   1999 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2000 
   2001 		PATFETCH (c);
   2002 
   2003 		/* \ might escape characters inside [...] and [^...].  */
   2004 		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
   2005 		  {
   2006 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2007 
   2008 		    PATFETCH (c1);
   2009 		    SET_LIST_BIT (c1);
   2010 		    continue;
   2011 		  }
   2012 
   2013 		/* Could be the end of the bracket expression.	If it's
   2014 		   not (i.e., when the bracket expression is `[]' so
   2015 		   far), the ']' character bit gets set way below.  */
   2016 		if (c == ']' && p != p1 + 1)
   2017 		  break;
   2018 
   2019 		/* Look ahead to see if it's a range when the last thing
   2020 		   was a character class.  */
   2021 		if (had_char_class && c == '-' && *p != ']')
   2022 		  FREE_STACK_RETURN (REG_ERANGE);
   2023 
   2024 		/* Look ahead to see if it's a range when the last thing
   2025 		   was a character: if this is a hyphen not at the
   2026 		   beginning or the end of a list, then it's the range
   2027 		   operator.  */
   2028 		if (c == '-'
   2029 		    && !(p - 2 >= pattern && p[-2] == '[')
   2030 		    && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
   2031 		    && *p != ']')
   2032 		  {
   2033 		    reg_errcode_t ret
   2034 		      = compile_range (&p, pend, translate, syntax, b);
   2035 		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2036 		  }
   2037 
   2038 		else if (p[0] == '-' && p[1] != ']')
   2039 		  { /* This handles ranges made up of characters only.	*/
   2040 		    reg_errcode_t ret;
   2041 
   2042 		    /* Move past the `-'.  */
   2043 		    PATFETCH (c1);
   2044 
   2045 		    ret = compile_range (&p, pend, translate, syntax, b);
   2046 		    if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
   2047 		  }
   2048 
   2049 		/* See if we're at the beginning of a possible character
   2050 		   class.  */
   2051 
   2052 		else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
   2053 		  { /* Leave room for the null.	 */
   2054 		    char str[CHAR_CLASS_MAX_LENGTH + 1];
   2055 
   2056 		    PATFETCH (c);
   2057 		    c1 = 0;
   2058 
   2059 		    /* If pattern is `[[:'.  */
   2060 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2061 
   2062 		    for (;;)
   2063 		      {
   2064 			PATFETCH (c);
   2065 			if (c == ':' || c == ']' || p == pend
   2066 			    || c1 == CHAR_CLASS_MAX_LENGTH)
   2067 			  break;
   2068 			str[c1++] = c;
   2069 		      }
   2070 		    str[c1] = '\0';
   2071 
   2072 		    /* If isn't a word bracketed by `[:' and:`]':
   2073 		       undo the ending character, the letters, and leave
   2074 		       the leading `:' and `[' (but set bits for them).	 */
   2075 		    if (c == ':' && *p == ']')
   2076 		      {
   2077 			int ch;
   2078 			boolean is_alnum = STREQ (str, "alnum");
   2079 			boolean is_alpha = STREQ (str, "alpha");
   2080 			boolean is_blank = STREQ (str, "blank");
   2081 			boolean is_cntrl = STREQ (str, "cntrl");
   2082 			boolean is_digit = STREQ (str, "digit");
   2083 			boolean is_graph = STREQ (str, "graph");
   2084 			boolean is_lower = STREQ (str, "lower");
   2085 			boolean is_print = STREQ (str, "print");
   2086 			boolean is_punct = STREQ (str, "punct");
   2087 			boolean is_space = STREQ (str, "space");
   2088 			boolean is_upper = STREQ (str, "upper");
   2089 			boolean is_xdigit = STREQ (str, "xdigit");
   2090 
   2091 			if (!IS_CHAR_CLASS (str))
   2092 			  FREE_STACK_RETURN (REG_ECTYPE);
   2093 
   2094 			/* Throw away the ] at the end of the character
   2095 			   class.  */
   2096 			PATFETCH (c);
   2097 
   2098 			if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
   2099 
   2100 			for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
   2101 			  {
   2102 			    int translated = TRANSLATE (ch);
   2103 			    /* This was split into 3 if's to
   2104 			       avoid an arbitrary limit in some compiler.  */
   2105 			    if (   (is_alnum  && ISALNUM (ch))
   2106 				|| (is_alpha  && ISALPHA (ch))
   2107 				|| (is_blank  && ISBLANK (ch))
   2108 				|| (is_cntrl  && ISCNTRL (ch)))
   2109 			      SET_LIST_BIT (translated);
   2110 			    if (   (is_digit  && ISDIGIT (ch))
   2111 				|| (is_graph  && ISGRAPH (ch))
   2112 				|| (is_lower  && ISLOWER (ch))
   2113 				|| (is_print  && ISPRINT (ch)))
   2114 			      SET_LIST_BIT (translated);
   2115 			    if (   (is_punct  && ISPUNCT (ch))
   2116 				|| (is_space  && ISSPACE (ch))
   2117 				|| (is_upper  && ISUPPER (ch))
   2118 				|| (is_xdigit && ISXDIGIT (ch)))
   2119 			      SET_LIST_BIT (translated);
   2120 			  }
   2121 			had_char_class = true;
   2122 		      }
   2123 		    else
   2124 		      {
   2125 			c1++;
   2126 			while (c1--)
   2127 			  PATUNFETCH;
   2128 			SET_LIST_BIT ('[');
   2129 			SET_LIST_BIT (':');
   2130 			had_char_class = false;
   2131 		      }
   2132 		  }
   2133 		else
   2134 		  {
   2135 		    had_char_class = false;
   2136 		    SET_LIST_BIT (c);
   2137 		  }
   2138 	      }
   2139 
   2140 	    /* Discard any (non)matching list bytes that are all 0 at the
   2141 	       end of the map.	Decrease the map-length byte too.  */
   2142 	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
   2143 	      b[-1]--;
   2144 	    b += b[-1];
   2145 	  }
   2146 	  break;
   2147 
   2148 
   2149 	case '(':
   2150 	  if (syntax & RE_NO_BK_PARENS)
   2151 	    goto handle_open;
   2152 	  else
   2153 	    goto normal_char;
   2154 
   2155 
   2156 	case ')':
   2157 	  if (syntax & RE_NO_BK_PARENS)
   2158 	    goto handle_close;
   2159 	  else
   2160 	    goto normal_char;
   2161 
   2162 
   2163 	case '\n':
   2164 	  if (syntax & RE_NEWLINE_ALT)
   2165 	    goto handle_alt;
   2166 	  else
   2167 	    goto normal_char;
   2168 
   2169 
   2170 	case '|':
   2171 	  if (syntax & RE_NO_BK_VBAR)
   2172 	    goto handle_alt;
   2173 	  else
   2174 	    goto normal_char;
   2175 
   2176 
   2177 	case '{':
   2178 	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
   2179 	     goto handle_interval;
   2180 	   else
   2181 	     goto normal_char;
   2182 
   2183 
   2184 	case '\\':
   2185 	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
   2186 
   2187 	  /* Do not translate the character after the \, so that we can
   2188 	     distinguish, e.g., \B from \b, even if we normally would
   2189 	     translate, e.g., B to b.  */
   2190 	  PATFETCH_RAW (c);
   2191 
   2192 	  switch (c)
   2193 	    {
   2194 	    case '(':
   2195 	      if (syntax & RE_NO_BK_PARENS)
   2196 		goto normal_backslash;
   2197 
   2198 	    handle_open:
   2199 	      bufp->re_nsub++;
   2200 	      regnum++;
   2201 
   2202 	      if (COMPILE_STACK_FULL)
   2203 		{
   2204 		  RETALLOC (compile_stack.stack, compile_stack.size << 1,
   2205 			    compile_stack_elt_t);
   2206 		  if (compile_stack.stack == NULL) return REG_ESPACE;
   2207 
   2208 		  compile_stack.size <<= 1;
   2209 		}
   2210 
   2211 	      /* These are the values to restore when we hit end of this
   2212 		 group.	 They are all relative offsets, so that if the
   2213 		 whole pattern moves because of realloc, they will still
   2214 		 be valid.  */
   2215 	      COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
   2216 	      COMPILE_STACK_TOP.fixup_alt_jump
   2217 		= fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
   2218 	      COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
   2219 	      COMPILE_STACK_TOP.regnum = regnum;
   2220 
   2221 	      /* We will eventually replace the 0 with the number of
   2222 		 groups inner to this one.  But do not push a
   2223 		 start_memory for groups beyond the last one we can
   2224 		 represent in the compiled pattern.  */
   2225 	      if (regnum <= MAX_REGNUM)
   2226 		{
   2227 		  COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
   2228 		  BUF_PUSH_3 (start_memory, regnum, 0);
   2229 		}
   2230 
   2231 	      compile_stack.avail++;
   2232 
   2233 	      fixup_alt_jump = 0;
   2234 	      laststart = 0;
   2235 	      begalt = b;
   2236 	      /* If we've reached MAX_REGNUM groups, then this open
   2237 		 won't actually generate any code, so we'll have to
   2238 		 clear pending_exact explicitly.  */
   2239 	      pending_exact = 0;
   2240 	      break;
   2241 
   2242 
   2243 	    case ')':
   2244 	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
   2245 
   2246 	      if (COMPILE_STACK_EMPTY)
   2247 	      {
   2248 		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   2249 		  goto normal_backslash;
   2250 		else
   2251 		  FREE_STACK_RETURN (REG_ERPAREN);
   2252 	      }
   2253 
   2254 	    handle_close:
   2255 	      if (fixup_alt_jump)
   2256 		{ /* Push a dummy failure point at the end of the
   2257 		     alternative for a possible future
   2258 		     `pop_failure_jump' to pop.	 See comments at
   2259 		     `push_dummy_failure' in `re_match_2'.  */
   2260 		  BUF_PUSH (push_dummy_failure);
   2261 
   2262 		  /* We allocated space for this jump when we assigned
   2263 		     to `fixup_alt_jump', in the `handle_alt' case below.  */
   2264 		  STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
   2265 		}
   2266 
   2267 	      /* See similar code for backslashed left paren above.  */
   2268 	      if (COMPILE_STACK_EMPTY)
   2269 	      {
   2270 		if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
   2271 		  goto normal_char;
   2272 		else
   2273 		  FREE_STACK_RETURN (REG_ERPAREN);
   2274 	      }
   2275 
   2276 	      /* Since we just checked for an empty stack above, this
   2277 		 ``can't happen''.  */
   2278 	      assert (compile_stack.avail != 0);
   2279 	      {
   2280 		/* We don't just want to restore into `regnum', because
   2281 		   later groups should continue to be numbered higher,
   2282 		   as in `(ab)c(de)' -- the second group is #2.	 */
   2283 		regnum_t this_group_regnum;
   2284 
   2285 		compile_stack.avail--;
   2286 		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
   2287 		fixup_alt_jump
   2288 		  = COMPILE_STACK_TOP.fixup_alt_jump
   2289 		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
   2290 		    : 0;
   2291 		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
   2292 		this_group_regnum = COMPILE_STACK_TOP.regnum;
   2293 		/* If we've reached MAX_REGNUM groups, then this open
   2294 		   won't actually generate any code, so we'll have to
   2295 		   clear pending_exact explicitly.  */
   2296 		pending_exact = 0;
   2297 
   2298 		/* We're at the end of the group, so now we know how many
   2299 		   groups were inside this one.	 */
   2300 		if (this_group_regnum <= MAX_REGNUM)
   2301 		  {
   2302 		    unsigned char *inner_group_loc
   2303 		      = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
   2304 
   2305 		    *inner_group_loc = regnum - this_group_regnum;
   2306 		    BUF_PUSH_3 (stop_memory, this_group_regnum,
   2307 				regnum - this_group_regnum);
   2308 		  }
   2309 	      }
   2310 	      break;
   2311 
   2312 
   2313 	    case '|':					/* `\|'.  */
   2314 	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
   2315 		goto normal_backslash;
   2316 	    handle_alt:
   2317 	      if (syntax & RE_LIMITED_OPS)
   2318 		goto normal_char;
   2319 
   2320 	      /* Insert before the previous alternative a jump which
   2321 		 jumps to this alternative if the former fails.	 */
   2322 	      GET_BUFFER_SPACE (3);
   2323 	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
   2324 	      pending_exact = 0;
   2325 	      b += 3;
   2326 
   2327 	      /* The alternative before this one has a jump after it
   2328 		 which gets executed if it gets matched.  Adjust that
   2329 		 jump so it will jump to this alternative's analogous
   2330 		 jump (put in below, which in turn will jump to the next
   2331 		 (if any) alternative's such jump, etc.).  The last such
   2332 		 jump jumps to the correct final destination.  A picture:
   2333 			  _____ _____
   2334 			  |   | |   |
   2335 			  |   v |   v
   2336 			 a | b	 | c
   2337 
   2338 		 If we are at `b', then fixup_alt_jump right now points to a
   2339 		 three-byte space after `a'.  We'll put in the jump, set
   2340 		 fixup_alt_jump to right after `b', and leave behind three
   2341 		 bytes which we'll fill in when we get to after `c'.  */
   2342 
   2343 	      if (fixup_alt_jump)
   2344 		STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   2345 
   2346 	      /* Mark and leave space for a jump after this alternative,
   2347 		 to be filled in later either by next alternative or
   2348 		 when know we're at the end of a series of alternatives.  */
   2349 	      fixup_alt_jump = b;
   2350 	      GET_BUFFER_SPACE (3);
   2351 	      b += 3;
   2352 
   2353 	      laststart = 0;
   2354 	      begalt = b;
   2355 	      break;
   2356 
   2357 
   2358 	    case '{':
   2359 	      /* If \{ is a literal.  */
   2360 	      if (!(syntax & RE_INTERVALS)
   2361 		     /* If we're at `\{' and it's not the open-interval
   2362 			operator.  */
   2363 		  || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
   2364 		  || (p - 2 == pattern	&&  p == pend))
   2365 		goto normal_backslash;
   2366 
   2367 	    handle_interval:
   2368 	      {
   2369 		/* If got here, then the syntax allows intervals.  */
   2370 
   2371 		/* At least (most) this many matches must be made.  */
   2372 		int lower_bound = -1, upper_bound = -1;
   2373 
   2374 		beg_interval = p - 1;
   2375 
   2376 		if (p == pend)
   2377 		  {
   2378 		    if (syntax & RE_NO_BK_BRACES)
   2379 		      goto unfetch_interval;
   2380 		    else
   2381 		      FREE_STACK_RETURN (REG_EBRACE);
   2382 		  }
   2383 
   2384 		GET_UNSIGNED_NUMBER (lower_bound);
   2385 
   2386 		if (c == ',')
   2387 		  {
   2388 		    GET_UNSIGNED_NUMBER (upper_bound);
   2389 		    if (upper_bound < 0) upper_bound = RE_DUP_MAX;
   2390 		  }
   2391 		else
   2392 		  /* Interval such as `{1}' => match exactly once. */
   2393 		  upper_bound = lower_bound;
   2394 
   2395 		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
   2396 		    || lower_bound > upper_bound)
   2397 		  {
   2398 		    if (syntax & RE_NO_BK_BRACES)
   2399 		      goto unfetch_interval;
   2400 		    else
   2401 		      FREE_STACK_RETURN (REG_BADBR);
   2402 		  }
   2403 
   2404 		if (!(syntax & RE_NO_BK_BRACES))
   2405 		  {
   2406 		    if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
   2407 
   2408 		    PATFETCH (c);
   2409 		  }
   2410 
   2411 		if (c != '}')
   2412 		  {
   2413 		    if (syntax & RE_NO_BK_BRACES)
   2414 		      goto unfetch_interval;
   2415 		    else
   2416 		      FREE_STACK_RETURN (REG_BADBR);
   2417 		  }
   2418 
   2419 		/* We just parsed a valid interval.  */
   2420 
   2421 		/* If it's invalid to have no preceding re.  */
   2422 		if (!laststart)
   2423 		  {
   2424 		    if (syntax & RE_CONTEXT_INVALID_OPS)
   2425 		      FREE_STACK_RETURN (REG_BADRPT);
   2426 		    else if (syntax & RE_CONTEXT_INDEP_OPS)
   2427 		      laststart = b;
   2428 		    else
   2429 		      goto unfetch_interval;
   2430 		  }
   2431 
   2432 		/* If the upper bound is zero, don't want to succeed at
   2433 		   all; jump from `laststart' to `b + 3', which will be
   2434 		   the end of the buffer after we insert the jump.  */
   2435 		 if (upper_bound == 0)
   2436 		   {
   2437 		     GET_BUFFER_SPACE (3);
   2438 		     INSERT_JUMP (jump, laststart, b + 3);
   2439 		     b += 3;
   2440 		   }
   2441 
   2442 		 /* Otherwise, we have a nontrivial interval.  When
   2443 		    we're all done, the pattern will look like:
   2444 		      set_number_at <jump count> <upper bound>
   2445 		      set_number_at <succeed_n count> <lower bound>
   2446 		      succeed_n <after jump addr> <succeed_n count>
   2447 		      <body of loop>
   2448 		      jump_n <succeed_n addr> <jump count>
   2449 		    (The upper bound and `jump_n' are omitted if
   2450 		    `upper_bound' is 1, though.)  */
   2451 		 else
   2452 		   { /* If the upper bound is > 1, we need to insert
   2453 			more at the end of the loop.  */
   2454 		     unsigned nbytes = 10 + (upper_bound > 1) * 10;
   2455 
   2456 		     GET_BUFFER_SPACE (nbytes);
   2457 
   2458 		     /* Initialize lower bound of the `succeed_n', even
   2459 			though it will be set during matching by its
   2460 			attendant `set_number_at' (inserted next),
   2461 			because `re_compile_fastmap' needs to know.
   2462 			Jump to the `jump_n' we might insert below.  */
   2463 		     INSERT_JUMP2 (succeed_n, laststart,
   2464 				   b + 5 + (upper_bound > 1) * 5,
   2465 				   lower_bound);
   2466 		     b += 5;
   2467 
   2468 		     /* Code to initialize the lower bound.  Insert
   2469 			before the `succeed_n'.	 The `5' is the last two
   2470 			bytes of this `set_number_at', plus 3 bytes of
   2471 			the following `succeed_n'.  */
   2472 		     insert_op2 (set_number_at, laststart, 5, lower_bound, b);
   2473 		     b += 5;
   2474 
   2475 		     if (upper_bound > 1)
   2476 		       { /* More than one repetition is allowed, so
   2477 			    append a backward jump to the `succeed_n'
   2478 			    that starts this interval.
   2479 
   2480 			    When we've reached this during matching,
   2481 			    we'll have matched the interval once, so
   2482 			    jump back only `upper_bound - 1' times.  */
   2483 			 STORE_JUMP2 (jump_n, b, laststart + 5,
   2484 				      upper_bound - 1);
   2485 			 b += 5;
   2486 
   2487 			 /* The location we want to set is the second
   2488 			    parameter of the `jump_n'; that is `b-2' as
   2489 			    an absolute address.  `laststart' will be
   2490 			    the `set_number_at' we're about to insert;
   2491 			    `laststart+3' the number to set, the source
   2492 			    for the relative address.  But we are
   2493 			    inserting into the middle of the pattern --
   2494 			    so everything is getting moved up by 5.
   2495 			    Conclusion: (b - 2) - (laststart + 3) + 5,
   2496 			    i.e., b - laststart.
   2497 
   2498 			    We insert this at the beginning of the loop
   2499 			    so that if we fail during matching, we'll
   2500 			    reinitialize the bounds.  */
   2501 			 insert_op2 (set_number_at, laststart, b - laststart,
   2502 				     upper_bound - 1, b);
   2503 			 b += 5;
   2504 		       }
   2505 		   }
   2506 		pending_exact = 0;
   2507 		beg_interval = NULL;
   2508 	      }
   2509 	      break;
   2510 
   2511 	    unfetch_interval:
   2512 	      /* If an invalid interval, match the characters as literals.  */
   2513 	       assert (beg_interval);
   2514 	       p = beg_interval;
   2515 	       beg_interval = NULL;
   2516 
   2517 	       /* normal_char and normal_backslash need `c'.  */
   2518 	       PATFETCH (c);
   2519 
   2520 	       if (!(syntax & RE_NO_BK_BRACES))
   2521 		 {
   2522 		   if (p > pattern  &&	p[-1] == '\\')
   2523 		     goto normal_backslash;
   2524 		 }
   2525 	       goto normal_char;
   2526 
   2527 #ifdef emacs
   2528 	    /* There is no way to specify the before_dot and after_dot
   2529 	       operators.  rms says this is ok.	 --karl	 */
   2530 	    case '=':
   2531 	      BUF_PUSH (at_dot);
   2532 	      break;
   2533 
   2534 	    case 's':
   2535 	      laststart = b;
   2536 	      PATFETCH (c);
   2537 	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
   2538 	      break;
   2539 
   2540 	    case 'S':
   2541 	      laststart = b;
   2542 	      PATFETCH (c);
   2543 	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
   2544 	      break;
   2545 #endif /* emacs */
   2546 
   2547 
   2548 	    case 'w':
   2549 	      laststart = b;
   2550 	      BUF_PUSH (wordchar);
   2551 	      break;
   2552 
   2553 
   2554 	    case 'W':
   2555 	      laststart = b;
   2556 	      BUF_PUSH (notwordchar);
   2557 	      break;
   2558 
   2559 
   2560 	    case '<':
   2561 	      BUF_PUSH (wordbeg);
   2562 	      break;
   2563 
   2564 	    case '>':
   2565 	      BUF_PUSH (wordend);
   2566 	      break;
   2567 
   2568 	    case 'b':
   2569 	      BUF_PUSH (wordbound);
   2570 	      break;
   2571 
   2572 	    case 'B':
   2573 	      BUF_PUSH (notwordbound);
   2574 	      break;
   2575 
   2576 	    case '`':
   2577 	      BUF_PUSH (begbuf);
   2578 	      break;
   2579 
   2580 	    case '\'':
   2581 	      BUF_PUSH (endbuf);
   2582 	      break;
   2583 
   2584 	    case '1': case '2': case '3': case '4': case '5':
   2585 	    case '6': case '7': case '8': case '9':
   2586 	      if (syntax & RE_NO_BK_REFS)
   2587 		goto normal_char;
   2588 
   2589 	      c1 = c - '0';
   2590 
   2591 	      if (c1 > regnum)
   2592 		FREE_STACK_RETURN (REG_ESUBREG);
   2593 
   2594 	      /* Can't back reference to a subexpression if inside of it.  */
   2595 	      if (group_in_compile_stack (compile_stack, c1))
   2596 		goto normal_char;
   2597 
   2598 	      laststart = b;
   2599 	      BUF_PUSH_2 (duplicate, c1);
   2600 	      break;
   2601 
   2602 
   2603 	    case '+':
   2604 	    case '?':
   2605 	      if (syntax & RE_BK_PLUS_QM)
   2606 		goto handle_plus;
   2607 	      else
   2608 		goto normal_backslash;
   2609 
   2610 	    default:
   2611 	    normal_backslash:
   2612 	      /* You might think it would be useful for \ to mean
   2613 		 not to translate; but if we don't translate it
   2614 		 it will never match anything.	*/
   2615 	      c = TRANSLATE (c);
   2616 	      goto normal_char;
   2617 	    }
   2618 	  break;
   2619 
   2620 
   2621 	default:
   2622 	/* Expects the character in `c'.  */
   2623 	normal_char:
   2624 	      /* If no exactn currently being built.  */
   2625 	  if (!pending_exact
   2626 
   2627 	      /* If last exactn not at current position.  */
   2628 	      || pending_exact + *pending_exact + 1 != b
   2629 
   2630 	      /* We have only one byte following the exactn for the count.  */
   2631 	      || *pending_exact == (1 << BYTEWIDTH) - 1
   2632 
   2633 	      /* If followed by a repetition operator.	*/
   2634 	      || *p == '*' || *p == '^'
   2635 	      || ((syntax & RE_BK_PLUS_QM)
   2636 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
   2637 		  : (*p == '+' || *p == '?'))
   2638 	      || ((syntax & RE_INTERVALS)
   2639 		  && ((syntax & RE_NO_BK_BRACES)
   2640 		      ? *p == '{'
   2641 		      : (p[0] == '\\' && p[1] == '{'))))
   2642 	    {
   2643 	      /* Start building a new exactn.  */
   2644 
   2645 	      laststart = b;
   2646 
   2647 	      BUF_PUSH_2 (exactn, 0);
   2648 	      pending_exact = b - 1;
   2649 	    }
   2650 
   2651 	  BUF_PUSH (c);
   2652 	  (*pending_exact)++;
   2653 	  break;
   2654 	} /* switch (c) */
   2655     } /* while p != pend */
   2656 
   2657 
   2658   /* Through the pattern now.  */
   2659 
   2660   if (fixup_alt_jump)
   2661     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
   2662 
   2663   if (!COMPILE_STACK_EMPTY)
   2664     FREE_STACK_RETURN (REG_EPAREN);
   2665 
   2666   /* If we don't want backtracking, force success
   2667      the first time we reach the end of the compiled pattern.  */
   2668   if (syntax & RE_NO_POSIX_BACKTRACKING)
   2669     BUF_PUSH (succeed);
   2670 
   2671   free (compile_stack.stack);
   2672 
   2673   /* We have succeeded; set the length of the buffer.  */
   2674   bufp->used = b - bufp->buffer;
   2675 
   2676 #ifdef DEBUG
   2677   if (debug)
   2678     {
   2679       DEBUG_PRINT1 ("\nCompiled pattern: \n");
   2680       print_compiled_pattern (bufp);
   2681     }
   2682 #endif /* DEBUG */
   2683 
   2684 #ifndef MATCH_MAY_ALLOCATE
   2685   /* Initialize the failure stack to the largest possible stack.  This
   2686      isn't necessary unless we're trying to avoid calling alloca in
   2687      the search and match routines.  */
   2688   {
   2689     int num_regs = bufp->re_nsub + 1;
   2690 
   2691     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
   2692        is strictly greater than re_max_failures, the largest possible stack
   2693        is 2 * re_max_failures failure points.  */
   2694     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
   2695       {
   2696 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
   2697 
   2698 #ifdef emacs
   2699 	if (! fail_stack.stack)
   2700 	  fail_stack.stack
   2701 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
   2702 					    * sizeof (fail_stack_elt_t));
   2703 	else
   2704 	  fail_stack.stack
   2705 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
   2706 					     (fail_stack.size
   2707 					      * sizeof (fail_stack_elt_t)));
   2708 #else /* not emacs */
   2709 	if (! fail_stack.stack)
   2710 	  fail_stack.stack
   2711 	    = (fail_stack_elt_t *) malloc (fail_stack.size
   2712 					   * sizeof (fail_stack_elt_t));
   2713 	else
   2714 	  fail_stack.stack
   2715 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
   2716 					    (fail_stack.size
   2717 					     * sizeof (fail_stack_elt_t)));
   2718 #endif /* not emacs */
   2719       }
   2720 
   2721     regex_grow_registers (num_regs);
   2722   }
   2723 #endif /* not MATCH_MAY_ALLOCATE */
   2724 
   2725   return REG_NOERROR;
   2726 } /* regex_compile */
   2727 
   2728 /* Subroutines for `regex_compile'.  */
   2730 
   2731 /* Store OP at LOC followed by two-byte integer parameter ARG.	*/
   2732 
   2733 static void
   2734 store_op1 (op, loc, arg)
   2735     re_opcode_t op;
   2736     unsigned char *loc;
   2737     int arg;
   2738 {
   2739   *loc = (unsigned char) op;
   2740   STORE_NUMBER (loc + 1, arg);
   2741 }
   2742 
   2743 
   2744 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
   2745 
   2746 static void
   2747 store_op2 (op, loc, arg1, arg2)
   2748     re_opcode_t op;
   2749     unsigned char *loc;
   2750     int arg1, arg2;
   2751 {
   2752   *loc = (unsigned char) op;
   2753   STORE_NUMBER (loc + 1, arg1);
   2754   STORE_NUMBER (loc + 3, arg2);
   2755 }
   2756 
   2757 
   2758 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
   2759    for OP followed by two-byte integer parameter ARG.  */
   2760 
   2761 static void
   2762 insert_op1 (op, loc, arg, end)
   2763     re_opcode_t op;
   2764     unsigned char *loc;
   2765     int arg;
   2766     unsigned char *end;
   2767 {
   2768   register unsigned char *pfrom = end;
   2769   register unsigned char *pto = end + 3;
   2770 
   2771   while (pfrom != loc)
   2772     *--pto = *--pfrom;
   2773 
   2774   store_op1 (op, loc, arg);
   2775 }
   2776 
   2777 
   2778 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
   2779 
   2780 static void
   2781 insert_op2 (op, loc, arg1, arg2, end)
   2782     re_opcode_t op;
   2783     unsigned char *loc;
   2784     int arg1, arg2;
   2785     unsigned char *end;
   2786 {
   2787   register unsigned char *pfrom = end;
   2788   register unsigned char *pto = end + 5;
   2789 
   2790   while (pfrom != loc)
   2791     *--pto = *--pfrom;
   2792 
   2793   store_op2 (op, loc, arg1, arg2);
   2794 }
   2795 
   2796 
   2797 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
   2798    after an alternative or a begin-subexpression.  We assume there is at
   2799    least one character before the ^.  */
   2800 
   2801 static boolean
   2802 at_begline_loc_p (pattern, p, syntax)
   2803     const char *pattern, *p;
   2804     reg_syntax_t syntax;
   2805 {
   2806   const char *prev = p - 2;
   2807   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
   2808 
   2809   return
   2810        /* After a subexpression?  */
   2811        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
   2812        /* After an alternative?	 */
   2813     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
   2814 }
   2815 
   2816 
   2817 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
   2818    at least one character after the $, i.e., `P < PEND'.  */
   2819 
   2820 static boolean
   2821 at_endline_loc_p (p, pend, syntax)
   2822     const char *p, *pend;
   2823     int syntax;
   2824 {
   2825   const char *next = p;
   2826   boolean next_backslash = *next == '\\';
   2827   const char *next_next = p + 1 < pend ? p + 1 : 0;
   2828 
   2829   return
   2830        /* Before a subexpression?  */
   2831        (syntax & RE_NO_BK_PARENS ? *next == ')'
   2832 	: next_backslash && next_next && *next_next == ')')
   2833        /* Before an alternative?  */
   2834     || (syntax & RE_NO_BK_VBAR ? *next == '|'
   2835 	: next_backslash && next_next && *next_next == '|');
   2836 }
   2837 
   2838 
   2839 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
   2840    false if it's not.  */
   2841 
   2842 static boolean
   2843 group_in_compile_stack (compile_stack, regnum)
   2844     compile_stack_type compile_stack;
   2845     regnum_t regnum;
   2846 {
   2847   int this_element;
   2848 
   2849   for (this_element = compile_stack.avail - 1;
   2850        this_element >= 0;
   2851        this_element--)
   2852     if (compile_stack.stack[this_element].regnum == regnum)
   2853       return true;
   2854 
   2855   return false;
   2856 }
   2857 
   2858 
   2859 /* Read the ending character of a range (in a bracket expression) from the
   2860    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
   2861    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
   2862    Then we set the translation of all bits between the starting and
   2863    ending characters (inclusive) in the compiled pattern B.
   2864 
   2865    Return an error code.
   2866 
   2867    We use these short variable names so we can use the same macros as
   2868    `regex_compile' itself.  */
   2869 
   2870 static reg_errcode_t
   2871 compile_range (p_ptr, pend, translate, syntax, b)
   2872     const char **p_ptr, *pend;
   2873     RE_TRANSLATE_TYPE translate;
   2874     reg_syntax_t syntax;
   2875     unsigned char *b;
   2876 {
   2877   unsigned this_char;
   2878 
   2879   const char *p = *p_ptr;
   2880   int range_start, range_end;
   2881 
   2882   if (p == pend)
   2883     return REG_ERANGE;
   2884 
   2885   /* Even though the pattern is a signed `char *', we need to fetch
   2886      with unsigned char *'s; if the high bit of the pattern character
   2887      is set, the range endpoints will be negative if we fetch using a
   2888      signed char *.
   2889 
   2890      We also want to fetch the endpoints without translating them; the
   2891      appropriate translation is done in the bit-setting loop below.  */
   2892   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
   2893   range_start = ((const unsigned char *) p)[-2];
   2894   range_end   = ((const unsigned char *) p)[0];
   2895 
   2896   /* Have to increment the pointer into the pattern string, so the
   2897      caller isn't still at the ending character.  */
   2898   (*p_ptr)++;
   2899 
   2900   /* If the start is after the end, the range is empty.	 */
   2901   if (range_start > range_end)
   2902     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
   2903 
   2904   /* Here we see why `this_char' has to be larger than an `unsigned
   2905      char' -- the range is inclusive, so if `range_end' == 0xff
   2906      (assuming 8-bit characters), we would otherwise go into an infinite
   2907      loop, since all characters <= 0xff.  */
   2908   for (this_char = range_start; this_char <= range_end; this_char++)
   2909     {
   2910       SET_LIST_BIT (TRANSLATE (this_char));
   2911     }
   2912 
   2913   return REG_NOERROR;
   2914 }
   2915 
   2916 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
   2918    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
   2919    characters can start a string that matches the pattern.  This fastmap
   2920    is used by re_search to skip quickly over impossible starting points.
   2921 
   2922    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
   2923    area as BUFP->fastmap.
   2924 
   2925    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
   2926    the pattern buffer.
   2927 
   2928    Returns 0 if we succeed, -2 if an internal error.   */
   2929 
   2930 int
   2931 re_compile_fastmap (bufp)
   2932      struct re_pattern_buffer *bufp;
   2933 {
   2934   int j, k;
   2935 #ifdef MATCH_MAY_ALLOCATE
   2936   fail_stack_type fail_stack;
   2937 #endif
   2938 #ifndef REGEX_MALLOC
   2939   char *destination;
   2940 #endif
   2941   /* We don't push any register information onto the failure stack.  */
   2942   unsigned num_regs = 0;
   2943 
   2944   register char *fastmap = bufp->fastmap;
   2945   unsigned char *pattern = bufp->buffer;
   2946   unsigned long size = bufp->used;
   2947   unsigned char *p = pattern;
   2948   register unsigned char *pend = pattern + size;
   2949 
   2950   /* This holds the pointer to the failure stack, when
   2951      it is allocated relocatably.  */
   2952 #ifdef REL_ALLOC
   2953   fail_stack_elt_t *failure_stack_ptr;
   2954 #endif
   2955 
   2956   /* Assume that each path through the pattern can be null until
   2957      proven otherwise.	We set this false at the bottom of switch
   2958      statement, to which we get only if a particular path doesn't
   2959      match the empty string.  */
   2960   boolean path_can_be_null = true;
   2961 
   2962   /* We aren't doing a `succeed_n' to begin with.  */
   2963   boolean succeed_n_p = false;
   2964 
   2965   assert (fastmap != NULL && p != NULL);
   2966 
   2967   INIT_FAIL_STACK ();
   2968   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
   2969   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
   2970   bufp->can_be_null = 0;
   2971 
   2972   while (1)
   2973     {
   2974       if (p == pend || *p == succeed)
   2975 	{
   2976 	  /* We have reached the (effective) end of pattern.  */
   2977 	  if (!FAIL_STACK_EMPTY ())
   2978 	    {
   2979 	      bufp->can_be_null |= path_can_be_null;
   2980 
   2981 	      /* Reset for next path.  */
   2982 	      path_can_be_null = true;
   2983 
   2984 	      p = fail_stack.stack[--fail_stack.avail].pointer;
   2985 
   2986 	      continue;
   2987 	    }
   2988 	  else
   2989 	    break;
   2990 	}
   2991 
   2992       /* We should never be about to go beyond the end of the pattern.	*/
   2993       assert (p < pend);
   2994 
   2995       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   2996 	{
   2997 
   2998 	/* I guess the idea here is to simply not bother with a fastmap
   2999 	   if a backreference is used, since it's too hard to figure out
   3000 	   the fastmap for the corresponding group.  Setting
   3001 	   `can_be_null' stops `re_search_2' from using the fastmap, so
   3002 	   that is all we do.  */
   3003 	case duplicate:
   3004 	  bufp->can_be_null = 1;
   3005 	  goto done;
   3006 
   3007 
   3008       /* Following are the cases which match a character.  These end
   3009 	 with `break'.	*/
   3010 
   3011 	case exactn:
   3012 	  fastmap[p[1]] = 1;
   3013 	  break;
   3014 
   3015 
   3016 	case charset:
   3017 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   3018 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
   3019 	      fastmap[j] = 1;
   3020 	  break;
   3021 
   3022 
   3023 	case charset_not:
   3024 	  /* Chars beyond end of map must be allowed.  */
   3025 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
   3026 	    fastmap[j] = 1;
   3027 
   3028 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
   3029 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
   3030 	      fastmap[j] = 1;
   3031 	  break;
   3032 
   3033 
   3034 	case wordchar:
   3035 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   3036 	    if (SYNTAX (j) == Sword)
   3037 	      fastmap[j] = 1;
   3038 	  break;
   3039 
   3040 
   3041 	case notwordchar:
   3042 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   3043 	    if (SYNTAX (j) != Sword)
   3044 	      fastmap[j] = 1;
   3045 	  break;
   3046 
   3047 
   3048 	case anychar:
   3049 	  {
   3050 	    int fastmap_newline = fastmap['\n'];
   3051 
   3052 	    /* `.' matches anything ...	 */
   3053 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
   3054 	      fastmap[j] = 1;
   3055 
   3056 	    /* ... except perhaps newline.  */
   3057 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
   3058 	      fastmap['\n'] = fastmap_newline;
   3059 
   3060 	    /* Return if we have already set `can_be_null'; if we have,
   3061 	       then the fastmap is irrelevant.	Something's wrong here.	 */
   3062 	    else if (bufp->can_be_null)
   3063 	      goto done;
   3064 
   3065 	    /* Otherwise, have to check alternative paths.  */
   3066 	    break;
   3067 	  }
   3068 
   3069 #ifdef emacs
   3070 	case syntaxspec:
   3071 	  k = *p++;
   3072 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   3073 	    if (SYNTAX (j) == (enum syntaxcode) k)
   3074 	      fastmap[j] = 1;
   3075 	  break;
   3076 
   3077 
   3078 	case notsyntaxspec:
   3079 	  k = *p++;
   3080 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
   3081 	    if (SYNTAX (j) != (enum syntaxcode) k)
   3082 	      fastmap[j] = 1;
   3083 	  break;
   3084 
   3085 
   3086       /* All cases after this match the empty string.  These end with
   3087 	 `continue'.  */
   3088 
   3089 
   3090 	case before_dot:
   3091 	case at_dot:
   3092 	case after_dot:
   3093 	  continue;
   3094 #endif /* emacs */
   3095 
   3096 
   3097 	case no_op:
   3098 	case begline:
   3099 	case endline:
   3100 	case begbuf:
   3101 	case endbuf:
   3102 	case wordbound:
   3103 	case notwordbound:
   3104 	case wordbeg:
   3105 	case wordend:
   3106 	case push_dummy_failure:
   3107 	  continue;
   3108 
   3109 
   3110 	case jump_n:
   3111 	case pop_failure_jump:
   3112 	case maybe_pop_jump:
   3113 	case jump:
   3114 	case jump_past_alt:
   3115 	case dummy_failure_jump:
   3116 	  EXTRACT_NUMBER_AND_INCR (j, p);
   3117 	  p += j;
   3118 	  if (j > 0)
   3119 	    continue;
   3120 
   3121 	  /* Jump backward implies we just went through the body of a
   3122 	     loop and matched nothing.	Opcode jumped to should be
   3123 	     `on_failure_jump' or `succeed_n'.	Just treat it like an
   3124 	     ordinary jump.  For a * loop, it has pushed its failure
   3125 	     point already; if so, discard that as redundant.  */
   3126 	  if ((re_opcode_t) *p != on_failure_jump
   3127 	      && (re_opcode_t) *p != succeed_n)
   3128 	    continue;
   3129 
   3130 	  p++;
   3131 	  EXTRACT_NUMBER_AND_INCR (j, p);
   3132 	  p += j;
   3133 
   3134 	  /* If what's on the stack is where we are now, pop it.  */
   3135 	  if (!FAIL_STACK_EMPTY ()
   3136 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
   3137 	    fail_stack.avail--;
   3138 
   3139 	  continue;
   3140 
   3141 
   3142 	case on_failure_jump:
   3143 	case on_failure_keep_string_jump:
   3144 	handle_on_failure_jump:
   3145 	  EXTRACT_NUMBER_AND_INCR (j, p);
   3146 
   3147 	  /* For some patterns, e.g., `(a?)?', `p+j' here points to the
   3148 	     end of the pattern.  We don't want to push such a point,
   3149 	     since when we restore it above, entering the switch will
   3150 	     increment `p' past the end of the pattern.	 We don't need
   3151 	     to push such a point since we obviously won't find any more
   3152 	     fastmap entries beyond `pend'.  Such a pattern can match
   3153 	     the null string, though.  */
   3154 	  if (p + j < pend)
   3155 	    {
   3156 	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
   3157 		{
   3158 		  RESET_FAIL_STACK ();
   3159 		  return -2;
   3160 		}
   3161 	    }
   3162 	  else
   3163 	    bufp->can_be_null = 1;
   3164 
   3165 	  if (succeed_n_p)
   3166 	    {
   3167 	      EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.	*/
   3168 	      succeed_n_p = false;
   3169 	    }
   3170 
   3171 	  continue;
   3172 
   3173 
   3174 	case succeed_n:
   3175 	  /* Get to the number of times to succeed.  */
   3176 	  p += 2;
   3177 
   3178 	  /* Increment p past the n for when k != 0.  */
   3179 	  EXTRACT_NUMBER_AND_INCR (k, p);
   3180 	  if (k == 0)
   3181 	    {
   3182 	      p -= 4;
   3183 	      succeed_n_p = true;  /* Spaghetti code alert.  */
   3184 	      goto handle_on_failure_jump;
   3185 	    }
   3186 	  continue;
   3187 
   3188 
   3189 	case set_number_at:
   3190 	  p += 4;
   3191 	  continue;
   3192 
   3193 
   3194 	case start_memory:
   3195 	case stop_memory:
   3196 	  p += 2;
   3197 	  continue;
   3198 
   3199 
   3200 	default:
   3201 	  abort (); /* We have listed all the cases.  */
   3202 	} /* switch *p++ */
   3203 
   3204       /* Getting here means we have found the possible starting
   3205 	 characters for one path of the pattern -- and that the empty
   3206 	 string does not match.	 We need not follow this path further.
   3207 	 Instead, look at the next alternative (remembered on the
   3208 	 stack), or quit if no more.  The test at the top of the loop
   3209 	 does these things.  */
   3210       path_can_be_null = false;
   3211       p = pend;
   3212     } /* while p */
   3213 
   3214   /* Set `can_be_null' for the last path (also the first path, if the
   3215      pattern is empty).	 */
   3216   bufp->can_be_null |= path_can_be_null;
   3217 
   3218  done:
   3219   RESET_FAIL_STACK ();
   3220   return 0;
   3221 } /* re_compile_fastmap */
   3222 
   3223 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   3225    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   3226    this memory for recording register information.  STARTS and ENDS
   3227    must be allocated using the malloc library routine, and must each
   3228    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   3229 
   3230    If NUM_REGS == 0, then subsequent matches should allocate their own
   3231    register data.
   3232 
   3233    Unless this function is called, the first search or match using
   3234    PATTERN_BUFFER will allocate its own register data, without
   3235    freeing the old data.  */
   3236 
   3237 void
   3238 re_set_registers (bufp, regs, num_regs, starts, ends)
   3239     struct re_pattern_buffer *bufp;
   3240     struct re_registers *regs;
   3241     unsigned num_regs;
   3242     regoff_t *starts, *ends;
   3243 {
   3244   if (num_regs)
   3245     {
   3246       bufp->regs_allocated = REGS_REALLOCATE;
   3247       regs->num_regs = num_regs;
   3248       regs->start = starts;
   3249       regs->end = ends;
   3250     }
   3251   else
   3252     {
   3253       bufp->regs_allocated = REGS_UNALLOCATED;
   3254       regs->num_regs = 0;
   3255       regs->start = regs->end = (regoff_t *) 0;
   3256     }
   3257 }
   3258 
   3259 /* Searching routines.	*/
   3261 
   3262 /* Like re_search_2, below, but only one string is specified, and
   3263    doesn't let you say where to stop matching. */
   3264 
   3265 int
   3266 re_search (bufp, string, size, startpos, range, regs)
   3267      struct re_pattern_buffer *bufp;
   3268      const char *string;
   3269      int size, startpos, range;
   3270      struct re_registers *regs;
   3271 {
   3272   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
   3273 		      regs, size);
   3274 }
   3275 
   3276 
   3277 /* Using the compiled pattern in BUFP->buffer, first tries to match the
   3278    virtual concatenation of STRING1 and STRING2, starting first at index
   3279    STARTPOS, then at STARTPOS + 1, and so on.
   3280 
   3281    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
   3282 
   3283    RANGE is how far to scan while trying to match.  RANGE = 0 means try
   3284    only at STARTPOS; in general, the last start tried is STARTPOS +
   3285    RANGE.
   3286 
   3287    In REGS, return the indices of the virtual concatenation of STRING1
   3288    and STRING2 that matched the entire BUFP->buffer and its contained
   3289    subexpressions.
   3290 
   3291    Do not consider matching one past the index STOP in the virtual
   3292    concatenation of STRING1 and STRING2.
   3293 
   3294    We return either the position in the strings at which the match was
   3295    found, -1 if no match, or -2 if error (such as failure
   3296    stack overflow).  */
   3297 
   3298 int
   3299 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
   3300      struct re_pattern_buffer *bufp;
   3301      const char *string1, *string2;
   3302      int size1, size2;
   3303      int startpos;
   3304      int range;
   3305      struct re_registers *regs;
   3306      int stop;
   3307 {
   3308   int val;
   3309   register char *fastmap = bufp->fastmap;
   3310   register RE_TRANSLATE_TYPE translate = bufp->translate;
   3311   int total_size = size1 + size2;
   3312   int endpos = startpos + range;
   3313   int anchored_start = 0;
   3314 
   3315   /* Check for out-of-range STARTPOS.  */
   3316   if (startpos < 0 || startpos > total_size)
   3317     return -1;
   3318 
   3319   /* Fix up RANGE if it might eventually take us outside
   3320      the virtual concatenation of STRING1 and STRING2.
   3321      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
   3322   if (endpos < 0)
   3323     range = 0 - startpos;
   3324   else if (endpos > total_size)
   3325     range = total_size - startpos;
   3326 
   3327   /* If the search isn't to be a backwards one, don't waste time in a
   3328      search for a pattern that must be anchored.  */
   3329   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
   3330     {
   3331       if (startpos > 0)
   3332 	return -1;
   3333       else
   3334 	range = 1;
   3335     }
   3336 
   3337 #ifdef emacs
   3338   /* In a forward search for something that starts with \=.
   3339      don't keep searching past point.  */
   3340   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
   3341     {
   3342       range = PT - startpos;
   3343       if (range <= 0)
   3344 	return -1;
   3345     }
   3346 #endif /* emacs */
   3347 
   3348   /* Update the fastmap now if not correct already.  */
   3349   if (fastmap && !bufp->fastmap_accurate)
   3350     if (re_compile_fastmap (bufp) == -2)
   3351       return -2;
   3352 
   3353   /* See whether the pattern is anchored.  */
   3354   if (bufp->buffer[0] == begline)
   3355     anchored_start = 1;
   3356 
   3357   /* Loop through the string, looking for a place to start matching.  */
   3358   for (;;)
   3359     {
   3360       /* If the pattern is anchored,
   3361 	 skip quickly past places we cannot match.
   3362 	 We don't bother to treat startpos == 0 specially
   3363 	 because that case doesn't repeat.  */
   3364       if (anchored_start && startpos > 0)
   3365 	{
   3366 	  if (! (bufp->newline_anchor
   3367 		 && ((startpos <= size1 ? string1[startpos - 1]
   3368 		      : string2[startpos - size1 - 1])
   3369 		     == '\n')))
   3370 	    goto advance;
   3371 	}
   3372 
   3373       /* If a fastmap is supplied, skip quickly over characters that
   3374 	 cannot be the start of a match.  If the pattern can match the
   3375 	 null string, however, we don't need to skip characters; we want
   3376 	 the first null string.	 */
   3377       if (fastmap && startpos < total_size && !bufp->can_be_null)
   3378 	{
   3379 	  if (range > 0)	/* Searching forwards.	*/
   3380 	    {
   3381 	      register const char *d;
   3382 	      register int lim = 0;
   3383 	      int irange = range;
   3384 
   3385 	      if (startpos < size1 && startpos + range >= size1)
   3386 		lim = range - (size1 - startpos);
   3387 
   3388 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
   3389 
   3390 	      /* Written out as an if-else to avoid testing `translate'
   3391 		 inside the loop.  */
   3392 	      if (translate)
   3393 		while (range > lim
   3394 		       && !fastmap[(unsigned char)
   3395 				   translate[(unsigned char) *d++]])
   3396 		  range--;
   3397 	      else
   3398 		while (range > lim && !fastmap[(unsigned char) *d++])
   3399 		  range--;
   3400 
   3401 	      startpos += irange - range;
   3402 	    }
   3403 	  else				/* Searching backwards.	 */
   3404 	    {
   3405 	      register char c = (size1 == 0 || startpos >= size1
   3406 				 ? string2[startpos - size1]
   3407 				 : string1[startpos]);
   3408 
   3409 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
   3410 		goto advance;
   3411 	    }
   3412 	}
   3413 
   3414       /* If can't match the null string, and that's all we have left, fail.  */
   3415       if (range >= 0 && startpos == total_size && fastmap
   3416 	  && !bufp->can_be_null)
   3417 	return -1;
   3418 
   3419       val = re_match_2_internal (bufp, string1, size1, string2, size2,
   3420 				 startpos, regs, stop);
   3421 #ifndef REGEX_MALLOC
   3422 #ifdef C_ALLOCA
   3423       alloca (0);
   3424 #endif
   3425 #endif
   3426 
   3427       if (val >= 0)
   3428 	return startpos;
   3429 
   3430       if (val == -2)
   3431 	return -2;
   3432 
   3433     advance:
   3434       if (!range)
   3435 	break;
   3436       else if (range > 0)
   3437 	{
   3438 	  range--;
   3439 	  startpos++;
   3440 	}
   3441       else
   3442 	{
   3443 	  range++;
   3444 	  startpos--;
   3445 	}
   3446     }
   3447   return -1;
   3448 } /* re_search_2 */
   3449 
   3450 /* Declarations and macros for re_match_2.  */
   3452 
   3453 static int bcmp_translate ();
   3454 static boolean alt_match_null_string_p (),
   3455 	       common_op_match_null_string_p (),
   3456 	       group_match_null_string_p ();
   3457 
   3458 /* This converts PTR, a pointer into one of the search strings `string1'
   3459    and `string2' into an offset from the beginning of that string.  */
   3460 #define POINTER_TO_OFFSET(ptr)			\
   3461   (FIRST_STRING_P (ptr)				\
   3462    ? ((regoff_t) ((ptr) - string1))		\
   3463    : ((regoff_t) ((ptr) - string2 + size1)))
   3464 
   3465 /* Macros for dealing with the split strings in re_match_2.  */
   3466 
   3467 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
   3468 
   3469 /* Call before fetching a character with *d.  This switches over to
   3470    string2 if necessary.  */
   3471 #define PREFETCH()							\
   3472   while (d == dend)							\
   3473     {									\
   3474       /* End of string2 => fail.  */					\
   3475       if (dend == end_match_2)						\
   3476 	goto fail;							\
   3477       /* End of string1 => advance to string2.	*/			\
   3478       d = string2;							\
   3479       dend = end_match_2;						\
   3480     }
   3481 
   3482 
   3483 /* Test if at very beginning or at very end of the virtual concatenation
   3484    of `string1' and `string2'.	If only one string, it's `string2'.  */
   3485 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
   3486 #define AT_STRINGS_END(d) ((d) == end2)
   3487 
   3488 
   3489 /* Test if D points to a character which is word-constituent.  We have
   3490    two special cases to check for: if past the end of string1, look at
   3491    the first character in string2; and if before the beginning of
   3492    string2, look at the last character in string1.  */
   3493 #define WORDCHAR_P(d)							\
   3494   (SYNTAX ((d) == end1 ? *string2					\
   3495 	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   3496    == Sword)
   3497 
   3498 /* Disabled due to a compiler bug -- see comment at case wordbound */
   3499 #if 0
   3500 /* Test if the character before D and the one at D differ with respect
   3501    to being word-constituent.  */
   3502 #define AT_WORD_BOUNDARY(d)						\
   3503   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   3504    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
   3505 #endif
   3506 
   3507 /* Free everything we malloc.  */
   3508 #ifdef MATCH_MAY_ALLOCATE
   3509 #define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
   3510 #define FREE_VARIABLES()						\
   3511   do {									\
   3512     REGEX_FREE_STACK (fail_stack.stack);				\
   3513     FREE_VAR (regstart);						\
   3514     FREE_VAR (regend);							\
   3515     FREE_VAR (old_regstart);						\
   3516     FREE_VAR (old_regend);						\
   3517     FREE_VAR (best_regstart);						\
   3518     FREE_VAR (best_regend);						\
   3519     FREE_VAR (reg_info);						\
   3520     FREE_VAR (reg_dummy);						\
   3521     FREE_VAR (reg_info_dummy);						\
   3522   } while (0)
   3523 #else
   3524 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
   3525 #endif /* not MATCH_MAY_ALLOCATE */
   3526 
   3527 /* These values must meet several constraints.	They must not be valid
   3528    register values; since we have a limit of 255 registers (because
   3529    we use only one byte in the pattern for the register number), we can
   3530    use numbers larger than 255.	 They must differ by 1, because of
   3531    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
   3532    be larger than the value for the highest register, so we do not try
   3533    to actually save any registers when none are active.	 */
   3534 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
   3535 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
   3536 
   3537 /* Matching routines.  */
   3539 
   3540 #ifndef emacs	/* Emacs never uses this.  */
   3541 /* re_match is like re_match_2 except it takes only a single string.  */
   3542 
   3543 int
   3544 re_match (bufp, string, size, pos, regs)
   3545      struct re_pattern_buffer *bufp;
   3546      const char *string;
   3547      int size, pos;
   3548      struct re_registers *regs;
   3549 {
   3550   int result = re_match_2_internal (bufp, NULL, 0, string, size,
   3551 				    pos, regs, size);
   3552   alloca (0);
   3553   return result;
   3554 }
   3555 #endif /* not emacs */
   3556 
   3557 
   3558 /* re_match_2 matches the compiled pattern in BUFP against the
   3559    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
   3560    and SIZE2, respectively).  We start matching at POS, and stop
   3561    matching at STOP.
   3562 
   3563    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
   3564    store offsets for the substring each group matched in REGS.	See the
   3565    documentation for exactly how many groups we fill.
   3566 
   3567    We return -1 if no match, -2 if an internal error (such as the
   3568    failure stack overflowing).	Otherwise, we return the length of the
   3569    matched substring.  */
   3570 
   3571 int
   3572 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   3573      struct re_pattern_buffer *bufp;
   3574      const char *string1, *string2;
   3575      int size1, size2;
   3576      int pos;
   3577      struct re_registers *regs;
   3578      int stop;
   3579 {
   3580   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
   3581 				    pos, regs, stop);
   3582   alloca (0);
   3583   return result;
   3584 }
   3585 
   3586 /* This is a separate function so that we can force an alloca cleanup
   3587    afterwards.	*/
   3588 static int
   3589 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   3590      struct re_pattern_buffer *bufp;
   3591      const char *string1, *string2;
   3592      int size1, size2;
   3593      int pos;
   3594      struct re_registers *regs;
   3595      int stop;
   3596 {
   3597   /* General temporaries.  */
   3598   int mcnt;
   3599   unsigned char *p1;
   3600 
   3601   /* Just past the end of the corresponding string.  */
   3602   const char *end1, *end2;
   3603 
   3604   /* Pointers into string1 and string2, just past the last characters in
   3605      each to consider matching.	 */
   3606   const char *end_match_1, *end_match_2;
   3607 
   3608   /* Where we are in the data, and the end of the current string.  */
   3609   const char *d, *dend;
   3610 
   3611   /* Where we are in the pattern, and the end of the pattern.  */
   3612   unsigned char *p = bufp->buffer;
   3613   register unsigned char *pend = p + bufp->used;
   3614 
   3615   /* Mark the opcode just after a start_memory, so we can test for an
   3616      empty subpattern when we get to the stop_memory.  */
   3617   unsigned char *just_past_start_mem = 0;
   3618 
   3619   /* We use this to map every character in the string.	*/
   3620   RE_TRANSLATE_TYPE translate = bufp->translate;
   3621 
   3622   /* Failure point stack.  Each place that can handle a failure further
   3623      down the line pushes a failure point on this stack.  It consists of
   3624      restart, regend, and reg_info for all registers corresponding to
   3625      the subexpressions we're currently inside, plus the number of such
   3626      registers, and, finally, two char *'s.  The first char * is where
   3627      to resume scanning the pattern; the second one is where to resume
   3628      scanning the strings.  If the latter is zero, the failure point is
   3629      a ``dummy''; if a failure happens and the failure point is a dummy,
   3630      it gets discarded and the next next one is tried.	*/
   3631 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
   3632   fail_stack_type fail_stack;
   3633 #endif
   3634 #ifdef DEBUG
   3635   static unsigned failure_id = 0;
   3636   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
   3637 #endif
   3638 
   3639   /* This holds the pointer to the failure stack, when
   3640      it is allocated relocatably.  */
   3641 #ifdef REL_ALLOC
   3642   fail_stack_elt_t *failure_stack_ptr;
   3643 #endif
   3644 
   3645   /* We fill all the registers internally, independent of what we
   3646      return, for use in backreferences.	 The number here includes
   3647      an element for register zero.  */
   3648   unsigned num_regs = bufp->re_nsub + 1;
   3649 
   3650   /* The currently active registers.  */
   3651   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   3652   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   3653 
   3654   /* Information on the contents of registers. These are pointers into
   3655      the input strings; they record just what was matched (on this
   3656      attempt) by a subexpression part of the pattern, that is, the
   3657      regnum-th regstart pointer points to where in the pattern we began
   3658      matching and the regnum-th regend points to right after where we
   3659      stopped matching the regnum-th subexpression.  (The zeroth register
   3660      keeps track of what the whole pattern matches.)  */
   3661 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   3662   const char **regstart, **regend;
   3663 #endif
   3664 
   3665   /* If a group that's operated upon by a repetition operator fails to
   3666      match anything, then the register for its start will need to be
   3667      restored because it will have been set to wherever in the string we
   3668      are when we last see its open-group operator.  Similarly for a
   3669      register's end.  */
   3670 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   3671   const char **old_regstart, **old_regend;
   3672 #endif
   3673 
   3674   /* The is_active field of reg_info helps us keep track of which (possibly
   3675      nested) subexpressions we are currently in. The matched_something
   3676      field of reg_info[reg_num] helps us tell whether or not we have
   3677      matched any of the pattern so far this time through the reg_num-th
   3678      subexpression.  These two fields get reset each time through any
   3679      loop their register is in.	 */
   3680 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
   3681   register_info_type *reg_info;
   3682 #endif
   3683 
   3684   /* The following record the register info as found in the above
   3685      variables when we find a match better than any we've seen before.
   3686      This happens as we backtrack through the failure points, which in
   3687      turn happens only if we have not yet matched the entire string. */
   3688   unsigned best_regs_set = false;
   3689 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   3690   const char **best_regstart, **best_regend;
   3691 #endif
   3692 
   3693   /* Logically, this is `best_regend[0]'.  But we don't want to have to
   3694      allocate space for that if we're not allocating space for anything
   3695      else (see below).	Also, we never need info about register 0 for
   3696      any of the other register vectors, and it seems rather a kludge to
   3697      treat `best_regend' differently than the rest.  So we keep track of
   3698      the end of the best match so far in a separate variable.  We
   3699      initialize this to NULL so that when we backtrack the first time
   3700      and need to test it, it's not garbage.  */
   3701   const char *match_end = NULL;
   3702 
   3703   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
   3704   int set_regs_matched_done = 0;
   3705 
   3706   /* Used when we pop values we don't care about.  */
   3707 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
   3708   const char **reg_dummy;
   3709   register_info_type *reg_info_dummy;
   3710 #endif
   3711 
   3712 #ifdef DEBUG
   3713   /* Counts the total number of registers pushed.  */
   3714   unsigned num_regs_pushed = 0;
   3715 #endif
   3716 
   3717   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
   3718 
   3719   INIT_FAIL_STACK ();
   3720 
   3721 #ifdef MATCH_MAY_ALLOCATE
   3722   /* Do not bother to initialize all the register variables if there are
   3723      no groups in the pattern, as it takes a fair amount of time.  If
   3724      there are groups, we include space for register 0 (the whole
   3725      pattern), even though we never use it, since it simplifies the
   3726      array indexing.  We should fix this.  */
   3727   if (bufp->re_nsub)
   3728     {
   3729       regstart = REGEX_TALLOC (num_regs, const char *);
   3730       regend = REGEX_TALLOC (num_regs, const char *);
   3731       old_regstart = REGEX_TALLOC (num_regs, const char *);
   3732       old_regend = REGEX_TALLOC (num_regs, const char *);
   3733       best_regstart = REGEX_TALLOC (num_regs, const char *);
   3734       best_regend = REGEX_TALLOC (num_regs, const char *);
   3735       reg_info = REGEX_TALLOC (num_regs, register_info_type);
   3736       reg_dummy = REGEX_TALLOC (num_regs, const char *);
   3737       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
   3738 
   3739       if (!(regstart && regend && old_regstart && old_regend && reg_info
   3740 	    && best_regstart && best_regend && reg_dummy && reg_info_dummy))
   3741 	{
   3742 	  FREE_VARIABLES ();
   3743 	  return -2;
   3744 	}
   3745     }
   3746   else
   3747     {
   3748       /* We must initialize all our variables to NULL, so that
   3749 	 `FREE_VARIABLES' doesn't try to free them.  */
   3750       regstart = regend = old_regstart = old_regend = best_regstart
   3751 	= best_regend = reg_dummy = NULL;
   3752       reg_info = reg_info_dummy = (register_info_type *) NULL;
   3753     }
   3754 #endif /* MATCH_MAY_ALLOCATE */
   3755 
   3756   /* The starting position is bogus.  */
   3757   if (pos < 0 || pos > size1 + size2)
   3758     {
   3759       FREE_VARIABLES ();
   3760       return -1;
   3761     }
   3762 
   3763   /* Initialize subexpression text positions to -1 to mark ones that no
   3764      start_memory/stop_memory has been seen for. Also initialize the
   3765      register information struct.  */
   3766   for (mcnt = 1; mcnt < num_regs; mcnt++)
   3767     {
   3768       regstart[mcnt] = regend[mcnt]
   3769 	= old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
   3770 
   3771       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
   3772       IS_ACTIVE (reg_info[mcnt]) = 0;
   3773       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   3774       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
   3775     }
   3776 
   3777   /* We move `string1' into `string2' if the latter's empty -- but not if
   3778      `string1' is null.	 */
   3779   if (size2 == 0 && string1 != NULL)
   3780     {
   3781       string2 = string1;
   3782       size2 = size1;
   3783       string1 = 0;
   3784       size1 = 0;
   3785     }
   3786   end1 = string1 + size1;
   3787   end2 = string2 + size2;
   3788 
   3789   /* Compute where to stop matching, within the two strings.  */
   3790   if (stop <= size1)
   3791     {
   3792       end_match_1 = string1 + stop;
   3793       end_match_2 = string2;
   3794     }
   3795   else
   3796     {
   3797       end_match_1 = end1;
   3798       end_match_2 = string2 + stop - size1;
   3799     }
   3800 
   3801   /* `p' scans through the pattern as `d' scans through the data.
   3802      `dend' is the end of the input string that `d' points within.  `d'
   3803      is advanced into the following input string whenever necessary, but
   3804      this happens before fetching; therefore, at the beginning of the
   3805      loop, `d' can be pointing at the end of a string, but it cannot
   3806      equal `string2'.  */
   3807   if (size1 > 0 && pos <= size1)
   3808     {
   3809       d = string1 + pos;
   3810       dend = end_match_1;
   3811     }
   3812   else
   3813     {
   3814       d = string2 + pos - size1;
   3815       dend = end_match_2;
   3816     }
   3817 
   3818   DEBUG_PRINT1 ("The compiled pattern is: ");
   3819   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
   3820   DEBUG_PRINT1 ("The string to match is: `");
   3821   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
   3822   DEBUG_PRINT1 ("'\n");
   3823 
   3824   /* This loops over pattern commands.	It exits by returning from the
   3825      function if the match is complete, or it drops through if the match
   3826      fails at this starting point in the input data.  */
   3827   for (;;)
   3828     {
   3829       DEBUG_PRINT2 ("\n0x%x: ", p);
   3830 
   3831       if (p == pend)
   3832 	{ /* End of pattern means we might have succeeded.  */
   3833 	  DEBUG_PRINT1 ("end of pattern ... ");
   3834 
   3835 	  /* If we haven't matched the entire string, and we want the
   3836 	     longest match, try backtracking.  */
   3837 	  if (d != end_match_2)
   3838 	    {
   3839 	      /* 1 if this match ends in the same string (string1 or string2)
   3840 		 as the best previous match.  */
   3841 	      boolean same_str_p = (FIRST_STRING_P (match_end)
   3842 				    == MATCHING_IN_FIRST_STRING);
   3843 	      /* 1 if this match is the best seen so far.  */
   3844 	      boolean best_match_p;
   3845 
   3846 	      /* AIX compiler got confused when this was combined
   3847 		 with the previous declaration.	 */
   3848 	      if (same_str_p)
   3849 		best_match_p = d > match_end;
   3850 	      else
   3851 		best_match_p = !MATCHING_IN_FIRST_STRING;
   3852 
   3853 	      DEBUG_PRINT1 ("backtracking.\n");
   3854 
   3855 	      if (!FAIL_STACK_EMPTY ())
   3856 		{ /* More failure points to try.  */
   3857 
   3858 		  /* If exceeds best match so far, save it.  */
   3859 		  if (!best_regs_set || best_match_p)
   3860 		    {
   3861 		      best_regs_set = true;
   3862 		      match_end = d;
   3863 
   3864 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
   3865 
   3866 		      for (mcnt = 1; mcnt < num_regs; mcnt++)
   3867 			{
   3868 			  best_regstart[mcnt] = regstart[mcnt];
   3869 			  best_regend[mcnt] = regend[mcnt];
   3870 			}
   3871 		    }
   3872 		  goto fail;
   3873 		}
   3874 
   3875 	      /* If no failure points, don't restore garbage.  And if
   3876 		 last match is real best match, don't restore second
   3877 		 best one. */
   3878 	      else if (best_regs_set && !best_match_p)
   3879 		{
   3880 		restore_best_regs:
   3881 		  /* Restore best match.  It may happen that `dend ==
   3882 		     end_match_1' while the restored d is in string2.
   3883 		     For example, the pattern `x.*y.*z' against the
   3884 		     strings `x-' and `y-z-', if the two strings are
   3885 		     not consecutive in memory.	 */
   3886 		  DEBUG_PRINT1 ("Restoring best registers.\n");
   3887 
   3888 		  d = match_end;
   3889 		  dend = ((d >= string1 && d <= end1)
   3890 			   ? end_match_1 : end_match_2);
   3891 
   3892 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
   3893 		    {
   3894 		      regstart[mcnt] = best_regstart[mcnt];
   3895 		      regend[mcnt] = best_regend[mcnt];
   3896 		    }
   3897 		}
   3898 	    } /* d != end_match_2 */
   3899 
   3900 	succeed_label:
   3901 	  DEBUG_PRINT1 ("Accepting match.\n");
   3902 
   3903 	  /* If caller wants register contents data back, do it.  */
   3904 	  if (regs && !bufp->no_sub)
   3905 	    {
   3906 	      /* Have the register data arrays been allocated?	*/
   3907 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
   3908 		{ /* No.  So allocate them with malloc.	 We need one
   3909 		     extra element beyond `num_regs' for the `-1' marker
   3910 		     GNU code uses.  */
   3911 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
   3912 		  regs->start = TALLOC (regs->num_regs, regoff_t);
   3913 		  regs->end = TALLOC (regs->num_regs, regoff_t);
   3914 		  if (regs->start == NULL || regs->end == NULL)
   3915 		    {
   3916 		      FREE_VARIABLES ();
   3917 		      return -2;
   3918 		    }
   3919 		  bufp->regs_allocated = REGS_REALLOCATE;
   3920 		}
   3921 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
   3922 		{ /* Yes.  If we need more elements than were already
   3923 		     allocated, reallocate them.  If we need fewer, just
   3924 		     leave it alone.  */
   3925 		  if (regs->num_regs < num_regs + 1)
   3926 		    {
   3927 		      regs->num_regs = num_regs + 1;
   3928 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
   3929 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
   3930 		      if (regs->start == NULL || regs->end == NULL)
   3931 			{
   3932 			  FREE_VARIABLES ();
   3933 			  return -2;
   3934 			}
   3935 		    }
   3936 		}
   3937 	      else
   3938 		{
   3939 		  /* These braces fend off a "empty body in an else-statement"
   3940 		     warning under GCC when assert expands to nothing.	*/
   3941 		  assert (bufp->regs_allocated == REGS_FIXED);
   3942 		}
   3943 
   3944 	      /* Convert the pointer data in `regstart' and `regend' to
   3945 		 indices.  Register zero has to be set differently,
   3946 		 since we haven't kept track of any info for it.  */
   3947 	      if (regs->num_regs > 0)
   3948 		{
   3949 		  regs->start[0] = pos;
   3950 		  regs->end[0] = (MATCHING_IN_FIRST_STRING
   3951 				  ? ((regoff_t) (d - string1))
   3952 				  : ((regoff_t) (d - string2 + size1)));
   3953 		}
   3954 
   3955 	      /* Go through the first `min (num_regs, regs->num_regs)'
   3956 		 registers, since that is all we initialized.  */
   3957 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
   3958 		{
   3959 		  if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
   3960 		    regs->start[mcnt] = regs->end[mcnt] = -1;
   3961 		  else
   3962 		    {
   3963 		      regs->start[mcnt]
   3964 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
   3965 		      regs->end[mcnt]
   3966 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
   3967 		    }
   3968 		}
   3969 
   3970 	      /* If the regs structure we return has more elements than
   3971 		 were in the pattern, set the extra elements to -1.  If
   3972 		 we (re)allocated the registers, this is the case,
   3973 		 because we always allocate enough to have at least one
   3974 		 -1 at the end.	 */
   3975 	      for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
   3976 		regs->start[mcnt] = regs->end[mcnt] = -1;
   3977 	    } /* regs && !bufp->no_sub */
   3978 
   3979 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
   3980 			nfailure_points_pushed, nfailure_points_popped,
   3981 			nfailure_points_pushed - nfailure_points_popped);
   3982 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
   3983 
   3984 	  mcnt = d - pos - (MATCHING_IN_FIRST_STRING
   3985 			    ? string1
   3986 			    : string2 - size1);
   3987 
   3988 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
   3989 
   3990 	  FREE_VARIABLES ();
   3991 	  return mcnt;
   3992 	}
   3993 
   3994       /* Otherwise match next pattern command.	*/
   3995       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
   3996 	{
   3997 	/* Ignore these.  Used to ignore the n of succeed_n's which
   3998 	   currently have n == 0.  */
   3999 	case no_op:
   4000 	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
   4001 	  break;
   4002 
   4003 	case succeed:
   4004 	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
   4005 	  goto succeed_label;
   4006 
   4007 	/* Match the next n pattern characters exactly.	 The following
   4008 	   byte in the pattern defines n, and the n bytes after that
   4009 	   are the characters to match.	 */
   4010 	case exactn:
   4011 	  mcnt = *p++;
   4012 	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
   4013 
   4014 	  /* This is written out as an if-else so we don't waste time
   4015 	     testing `translate' inside the loop.  */
   4016 	  if (translate)
   4017 	    {
   4018 	      do
   4019 		{
   4020 		  PREFETCH ();
   4021 		  if ((unsigned char) translate[(unsigned char) *d++]
   4022 		      != (unsigned char) *p++)
   4023 		    goto fail;
   4024 		}
   4025 	      while (--mcnt);
   4026 	    }
   4027 	  else
   4028 	    {
   4029 	      do
   4030 		{
   4031 		  PREFETCH ();
   4032 		  if (*d++ != (char) *p++) goto fail;
   4033 		}
   4034 	      while (--mcnt);
   4035 	    }
   4036 	  SET_REGS_MATCHED ();
   4037 	  break;
   4038 
   4039 
   4040 	/* Match any character except possibly a newline or a null.  */
   4041 	case anychar:
   4042 	  DEBUG_PRINT1 ("EXECUTING anychar.\n");
   4043 
   4044 	  PREFETCH ();
   4045 
   4046 	  if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
   4047 	      || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
   4048 	    goto fail;
   4049 
   4050 	  SET_REGS_MATCHED ();
   4051 	  DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
   4052 	  d++;
   4053 	  break;
   4054 
   4055 
   4056 	case charset:
   4057 	case charset_not:
   4058 	  {
   4059 	    register unsigned char c;
   4060 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
   4061 
   4062 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
   4063 
   4064 	    PREFETCH ();
   4065 	    c = TRANSLATE (*d); /* The character to match.  */
   4066 
   4067 	    /* Cast to `unsigned' instead of `unsigned char' in case the
   4068 	       bit list is a full 32 bytes long.  */
   4069 	    if (c < (unsigned) (*p * BYTEWIDTH)
   4070 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   4071 	      not = !not;
   4072 
   4073 	    p += 1 + *p;
   4074 
   4075 	    if (!not) goto fail;
   4076 
   4077 	    SET_REGS_MATCHED ();
   4078 	    d++;
   4079 	    break;
   4080 	  }
   4081 
   4082 
   4083 	/* The beginning of a group is represented by start_memory.
   4084 	   The arguments are the register number in the next byte, and the
   4085 	   number of groups inner to this one in the next.  The text
   4086 	   matched within the group is recorded (in the internal
   4087 	   registers data structure) under the register number.	 */
   4088 	case start_memory:
   4089 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
   4090 
   4091 	  /* Find out if this group can match the empty string.	 */
   4092 	  p1 = p;		/* To send to group_match_null_string_p.  */
   4093 
   4094 	  if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
   4095 	    REG_MATCH_NULL_STRING_P (reg_info[*p])
   4096 	      = group_match_null_string_p (&p1, pend, reg_info);
   4097 
   4098 	  /* Save the position in the string where we were the last time
   4099 	     we were at this open-group operator in case the group is
   4100 	     operated upon by a repetition operator, e.g., with `(a*)*b'
   4101 	     against `ab'; then we want to ignore where we are now in
   4102 	     the string in case this attempt to match fails.  */
   4103 	  old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   4104 			     ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
   4105 			     : regstart[*p];
   4106 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
   4107 			 POINTER_TO_OFFSET (old_regstart[*p]));
   4108 
   4109 	  regstart[*p] = d;
   4110 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
   4111 
   4112 	  IS_ACTIVE (reg_info[*p]) = 1;
   4113 	  MATCHED_SOMETHING (reg_info[*p]) = 0;
   4114 
   4115 	  /* Clear this whenever we change the register activity status.  */
   4116 	  set_regs_matched_done = 0;
   4117 
   4118 	  /* This is the new highest active register.  */
   4119 	  highest_active_reg = *p;
   4120 
   4121 	  /* If nothing was active before, this is the new lowest active
   4122 	     register.	*/
   4123 	  if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   4124 	    lowest_active_reg = *p;
   4125 
   4126 	  /* Move past the register number and inner group count.  */
   4127 	  p += 2;
   4128 	  just_past_start_mem = p;
   4129 
   4130 	  break;
   4131 
   4132 
   4133 	/* The stop_memory opcode represents the end of a group.  Its
   4134 	   arguments are the same as start_memory's: the register
   4135 	   number, and the number of inner groups.  */
   4136 	case stop_memory:
   4137 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
   4138 
   4139 	  /* We need to save the string position the last time we were at
   4140 	     this close-group operator in case the group is operated
   4141 	     upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
   4142 	     against `aba'; then we want to ignore where we are now in
   4143 	     the string in case this attempt to match fails.  */
   4144 	  old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
   4145 			   ? REG_UNSET (regend[*p]) ? d : regend[*p]
   4146 			   : regend[*p];
   4147 	  DEBUG_PRINT2 ("      old_regend: %d\n",
   4148 			 POINTER_TO_OFFSET (old_regend[*p]));
   4149 
   4150 	  regend[*p] = d;
   4151 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
   4152 
   4153 	  /* This register isn't active anymore.  */
   4154 	  IS_ACTIVE (reg_info[*p]) = 0;
   4155 
   4156 	  /* Clear this whenever we change the register activity status.  */
   4157 	  set_regs_matched_done = 0;
   4158 
   4159 	  /* If this was the only register active, nothing is active
   4160 	     anymore.  */
   4161 	  if (lowest_active_reg == highest_active_reg)
   4162 	    {
   4163 	      lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   4164 	      highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   4165 	    }
   4166 	  else
   4167 	    { /* We must scan for the new highest active register, since
   4168 		 it isn't necessarily one less than now: consider
   4169 		 (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
   4170 		 new highest active register is 1.  */
   4171 	      unsigned char r = *p - 1;
   4172 	      while (r > 0 && !IS_ACTIVE (reg_info[r]))
   4173 		r--;
   4174 
   4175 	      /* If we end up at register zero, that means that we saved
   4176 		 the registers as the result of an `on_failure_jump', not
   4177 		 a `start_memory', and we jumped to past the innermost
   4178 		 `stop_memory'.	 For example, in ((.)*) we save
   4179 		 registers 1 and 2 as a result of the *, but when we pop
   4180 		 back to the second ), we are at the stop_memory 1.
   4181 		 Thus, nothing is active.  */
   4182 	      if (r == 0)
   4183 		{
   4184 		  lowest_active_reg = NO_LOWEST_ACTIVE_REG;
   4185 		  highest_active_reg = NO_HIGHEST_ACTIVE_REG;
   4186 		}
   4187 	      else
   4188 		highest_active_reg = r;
   4189 	    }
   4190 
   4191 	  /* If just failed to match something this time around with a
   4192 	     group that's operated on by a repetition operator, try to
   4193 	     force exit from the ``loop'', and restore the register
   4194 	     information for this group that we had before trying this
   4195 	     last match.  */
   4196 	  if ((!MATCHED_SOMETHING (reg_info[*p])
   4197 	       || just_past_start_mem == p - 1)
   4198 	      && (p + 2) < pend)
   4199 	    {
   4200 	      boolean is_a_jump_n = false;
   4201 
   4202 	      p1 = p + 2;
   4203 	      mcnt = 0;
   4204 	      switch ((re_opcode_t) *p1++)
   4205 		{
   4206 		  case jump_n:
   4207 		    is_a_jump_n = true;
   4208 		  case pop_failure_jump:
   4209 		  case maybe_pop_jump:
   4210 		  case jump:
   4211 		  case dummy_failure_jump:
   4212 		    EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   4213 		    if (is_a_jump_n)
   4214 		      p1 += 2;
   4215 		    break;
   4216 
   4217 		  default:
   4218 		    /* do nothing */ ;
   4219 		}
   4220 	      p1 += mcnt;
   4221 
   4222 	      /* If the next operation is a jump backwards in the pattern
   4223 		 to an on_failure_jump right before the start_memory
   4224 		 corresponding to this stop_memory, exit from the loop
   4225 		 by forcing a failure after pushing on the stack the
   4226 		 on_failure_jump's jump in the pattern, and d.	*/
   4227 	      if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
   4228 		  && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
   4229 		{
   4230 		  /* If this group ever matched anything, then restore
   4231 		     what its registers were before trying this last
   4232 		     failed match, e.g., with `(a*)*b' against `ab' for
   4233 		     regstart[1], and, e.g., with `((a*)*(b*)*)*'
   4234 		     against `aba' for regend[3].
   4235 
   4236 		     Also restore the registers for inner groups for,
   4237 		     e.g., `((a*)(b*))*' against `aba' (register 3 would
   4238 		     otherwise get trashed).  */
   4239 
   4240 		  if (EVER_MATCHED_SOMETHING (reg_info[*p]))
   4241 		    {
   4242 		      unsigned r;
   4243 
   4244 		      EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
   4245 
   4246 		      /* Restore this and inner groups' (if any) registers.  */
   4247 		      for (r = *p; r < *p + *(p + 1); r++)
   4248 			{
   4249 			  regstart[r] = old_regstart[r];
   4250 
   4251 			  /* xx why this test?	*/
   4252 			  if (old_regend[r] >= regstart[r])
   4253 			    regend[r] = old_regend[r];
   4254 			}
   4255 		    }
   4256 		  p1++;
   4257 		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   4258 		  PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
   4259 
   4260 		  goto fail;
   4261 		}
   4262 	    }
   4263 
   4264 	  /* Move past the register number and the inner group count.  */
   4265 	  p += 2;
   4266 	  break;
   4267 
   4268 
   4269 	/* \<digit> has been turned into a `duplicate' command which is
   4270 	   followed by the numeric value of <digit> as the register number.  */
   4271 	case duplicate:
   4272 	  {
   4273 	    register const char *d2, *dend2;
   4274 	    int regno = *p++;	/* Get which register to match against.	 */
   4275 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
   4276 
   4277 	    /* Can't back reference a group which we've never matched.	*/
   4278 	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
   4279 	      goto fail;
   4280 
   4281 	    /* Where in input to try to start matching.	 */
   4282 	    d2 = regstart[regno];
   4283 
   4284 	    /* Where to stop matching; if both the place to start and
   4285 	       the place to stop matching are in the same string, then
   4286 	       set to the place to stop, otherwise, for now have to use
   4287 	       the end of the first string.  */
   4288 
   4289 	    dend2 = ((FIRST_STRING_P (regstart[regno])
   4290 		      == FIRST_STRING_P (regend[regno]))
   4291 		     ? regend[regno] : end_match_1);
   4292 	    for (;;)
   4293 	      {
   4294 		/* If necessary, advance to next segment in register
   4295 		   contents.  */
   4296 		while (d2 == dend2)
   4297 		  {
   4298 		    if (dend2 == end_match_2) break;
   4299 		    if (dend2 == regend[regno]) break;
   4300 
   4301 		    /* End of string1 => advance to string2. */
   4302 		    d2 = string2;
   4303 		    dend2 = regend[regno];
   4304 		  }
   4305 		/* At end of register contents => success */
   4306 		if (d2 == dend2) break;
   4307 
   4308 		/* If necessary, advance to next segment in data.  */
   4309 		PREFETCH ();
   4310 
   4311 		/* How many characters left in this segment to match.  */
   4312 		mcnt = dend - d;
   4313 
   4314 		/* Want how many consecutive characters we can match in
   4315 		   one shot, so, if necessary, adjust the count.  */
   4316 		if (mcnt > dend2 - d2)
   4317 		  mcnt = dend2 - d2;
   4318 
   4319 		/* Compare that many; failure if mismatch, else move
   4320 		   past them.  */
   4321 		if (translate
   4322 		    ? bcmp_translate (d, d2, mcnt, translate)
   4323 		    : bcmp (d, d2, mcnt))
   4324 		  goto fail;
   4325 		d += mcnt, d2 += mcnt;
   4326 
   4327 		/* Do this because we've match some characters.	 */
   4328 		SET_REGS_MATCHED ();
   4329 	      }
   4330 	  }
   4331 	  break;
   4332 
   4333 
   4334 	/* begline matches the empty string at the beginning of the string
   4335 	   (unless `not_bol' is set in `bufp'), and, if
   4336 	   `newline_anchor' is set, after newlines.  */
   4337 	case begline:
   4338 	  DEBUG_PRINT1 ("EXECUTING begline.\n");
   4339 
   4340 	  if (AT_STRINGS_BEG (d))
   4341 	    {
   4342 	      if (!bufp->not_bol) break;
   4343 	    }
   4344 	  else if (d[-1] == '\n' && bufp->newline_anchor)
   4345 	    {
   4346 	      break;
   4347 	    }
   4348 	  /* In all other cases, we fail.  */
   4349 	  goto fail;
   4350 
   4351 
   4352 	/* endline is the dual of begline.  */
   4353 	case endline:
   4354 	  DEBUG_PRINT1 ("EXECUTING endline.\n");
   4355 
   4356 	  if (AT_STRINGS_END (d))
   4357 	    {
   4358 	      if (!bufp->not_eol) break;
   4359 	    }
   4360 
   4361 	  /* We have to ``prefetch'' the next character.  */
   4362 	  else if ((d == end1 ? *string2 : *d) == '\n'
   4363 		   && bufp->newline_anchor)
   4364 	    {
   4365 	      break;
   4366 	    }
   4367 	  goto fail;
   4368 
   4369 
   4370 	/* Match at the very beginning of the data.  */
   4371 	case begbuf:
   4372 	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
   4373 	  if (AT_STRINGS_BEG (d))
   4374 	    break;
   4375 	  goto fail;
   4376 
   4377 
   4378 	/* Match at the very end of the data.  */
   4379 	case endbuf:
   4380 	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
   4381 	  if (AT_STRINGS_END (d))
   4382 	    break;
   4383 	  goto fail;
   4384 
   4385 
   4386 	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
   4387 	   pushes NULL as the value for the string on the stack.  Then
   4388 	   `pop_failure_point' will keep the current value for the
   4389 	   string, instead of restoring it.  To see why, consider
   4390 	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
   4391 	   then the . fails against the \n.  But the next thing we want
   4392 	   to do is match the \n against the \n; if we restored the
   4393 	   string value, we would be back at the foo.
   4394 
   4395 	   Because this is used only in specific cases, we don't need to
   4396 	   check all the things that `on_failure_jump' does, to make
   4397 	   sure the right things get saved on the stack.  Hence we don't
   4398 	   share its code.  The only reason to push anything on the
   4399 	   stack at all is that otherwise we would have to change
   4400 	   `anychar's code to do something besides goto fail in this
   4401 	   case; that seems worse than this.  */
   4402 	case on_failure_keep_string_jump:
   4403 	  DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
   4404 
   4405 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
   4406 	  DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
   4407 
   4408 	  PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
   4409 	  break;
   4410 
   4411 
   4412 	/* Uses of on_failure_jump:
   4413 
   4414 	   Each alternative starts with an on_failure_jump that points
   4415 	   to the beginning of the next alternative.  Each alternative
   4416 	   except the last ends with a jump that in effect jumps past
   4417 	   the rest of the alternatives.  (They really jump to the
   4418 	   ending jump of the following alternative, because tensioning
   4419 	   these jumps is a hassle.)
   4420 
   4421 	   Repeats start with an on_failure_jump that points past both
   4422 	   the repetition text and either the following jump or
   4423 	   pop_failure_jump back to this on_failure_jump.  */
   4424 	case on_failure_jump:
   4425 	on_failure:
   4426 	  DEBUG_PRINT1 ("EXECUTING on_failure_jump");
   4427 
   4428 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
   4429 	  DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
   4430 
   4431 	  /* If this on_failure_jump comes right before a group (i.e.,
   4432 	     the original * applied to a group), save the information
   4433 	     for that group and all inner ones, so that if we fail back
   4434 	     to this point, the group's information will be correct.
   4435 	     For example, in \(a*\)*\1, we need the preceding group,
   4436 	     and in \(zz\(a*\)b*\)\2, we need the inner group.	*/
   4437 
   4438 	  /* We can't use `p' to check ahead because we push
   4439 	     a failure point to `p + mcnt' after we do this.  */
   4440 	  p1 = p;
   4441 
   4442 	  /* We need to skip no_op's before we look for the
   4443 	     start_memory in case this on_failure_jump is happening as
   4444 	     the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
   4445 	     against aba.  */
   4446 	  while (p1 < pend && (re_opcode_t) *p1 == no_op)
   4447 	    p1++;
   4448 
   4449 	  if (p1 < pend && (re_opcode_t) *p1 == start_memory)
   4450 	    {
   4451 	      /* We have a new highest active register now.  This will
   4452 		 get reset at the start_memory we are about to get to,
   4453 		 but we will have saved all the registers relevant to
   4454 		 this repetition op, as described above.  */
   4455 	      highest_active_reg = *(p1 + 1) + *(p1 + 2);
   4456 	      if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
   4457 		lowest_active_reg = *(p1 + 1);
   4458 	    }
   4459 
   4460 	  DEBUG_PRINT1 (":\n");
   4461 	  PUSH_FAILURE_POINT (p + mcnt, d, -2);
   4462 	  break;
   4463 
   4464 
   4465 	/* A smart repeat ends with `maybe_pop_jump'.
   4466 	   We change it to either `pop_failure_jump' or `jump'.	 */
   4467 	case maybe_pop_jump:
   4468 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
   4469 	  DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
   4470 	  {
   4471 	    register unsigned char *p2 = p;
   4472 
   4473 	    /* Compare the beginning of the repeat with what in the
   4474 	       pattern follows its end. If we can establish that there
   4475 	       is nothing that they would both match, i.e., that we
   4476 	       would have to backtrack because of (as in, e.g., `a*a')
   4477 	       then we can change to pop_failure_jump, because we'll
   4478 	       never have to backtrack.
   4479 
   4480 	       This is not true in the case of alternatives: in
   4481 	       `(a|ab)*' we do need to backtrack to the `ab' alternative
   4482 	       (e.g., if the string was `ab').	But instead of trying to
   4483 	       detect that here, the alternative has put on a dummy
   4484 	       failure point which is what we will end up popping.  */
   4485 
   4486 	    /* Skip over open/close-group commands.
   4487 	       If what follows this loop is a ...+ construct,
   4488 	       look at what begins its body, since we will have to
   4489 	       match at least one of that.  */
   4490 	    while (1)
   4491 	      {
   4492 		if (p2 + 2 < pend
   4493 		    && ((re_opcode_t) *p2 == stop_memory
   4494 			|| (re_opcode_t) *p2 == start_memory))
   4495 		  p2 += 3;
   4496 		else if (p2 + 6 < pend
   4497 			 && (re_opcode_t) *p2 == dummy_failure_jump)
   4498 		  p2 += 6;
   4499 		else
   4500 		  break;
   4501 	      }
   4502 
   4503 	    p1 = p + mcnt;
   4504 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
   4505 	       to the `maybe_finalize_jump' of this case.  Examine what
   4506 	       follows.	 */
   4507 
   4508 	    /* If we're at the end of the pattern, we can change.  */
   4509 	    if (p2 == pend)
   4510 	      {
   4511 		/* Consider what happens when matching ":\(.*\)"
   4512 		   against ":/".  I don't really understand this code
   4513 		   yet.	 */
   4514 		p[-3] = (unsigned char) pop_failure_jump;
   4515 		DEBUG_PRINT1
   4516 		  ("  End of pattern: change to `pop_failure_jump'.\n");
   4517 	      }
   4518 
   4519 	    else if ((re_opcode_t) *p2 == exactn
   4520 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
   4521 	      {
   4522 		register unsigned char c
   4523 		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
   4524 
   4525 		if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
   4526 		  {
   4527 		    p[-3] = (unsigned char) pop_failure_jump;
   4528 		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   4529 				  c, p1[5]);
   4530 		  }
   4531 
   4532 		else if ((re_opcode_t) p1[3] == charset
   4533 			 || (re_opcode_t) p1[3] == charset_not)
   4534 		  {
   4535 		    int not = (re_opcode_t) p1[3] == charset_not;
   4536 
   4537 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
   4538 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
   4539 		      not = !not;
   4540 
   4541 		    /* `not' is equal to 1 if c would match, which means
   4542 			that we can't change to pop_failure_jump.  */
   4543 		    if (!not)
   4544 		      {
   4545 			p[-3] = (unsigned char) pop_failure_jump;
   4546 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
   4547 		      }
   4548 		  }
   4549 	      }
   4550 	    else if ((re_opcode_t) *p2 == charset)
   4551 	      {
   4552 #ifdef DEBUG
   4553 		register unsigned char c
   4554 		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
   4555 #endif
   4556 
   4557 		if ((re_opcode_t) p1[3] == exactn
   4558 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
   4559 			  && (p2[2 + p1[5] / BYTEWIDTH]
   4560 			      & (1 << (p1[5] % BYTEWIDTH)))))
   4561 		  {
   4562 		    p[-3] = (unsigned char) pop_failure_jump;
   4563 		    DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
   4564 				  c, p1[5]);
   4565 		  }
   4566 
   4567 		else if ((re_opcode_t) p1[3] == charset_not)
   4568 		  {
   4569 		    int idx;
   4570 		    /* We win if the charset_not inside the loop
   4571 		       lists every character listed in the charset after.  */
   4572 		    for (idx = 0; idx < (int) p2[1]; idx++)
   4573 		      if (! (p2[2 + idx] == 0
   4574 			     || (idx < (int) p1[4]
   4575 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
   4576 			break;
   4577 
   4578 		    if (idx == p2[1])
   4579 		      {
   4580 			p[-3] = (unsigned char) pop_failure_jump;
   4581 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
   4582 		      }
   4583 		  }
   4584 		else if ((re_opcode_t) p1[3] == charset)
   4585 		  {
   4586 		    int idx;
   4587 		    /* We win if the charset inside the loop
   4588 		       has no overlap with the one after the loop.  */
   4589 		    for (idx = 0;
   4590 			 idx < (int) p2[1] && idx < (int) p1[4];
   4591 			 idx++)
   4592 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
   4593 			break;
   4594 
   4595 		    if (idx == p2[1] || idx == p1[4])
   4596 		      {
   4597 			p[-3] = (unsigned char) pop_failure_jump;
   4598 			DEBUG_PRINT1 ("	 No match => pop_failure_jump.\n");
   4599 		      }
   4600 		  }
   4601 	      }
   4602 	  }
   4603 	  p -= 2;		/* Point at relative address again.  */
   4604 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
   4605 	    {
   4606 	      p[-1] = (unsigned char) jump;
   4607 	      DEBUG_PRINT1 ("  Match => jump.\n");
   4608 	      goto unconditional_jump;
   4609 	    }
   4610 	/* Note fall through.  */
   4611 
   4612 
   4613 	/* The end of a simple repeat has a pop_failure_jump back to
   4614 	   its matching on_failure_jump, where the latter will push a
   4615 	   failure point.  The pop_failure_jump takes off failure
   4616 	   points put on by this pop_failure_jump's matching
   4617 	   on_failure_jump; we got through the pattern to here from the
   4618 	   matching on_failure_jump, so didn't fail.  */
   4619 	case pop_failure_jump:
   4620 	  {
   4621 	    /* We need to pass separate storage for the lowest and
   4622 	       highest registers, even though we don't care about the
   4623 	       actual values.  Otherwise, we will restore only one
   4624 	       register from the stack, since lowest will == highest in
   4625 	       `pop_failure_point'.  */
   4626 	    unsigned dummy_low_reg, dummy_high_reg;
   4627 	    unsigned char *pdummy;
   4628 	    const char *sdummy;
   4629 
   4630 	    DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
   4631 	    POP_FAILURE_POINT (sdummy, pdummy,
   4632 			       dummy_low_reg, dummy_high_reg,
   4633 			       reg_dummy, reg_dummy, reg_info_dummy);
   4634 	  }
   4635 	  /* Note fall through.	 */
   4636 
   4637 
   4638 	/* Unconditionally jump (without popping any failure points).  */
   4639 	case jump:
   4640 	unconditional_jump:
   4641 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
   4642 	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
   4643 	  p += mcnt;				/* Do the jump.	 */
   4644 	  DEBUG_PRINT2 ("(to 0x%x).\n", p);
   4645 	  break;
   4646 
   4647 
   4648 	/* We need this opcode so we can detect where alternatives end
   4649 	   in `group_match_null_string_p' et al.  */
   4650 	case jump_past_alt:
   4651 	  DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
   4652 	  goto unconditional_jump;
   4653 
   4654 
   4655 	/* Normally, the on_failure_jump pushes a failure point, which
   4656 	   then gets popped at pop_failure_jump.  We will end up at
   4657 	   pop_failure_jump, also, and with a pattern of, say, `a+', we
   4658 	   are skipping over the on_failure_jump, so we have to push
   4659 	   something meaningless for pop_failure_jump to pop.  */
   4660 	case dummy_failure_jump:
   4661 	  DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
   4662 	  /* It doesn't matter what we push for the string here.  What
   4663 	     the code at `fail' tests is the value for the pattern.  */
   4664 	  PUSH_FAILURE_POINT (0, 0, -2);
   4665 	  goto unconditional_jump;
   4666 
   4667 
   4668 	/* At the end of an alternative, we need to push a dummy failure
   4669 	   point in case we are followed by a `pop_failure_jump', because
   4670 	   we don't want the failure point for the alternative to be
   4671 	   popped.  For example, matching `(a|ab)*' against `aab'
   4672 	   requires that we match the `ab' alternative.	 */
   4673 	case push_dummy_failure:
   4674 	  DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
   4675 	  /* See comments just above at `dummy_failure_jump' about the
   4676 	     two zeroes.  */
   4677 	  PUSH_FAILURE_POINT (0, 0, -2);
   4678 	  break;
   4679 
   4680 	/* Have to succeed matching what follows at least n times.
   4681 	   After that, handle like `on_failure_jump'.  */
   4682 	case succeed_n:
   4683 	  EXTRACT_NUMBER (mcnt, p + 2);
   4684 	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
   4685 
   4686 	  assert (mcnt >= 0);
   4687 	  /* Originally, this is how many times we HAVE to succeed.  */
   4688 	  if (mcnt > 0)
   4689 	    {
   4690 	       mcnt--;
   4691 	       p += 2;
   4692 	       STORE_NUMBER_AND_INCR (p, mcnt);
   4693 	       DEBUG_PRINT3 ("	Setting 0x%x to %d.\n", p, mcnt);
   4694 	    }
   4695 	  else if (mcnt == 0)
   4696 	    {
   4697 	      DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
   4698 	      p[2] = (unsigned char) no_op;
   4699 	      p[3] = (unsigned char) no_op;
   4700 	      goto on_failure;
   4701 	    }
   4702 	  break;
   4703 
   4704 	case jump_n:
   4705 	  EXTRACT_NUMBER (mcnt, p + 2);
   4706 	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
   4707 
   4708 	  /* Originally, this is how many times we CAN jump.  */
   4709 	  if (mcnt)
   4710 	    {
   4711 	       mcnt--;
   4712 	       STORE_NUMBER (p + 2, mcnt);
   4713 	       goto unconditional_jump;
   4714 	    }
   4715 	  /* If don't have to jump any more, skip over the rest of command.  */
   4716 	  else
   4717 	    p += 4;
   4718 	  break;
   4719 
   4720 	case set_number_at:
   4721 	  {
   4722 	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
   4723 
   4724 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
   4725 	    p1 = p + mcnt;
   4726 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
   4727 	    DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
   4728 	    STORE_NUMBER (p1, mcnt);
   4729 	    break;
   4730 	  }
   4731 
   4732 #if 0
   4733 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
   4734 	   test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
   4735 	   AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
   4736 	   macro and introducing temporary variables works around the bug.  */
   4737 
   4738 	case wordbound:
   4739 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   4740 	  if (AT_WORD_BOUNDARY (d))
   4741 	    break;
   4742 	  goto fail;
   4743 
   4744 	case notwordbound:
   4745 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   4746 	  if (AT_WORD_BOUNDARY (d))
   4747 	    goto fail;
   4748 	  break;
   4749 #else
   4750 	case wordbound:
   4751 	{
   4752 	  boolean prevchar, thischar;
   4753 
   4754 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
   4755 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   4756 	    break;
   4757 
   4758 	  prevchar = WORDCHAR_P (d - 1);
   4759 	  thischar = WORDCHAR_P (d);
   4760 	  if (prevchar != thischar)
   4761 	    break;
   4762 	  goto fail;
   4763 	}
   4764 
   4765       case notwordbound:
   4766 	{
   4767 	  boolean prevchar, thischar;
   4768 
   4769 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
   4770 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
   4771 	    goto fail;
   4772 
   4773 	  prevchar = WORDCHAR_P (d - 1);
   4774 	  thischar = WORDCHAR_P (d);
   4775 	  if (prevchar != thischar)
   4776 	    goto fail;
   4777 	  break;
   4778 	}
   4779 #endif
   4780 
   4781 	case wordbeg:
   4782 	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
   4783 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
   4784 	    break;
   4785 	  goto fail;
   4786 
   4787 	case wordend:
   4788 	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
   4789 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
   4790 	      && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
   4791 	    break;
   4792 	  goto fail;
   4793 
   4794 #ifdef emacs
   4795 	case before_dot:
   4796 	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
   4797 	  if (PTR_CHAR_POS ((unsigned char *) d) >= PT)
   4798 	    goto fail;
   4799 	  break;
   4800 
   4801 	case at_dot:
   4802 	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
   4803 	  if (PTR_CHAR_POS ((unsigned char *) d) != PT)
   4804 	    goto fail;
   4805 	  break;
   4806 
   4807 	case after_dot:
   4808 	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
   4809 	  if (PTR_CHAR_POS ((unsigned char *) d) <= PT)
   4810 	    goto fail;
   4811 	  break;
   4812 
   4813 	case syntaxspec:
   4814 	  DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
   4815 	  mcnt = *p++;
   4816 	  goto matchsyntax;
   4817 
   4818 	case wordchar:
   4819 	  DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
   4820 	  mcnt = (int) Sword;
   4821 	matchsyntax:
   4822 	  PREFETCH ();
   4823 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   4824 	  d++;
   4825 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
   4826 	    goto fail;
   4827 	  SET_REGS_MATCHED ();
   4828 	  break;
   4829 
   4830 	case notsyntaxspec:
   4831 	  DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
   4832 	  mcnt = *p++;
   4833 	  goto matchnotsyntax;
   4834 
   4835 	case notwordchar:
   4836 	  DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
   4837 	  mcnt = (int) Sword;
   4838 	matchnotsyntax:
   4839 	  PREFETCH ();
   4840 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
   4841 	  d++;
   4842 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
   4843 	    goto fail;
   4844 	  SET_REGS_MATCHED ();
   4845 	  break;
   4846 
   4847 #else /* not emacs */
   4848 	case wordchar:
   4849 	  DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
   4850 	  PREFETCH ();
   4851 	  if (!WORDCHAR_P (d))
   4852 	    goto fail;
   4853 	  SET_REGS_MATCHED ();
   4854 	  d++;
   4855 	  break;
   4856 
   4857 	case notwordchar:
   4858 	  DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
   4859 	  PREFETCH ();
   4860 	  if (WORDCHAR_P (d))
   4861 	    goto fail;
   4862 	  SET_REGS_MATCHED ();
   4863 	  d++;
   4864 	  break;
   4865 #endif /* not emacs */
   4866 
   4867 	default:
   4868 	  abort ();
   4869 	}
   4870       continue;	 /* Successfully executed one pattern command; keep going.  */
   4871 
   4872 
   4873     /* We goto here if a matching operation fails. */
   4874     fail:
   4875       if (!FAIL_STACK_EMPTY ())
   4876 	{ /* A restart point is known.	Restore to that state.	*/
   4877 	  DEBUG_PRINT1 ("\nFAIL:\n");
   4878 	  POP_FAILURE_POINT (d, p,
   4879 			     lowest_active_reg, highest_active_reg,
   4880 			     regstart, regend, reg_info);
   4881 
   4882 	  /* If this failure point is a dummy, try the next one.  */
   4883 	  if (!p)
   4884 	    goto fail;
   4885 
   4886 	  /* If we failed to the end of the pattern, don't examine *p.	*/
   4887 	  assert (p <= pend);
   4888 	  if (p < pend)
   4889 	    {
   4890 	      boolean is_a_jump_n = false;
   4891 
   4892 	      /* If failed to a backwards jump that's part of a repetition
   4893 		 loop, need to pop this failure point and use the next one.  */
   4894 	      switch ((re_opcode_t) *p)
   4895 		{
   4896 		case jump_n:
   4897 		  is_a_jump_n = true;
   4898 		case maybe_pop_jump:
   4899 		case pop_failure_jump:
   4900 		case jump:
   4901 		  p1 = p + 1;
   4902 		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   4903 		  p1 += mcnt;
   4904 
   4905 		  if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
   4906 		      || (!is_a_jump_n
   4907 			  && (re_opcode_t) *p1 == on_failure_jump))
   4908 		    goto fail;
   4909 		  break;
   4910 		default:
   4911 		  /* do nothing */ ;
   4912 		}
   4913 	    }
   4914 
   4915 	  if (d >= string1 && d <= end1)
   4916 	    dend = end_match_1;
   4917 	}
   4918       else
   4919 	break;	 /* Matching at this starting point really fails.  */
   4920     } /* for (;;) */
   4921 
   4922   if (best_regs_set)
   4923     goto restore_best_regs;
   4924 
   4925   FREE_VARIABLES ();
   4926 
   4927   return -1;				/* Failure to match.  */
   4928 } /* re_match_2 */
   4929 
   4930 /* Subroutine definitions for re_match_2.  */
   4932 
   4933 
   4934 /* We are passed P pointing to a register number after a start_memory.
   4935 
   4936    Return true if the pattern up to the corresponding stop_memory can
   4937    match the empty string, and false otherwise.
   4938 
   4939    If we find the matching stop_memory, sets P to point to one past its number.
   4940    Otherwise, sets P to an undefined byte less than or equal to END.
   4941 
   4942    We don't handle duplicates properly (yet).  */
   4943 
   4944 static boolean
   4945 group_match_null_string_p (p, end, reg_info)
   4946     unsigned char **p, *end;
   4947     register_info_type *reg_info;
   4948 {
   4949   int mcnt;
   4950   /* Point to after the args to the start_memory.  */
   4951   unsigned char *p1 = *p + 2;
   4952 
   4953   while (p1 < end)
   4954     {
   4955       /* Skip over opcodes that can match nothing, and return true or
   4956 	 false, as appropriate, when we get to one that can't, or to the
   4957 	 matching stop_memory.	*/
   4958 
   4959       switch ((re_opcode_t) *p1)
   4960 	{
   4961 	/* Could be either a loop or a series of alternatives.	*/
   4962 	case on_failure_jump:
   4963 	  p1++;
   4964 	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   4965 
   4966 	  /* If the next operation is not a jump backwards in the
   4967 	     pattern.  */
   4968 
   4969 	  if (mcnt >= 0)
   4970 	    {
   4971 	      /* Go through the on_failure_jumps of the alternatives,
   4972 		 seeing if any of the alternatives cannot match nothing.
   4973 		 The last alternative starts with only a jump,
   4974 		 whereas the rest start with on_failure_jump and end
   4975 		 with a jump, e.g., here is the pattern for `a|b|c':
   4976 
   4977 		 /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
   4978 		 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
   4979 		 /exactn/1/c
   4980 
   4981 		 So, we have to first go through the first (n-1)
   4982 		 alternatives and then deal with the last one separately.  */
   4983 
   4984 
   4985 	      /* Deal with the first (n-1) alternatives, which start
   4986 		 with an on_failure_jump (see above) that jumps to right
   4987 		 past a jump_past_alt.	*/
   4988 
   4989 	      while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
   4990 		{
   4991 		  /* `mcnt' holds how many bytes long the alternative
   4992 		     is, including the ending `jump_past_alt' and
   4993 		     its number.  */
   4994 
   4995 		  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
   4996 						      reg_info))
   4997 		    return false;
   4998 
   4999 		  /* Move to right after this alternative, including the
   5000 		     jump_past_alt.  */
   5001 		  p1 += mcnt;
   5002 
   5003 		  /* Break if it's the beginning of an n-th alternative
   5004 		     that doesn't begin with an on_failure_jump.  */
   5005 		  if ((re_opcode_t) *p1 != on_failure_jump)
   5006 		    break;
   5007 
   5008 		  /* Still have to check that it's not an n-th
   5009 		     alternative that starts with an on_failure_jump.  */
   5010 		  p1++;
   5011 		  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   5012 		  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
   5013 		    {
   5014 		      /* Get to the beginning of the n-th alternative.	*/
   5015 		      p1 -= 3;
   5016 		      break;
   5017 		    }
   5018 		}
   5019 
   5020 	      /* Deal with the last alternative: go back and get number
   5021 		 of the `jump_past_alt' just before it.	 `mcnt' contains
   5022 		 the length of the alternative.	 */
   5023 	      EXTRACT_NUMBER (mcnt, p1 - 2);
   5024 
   5025 	      if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
   5026 		return false;
   5027 
   5028 	      p1 += mcnt;	/* Get past the n-th alternative.  */
   5029 	    } /* if mcnt > 0 */
   5030 	  break;
   5031 
   5032 
   5033 	case stop_memory:
   5034 	  assert (p1[1] == **p);
   5035 	  *p = p1 + 2;
   5036 	  return true;
   5037 
   5038 
   5039 	default:
   5040 	  if (!common_op_match_null_string_p (&p1, end, reg_info))
   5041 	    return false;
   5042 	}
   5043     } /* while p1 < end */
   5044 
   5045   return false;
   5046 } /* group_match_null_string_p */
   5047 
   5048 
   5049 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
   5050    It expects P to be the first byte of a single alternative and END one
   5051    byte past the last. The alternative can contain groups.  */
   5052 
   5053 static boolean
   5054 alt_match_null_string_p (p, end, reg_info)
   5055     unsigned char *p, *end;
   5056     register_info_type *reg_info;
   5057 {
   5058   int mcnt;
   5059   unsigned char *p1 = p;
   5060 
   5061   while (p1 < end)
   5062     {
   5063       /* Skip over opcodes that can match nothing, and break when we get
   5064 	 to one that can't.  */
   5065 
   5066       switch ((re_opcode_t) *p1)
   5067 	{
   5068 	/* It's a loop.	 */
   5069 	case on_failure_jump:
   5070 	  p1++;
   5071 	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   5072 	  p1 += mcnt;
   5073 	  break;
   5074 
   5075 	default:
   5076 	  if (!common_op_match_null_string_p (&p1, end, reg_info))
   5077 	    return false;
   5078 	}
   5079     }  /* while p1 < end */
   5080 
   5081   return true;
   5082 } /* alt_match_null_string_p */
   5083 
   5084 
   5085 /* Deals with the ops common to group_match_null_string_p and
   5086    alt_match_null_string_p.
   5087 
   5088    Sets P to one after the op and its arguments, if any.  */
   5089 
   5090 static boolean
   5091 common_op_match_null_string_p (p, end, reg_info)
   5092     unsigned char **p, *end;
   5093     register_info_type *reg_info;
   5094 {
   5095   int mcnt;
   5096   boolean ret;
   5097   int reg_no;
   5098   unsigned char *p1 = *p;
   5099 
   5100   switch ((re_opcode_t) *p1++)
   5101     {
   5102     case no_op:
   5103     case begline:
   5104     case endline:
   5105     case begbuf:
   5106     case endbuf:
   5107     case wordbeg:
   5108     case wordend:
   5109     case wordbound:
   5110     case notwordbound:
   5111 #ifdef emacs
   5112     case before_dot:
   5113     case at_dot:
   5114     case after_dot:
   5115 #endif
   5116       break;
   5117 
   5118     case start_memory:
   5119       reg_no = *p1;
   5120       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
   5121       ret = group_match_null_string_p (&p1, end, reg_info);
   5122 
   5123       /* Have to set this here in case we're checking a group which
   5124 	 contains a group and a back reference to it.  */
   5125 
   5126       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
   5127 	REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
   5128 
   5129       if (!ret)
   5130 	return false;
   5131       break;
   5132 
   5133     /* If this is an optimized succeed_n for zero times, make the jump.	 */
   5134     case jump:
   5135       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   5136       if (mcnt >= 0)
   5137 	p1 += mcnt;
   5138       else
   5139 	return false;
   5140       break;
   5141 
   5142     case succeed_n:
   5143       /* Get to the number of times to succeed.	 */
   5144       p1 += 2;
   5145       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   5146 
   5147       if (mcnt == 0)
   5148 	{
   5149 	  p1 -= 4;
   5150 	  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
   5151 	  p1 += mcnt;
   5152 	}
   5153       else
   5154 	return false;
   5155       break;
   5156 
   5157     case duplicate:
   5158       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
   5159 	return false;
   5160       break;
   5161 
   5162     case set_number_at:
   5163       p1 += 4;
   5164 
   5165     default:
   5166       /* All other opcodes mean we cannot match the empty string.  */
   5167       return false;
   5168   }
   5169 
   5170   *p = p1;
   5171   return true;
   5172 } /* common_op_match_null_string_p */
   5173 
   5174 
   5175 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
   5176    bytes; nonzero otherwise.  */
   5177 
   5178 static int
   5179 bcmp_translate (s1, s2, len, translate)
   5180      unsigned char *s1, *s2;
   5181      register int len;
   5182      RE_TRANSLATE_TYPE translate;
   5183 {
   5184   register unsigned char *p1 = s1, *p2 = s2;
   5185   while (len)
   5186     {
   5187       if (translate[*p1++] != translate[*p2++]) return 1;
   5188       len--;
   5189     }
   5190   return 0;
   5191 }
   5192 
   5193 /* Entry points for GNU code.  */
   5195 
   5196 /* re_compile_pattern is the GNU regular expression compiler: it
   5197    compiles PATTERN (of length SIZE) and puts the result in BUFP.
   5198    Returns 0 if the pattern was valid, otherwise an error string.
   5199 
   5200    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
   5201    are set in BUFP on entry.
   5202 
   5203    We call regex_compile to do the actual compilation.	*/
   5204 
   5205 const char *
   5206 re_compile_pattern (pattern, length, bufp)
   5207      const char *pattern;
   5208      int length;
   5209      struct re_pattern_buffer *bufp;
   5210 {
   5211   reg_errcode_t ret;
   5212 
   5213   /* GNU code is written to assume at least RE_NREGS registers will be set
   5214      (and at least one extra will be -1).  */
   5215   bufp->regs_allocated = REGS_UNALLOCATED;
   5216 
   5217   /* And GNU code determines whether or not to get register information
   5218      by passing null for the REGS argument to re_match, etc., not by
   5219      setting no_sub.  */
   5220   bufp->no_sub = 0;
   5221 
   5222   /* Match anchors at newline.	*/
   5223   bufp->newline_anchor = 1;
   5224 
   5225   ret = regex_compile (pattern, length, re_syntax_options, bufp);
   5226 
   5227   if (!ret)
   5228     return NULL;
   5229   return gettext (re_error_msgid[(int) ret]);
   5230 }
   5231 
   5232 /* Entry points compatible with 4.2 BSD regex library.	We don't define
   5234    them unless specifically requested.	*/
   5235 
   5236 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
   5237 
   5238 /* BSD has one and only one pattern buffer.  */
   5239 static struct re_pattern_buffer re_comp_buf;
   5240 
   5241 char *
   5242 #ifdef _LIBC
   5243 /* Make these definitions weak in libc, so POSIX programs can redefine
   5244    these names if they don't use our functions, and still use
   5245    regcomp/regexec below without link errors.  */
   5246 weak_function
   5247 #endif
   5248 re_comp (s)
   5249     const char *s;
   5250 {
   5251   reg_errcode_t ret;
   5252 
   5253   if (!s)
   5254     {
   5255       if (!re_comp_buf.buffer)
   5256 	return gettext ("No previous regular expression");
   5257       return 0;
   5258     }
   5259 
   5260   if (!re_comp_buf.buffer)
   5261     {
   5262       re_comp_buf.buffer = (unsigned char *) malloc (200);
   5263       if (re_comp_buf.buffer == NULL)
   5264 	return gettext (re_error_msgid[(int) REG_ESPACE]);
   5265       re_comp_buf.allocated = 200;
   5266 
   5267       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
   5268       if (re_comp_buf.fastmap == NULL)
   5269 	return gettext (re_error_msgid[(int) REG_ESPACE]);
   5270     }
   5271 
   5272   /* Since `re_exec' always passes NULL for the `regs' argument, we
   5273      don't need to initialize the pattern buffer fields which affect it.  */
   5274 
   5275   /* Match anchors at newlines.	 */
   5276   re_comp_buf.newline_anchor = 1;
   5277 
   5278   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
   5279 
   5280   if (!ret)
   5281     return NULL;
   5282 
   5283   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
   5284   return (char *) gettext (re_error_msgid[(int) ret]);
   5285 }
   5286 
   5287 
   5288 int
   5289 #ifdef _LIBC
   5290 weak_function
   5291 #endif
   5292 re_exec (s)
   5293     const char *s;
   5294 {
   5295   const int len = strlen (s);
   5296   return
   5297     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
   5298 }
   5299 #endif /* _REGEX_RE_COMP */
   5300 
   5301 /* POSIX.2 functions.  Don't define these for Emacs.  */
   5303 
   5304 #ifndef emacs
   5305 
   5306 /* regcomp takes a regular expression as a string and compiles it.
   5307 
   5308    PREG is a regex_t *.	 We do not expect any fields to be initialized,
   5309    since POSIX says we shouldn't.  Thus, we set
   5310 
   5311      `buffer' to the compiled pattern;
   5312      `used' to the length of the compiled pattern;
   5313      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
   5314        REG_EXTENDED bit in CFLAGS is set; otherwise, to
   5315        RE_SYNTAX_POSIX_BASIC;
   5316      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
   5317      `fastmap' and `fastmap_accurate' to zero;
   5318      `re_nsub' to the number of subexpressions in PATTERN.
   5319 
   5320    PATTERN is the address of the pattern string.
   5321 
   5322    CFLAGS is a series of bits which affect compilation.
   5323 
   5324      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
   5325      use POSIX basic syntax.
   5326 
   5327      If REG_NEWLINE is set, then . and [^...] don't match newline.
   5328      Also, regexec will try a match beginning after every newline.
   5329 
   5330      If REG_ICASE is set, then we considers upper- and lowercase
   5331      versions of letters to be equivalent when matching.
   5332 
   5333      If REG_NOSUB is set, then when PREG is passed to regexec, that
   5334      routine will report only success or failure, and nothing about the
   5335      registers.
   5336 
   5337    It returns 0 if it succeeds, nonzero if it doesn't.	(See regex.h for
   5338    the return codes and their meanings.)  */
   5339 
   5340 int
   5341 regcomp (preg, pattern, cflags)
   5342     regex_t *preg;
   5343     const char *pattern;
   5344     int cflags;
   5345 {
   5346   reg_errcode_t ret;
   5347   unsigned syntax
   5348     = (cflags & REG_EXTENDED) ?
   5349       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
   5350 
   5351   /* regex_compile will allocate the space for the compiled pattern.  */
   5352   preg->buffer = 0;
   5353   preg->allocated = 0;
   5354   preg->used = 0;
   5355 
   5356   /* Don't bother to use a fastmap when searching.  This simplifies the
   5357      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
   5358      characters after newlines into the fastmap.  This way, we just try
   5359      every character.  */
   5360   preg->fastmap = 0;
   5361 
   5362   if (cflags & REG_ICASE)
   5363     {
   5364       unsigned i;
   5365 
   5366       preg->translate
   5367 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
   5368 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
   5369       if (preg->translate == NULL)
   5370 	return (int) REG_ESPACE;
   5371 
   5372       /* Map uppercase characters to corresponding lowercase ones.  */
   5373       for (i = 0; i < CHAR_SET_SIZE; i++)
   5374 	preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
   5375     }
   5376   else
   5377     preg->translate = NULL;
   5378 
   5379   /* If REG_NEWLINE is set, newlines are treated differently.  */
   5380   if (cflags & REG_NEWLINE)
   5381     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
   5382       syntax &= ~RE_DOT_NEWLINE;
   5383       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
   5384       /* It also changes the matching behavior.	 */
   5385       preg->newline_anchor = 1;
   5386     }
   5387   else
   5388     preg->newline_anchor = 0;
   5389 
   5390   preg->no_sub = !!(cflags & REG_NOSUB);
   5391 
   5392   /* POSIX says a null character in the pattern terminates it, so we
   5393      can use strlen here in compiling the pattern.  */
   5394   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
   5395 
   5396   /* POSIX doesn't distinguish between an unmatched open-group and an
   5397      unmatched close-group: both are REG_EPAREN.  */
   5398   if (ret == REG_ERPAREN) ret = REG_EPAREN;
   5399 
   5400   return (int) ret;
   5401 }
   5402 
   5403 
   5404 /* regexec searches for a given pattern, specified by PREG, in the
   5405    string STRING.
   5406 
   5407    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   5408    `regcomp', we ignore PMATCH.	 Otherwise, we assume PMATCH has at
   5409    least NMATCH elements, and we set them to the offsets of the
   5410    corresponding matched substrings.
   5411 
   5412    EFLAGS specifies `execution flags' which affect matching: if
   5413    REG_NOTBOL is set, then ^ does not match at the beginning of the
   5414    string; if REG_NOTEOL is set, then $ does not match at the end.
   5415 
   5416    We return 0 if we find a match and REG_NOMATCH if not.  */
   5417 
   5418 int
   5419 regexec (preg, string, nmatch, pmatch, eflags)
   5420     const regex_t *preg;
   5421     const char *string;
   5422     size_t nmatch;
   5423     regmatch_t pmatch[];
   5424     int eflags;
   5425 {
   5426   int ret;
   5427   struct re_registers regs;
   5428   regex_t private_preg;
   5429   int len = strlen (string);
   5430   boolean want_reg_info = !preg->no_sub && nmatch > 0;
   5431 
   5432   private_preg = *preg;
   5433 
   5434   private_preg.not_bol = !!(eflags & REG_NOTBOL);
   5435   private_preg.not_eol = !!(eflags & REG_NOTEOL);
   5436 
   5437   /* The user has told us exactly how many registers to return
   5438      information about, via `nmatch'.  We have to pass that on to the
   5439      matching routines.	 */
   5440   private_preg.regs_allocated = REGS_FIXED;
   5441 
   5442   if (want_reg_info)
   5443     {
   5444       regs.num_regs = nmatch;
   5445       regs.start = TALLOC (nmatch, regoff_t);
   5446       regs.end = TALLOC (nmatch, regoff_t);
   5447       if (regs.start == NULL || regs.end == NULL)
   5448 	return (int) REG_NOMATCH;
   5449     }
   5450 
   5451   /* Perform the searching operation.  */
   5452   ret = re_search (&private_preg, string, len,
   5453 		   /* start: */ 0, /* range: */ len,
   5454 		   want_reg_info ? &regs : (struct re_registers *) 0);
   5455 
   5456   /* Copy the register information to the POSIX structure.  */
   5457   if (want_reg_info)
   5458     {
   5459       if (ret >= 0)
   5460 	{
   5461 	  unsigned r;
   5462 
   5463 	  for (r = 0; r < nmatch; r++)
   5464 	    {
   5465 	      pmatch[r].rm_so = regs.start[r];
   5466 	      pmatch[r].rm_eo = regs.end[r];
   5467 	    }
   5468 	}
   5469 
   5470       /* If we needed the temporary register info, free the space now.	*/
   5471       free (regs.start);
   5472       free (regs.end);
   5473     }
   5474 
   5475   /* We want zero return to mean success, unlike `re_search'.  */
   5476   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
   5477 }
   5478 
   5479 
   5480 /* Returns a message corresponding to an error code, ERRCODE, returned
   5481    from either regcomp or regexec.   We don't use PREG here.  */
   5482 
   5483 size_t
   5484 regerror (errcode, preg, errbuf, errbuf_size)
   5485     int errcode;
   5486     const regex_t *preg;
   5487     char *errbuf;
   5488     size_t errbuf_size;
   5489 {
   5490   const char *msg;
   5491   size_t msg_size;
   5492 
   5493   if (errcode < 0
   5494       || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
   5495     /* Only error codes returned by the rest of the code should be passed
   5496        to this routine.	 If we are given anything else, or if other regex
   5497        code generates an invalid error code, then the program has a bug.
   5498        Dump core so we can fix it.  */
   5499     abort ();
   5500 
   5501   msg = gettext (re_error_msgid[errcode]);
   5502 
   5503   msg_size = strlen (msg) + 1; /* Includes the null.  */
   5504 
   5505   if (errbuf_size != 0)
   5506     {
   5507       if (msg_size > errbuf_size)
   5508 	{
   5509 	  strncpy (errbuf, msg, errbuf_size - 1);
   5510 	  errbuf[errbuf_size - 1] = 0;
   5511 	}
   5512       else
   5513 	strcpy (errbuf, msg);
   5514     }
   5515 
   5516   return msg_size;
   5517 }
   5518 
   5519 
   5520 /* Free dynamically allocated space used by PREG.  */
   5521 
   5522 void
   5523 regfree (preg)
   5524     regex_t *preg;
   5525 {
   5526   if (preg->buffer != NULL)
   5527     free (preg->buffer);
   5528   preg->buffer = NULL;
   5529 
   5530   preg->allocated = 0;
   5531   preg->used = 0;
   5532 
   5533   if (preg->fastmap != NULL)
   5534     free (preg->fastmap);
   5535   preg->fastmap = NULL;
   5536   preg->fastmap_accurate = 0;
   5537 
   5538   if (preg->translate != NULL)
   5539     free (preg->translate);
   5540   preg->translate = NULL;
   5541 }
   5542 
   5543 #endif /* not emacs  */
   5544