Home | History | Annotate | Download | only in lib
      1 /* -*- buffer-read-only: t -*- vi: set ro: */
      2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
      3 /* Extended regular expression matching and search library.
      4    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
      5    Free Software Foundation, Inc.
      6    This file is part of the GNU C Library.
      7    Contributed by Isamu Hasegawa <isamu (at) yamato.ibm.com>.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 3, or (at your option)
     12    any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License along
     20    with this program; if not, write to the Free Software Foundation,
     21    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
     24 				     Idx n) internal_function;
     25 static void match_ctx_clean (re_match_context_t *mctx) internal_function;
     26 static void match_ctx_free (re_match_context_t *cache) internal_function;
     27 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
     28 					  Idx str_idx, Idx from, Idx to)
     29      internal_function;
     30 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
     31      internal_function;
     32 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
     33 					   Idx str_idx) internal_function;
     34 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
     35 						    Idx node, Idx str_idx)
     36      internal_function;
     37 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
     38 			   re_dfastate_t **limited_sts, Idx last_node,
     39 			   Idx last_str_idx)
     40      internal_function;
     41 static reg_errcode_t re_search_internal (const regex_t *preg,
     42 					 const char *string, Idx length,
     43 					 Idx start, Idx last_start, Idx stop,
     44 					 size_t nmatch, regmatch_t pmatch[],
     45 					 int eflags) internal_function;
     46 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
     47 				  const char *string1, Idx length1,
     48 				  const char *string2, Idx length2,
     49 				  Idx start, regoff_t range,
     50 				  struct re_registers *regs,
     51 				  Idx stop, bool ret_len) internal_function;
     52 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
     53 				const char *string, Idx length, Idx start,
     54 				regoff_t range, Idx stop,
     55 				struct re_registers *regs,
     56 				bool ret_len) internal_function;
     57 static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
     58 				  Idx nregs, int regs_allocated)
     59      internal_function;
     60 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx)
     61      internal_function;
     62 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
     63 			   Idx *p_match_first) internal_function;
     64 static Idx check_halt_state_context (const re_match_context_t *mctx,
     65 				     const re_dfastate_t *state, Idx idx)
     66      internal_function;
     67 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
     68 			 regmatch_t *prev_idx_match, Idx cur_node,
     69 			 Idx cur_idx, Idx nmatch) internal_function;
     70 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
     71 				      Idx str_idx, Idx dest_node, Idx nregs,
     72 				      regmatch_t *regs,
     73 				      re_node_set *eps_via_nodes)
     74      internal_function;
     75 static reg_errcode_t set_regs (const regex_t *preg,
     76 			       const re_match_context_t *mctx,
     77 			       size_t nmatch, regmatch_t *pmatch,
     78 			       bool fl_backtrack) internal_function;
     79 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs)
     80      internal_function;
     81 
     82 #ifdef RE_ENABLE_I18N
     83 static int sift_states_iter_mb (const re_match_context_t *mctx,
     84 				re_sift_context_t *sctx,
     85 				Idx node_idx, Idx str_idx, Idx max_str_idx)
     86      internal_function;
     87 #endif /* RE_ENABLE_I18N */
     88 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
     89 					   re_sift_context_t *sctx)
     90      internal_function;
     91 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
     92 					  re_sift_context_t *sctx, Idx str_idx,
     93 					  re_node_set *cur_dest)
     94      internal_function;
     95 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
     96 					      re_sift_context_t *sctx,
     97 					      Idx str_idx,
     98 					      re_node_set *dest_nodes)
     99      internal_function;
    100 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
    101 					    re_node_set *dest_nodes,
    102 					    const re_node_set *candidates)
    103      internal_function;
    104 static bool check_dst_limits (const re_match_context_t *mctx,
    105 			      const re_node_set *limits,
    106 			      Idx dst_node, Idx dst_idx, Idx src_node,
    107 			      Idx src_idx) internal_function;
    108 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
    109 					int boundaries, Idx subexp_idx,
    110 					Idx from_node, Idx bkref_idx)
    111      internal_function;
    112 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
    113 				      Idx limit, Idx subexp_idx,
    114 				      Idx node, Idx str_idx,
    115 				      Idx bkref_idx) internal_function;
    116 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
    117 					  re_node_set *dest_nodes,
    118 					  const re_node_set *candidates,
    119 					  re_node_set *limits,
    120 					  struct re_backref_cache_entry *bkref_ents,
    121 					  Idx str_idx) internal_function;
    122 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
    123 					re_sift_context_t *sctx,
    124 					Idx str_idx, const re_node_set *candidates)
    125      internal_function;
    126 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
    127 					re_dfastate_t **dst,
    128 					re_dfastate_t **src, Idx num)
    129      internal_function;
    130 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
    131 					 re_match_context_t *mctx) internal_function;
    132 static re_dfastate_t *transit_state (reg_errcode_t *err,
    133 				     re_match_context_t *mctx,
    134 				     re_dfastate_t *state) internal_function;
    135 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
    136 					    re_match_context_t *mctx,
    137 					    re_dfastate_t *next_state)
    138      internal_function;
    139 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
    140 						re_node_set *cur_nodes,
    141 						Idx str_idx) internal_function;
    142 #if 0
    143 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
    144 					re_match_context_t *mctx,
    145 					re_dfastate_t *pstate)
    146      internal_function;
    147 #endif
    148 #ifdef RE_ENABLE_I18N
    149 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
    150 				       re_dfastate_t *pstate)
    151      internal_function;
    152 #endif /* RE_ENABLE_I18N */
    153 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
    154 					  const re_node_set *nodes)
    155      internal_function;
    156 static reg_errcode_t get_subexp (re_match_context_t *mctx,
    157 				 Idx bkref_node, Idx bkref_str_idx)
    158      internal_function;
    159 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
    160 				     const re_sub_match_top_t *sub_top,
    161 				     re_sub_match_last_t *sub_last,
    162 				     Idx bkref_node, Idx bkref_str)
    163      internal_function;
    164 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
    165 			     Idx subexp_idx, int type) internal_function;
    166 static reg_errcode_t check_arrival (re_match_context_t *mctx,
    167 				    state_array_t *path, Idx top_node,
    168 				    Idx top_str, Idx last_node, Idx last_str,
    169 				    int type) internal_function;
    170 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
    171 						   Idx str_idx,
    172 						   re_node_set *cur_nodes,
    173 						   re_node_set *next_nodes)
    174      internal_function;
    175 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
    176 					       re_node_set *cur_nodes,
    177 					       Idx ex_subexp, int type)
    178      internal_function;
    179 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
    180 						   re_node_set *dst_nodes,
    181 						   Idx target, Idx ex_subexp,
    182 						   int type) internal_function;
    183 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
    184 					 re_node_set *cur_nodes, Idx cur_str,
    185 					 Idx subexp_num, int type)
    186      internal_function;
    187 static bool build_trtable (const re_dfa_t *dfa,
    188 			   re_dfastate_t *state) internal_function;
    189 #ifdef RE_ENABLE_I18N
    190 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
    191 				    const re_string_t *input, Idx idx)
    192      internal_function;
    193 # ifdef _LIBC
    194 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
    195 						   size_t name_len)
    196      internal_function;
    197 # endif /* _LIBC */
    198 #endif /* RE_ENABLE_I18N */
    199 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
    200 				       const re_dfastate_t *state,
    201 				       re_node_set *states_node,
    202 				       bitset_t *states_ch) internal_function;
    203 static bool check_node_accept (const re_match_context_t *mctx,
    204 			       const re_token_t *node, Idx idx)
    205      internal_function;
    206 static reg_errcode_t extend_buffers (re_match_context_t *mctx)
    207      internal_function;
    208 
    209 /* Entry point for POSIX code.  */
    211 
    212 /* regexec searches for a given pattern, specified by PREG, in the
    213    string STRING.
    214 
    215    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
    216    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
    217    least NMATCH elements, and we set them to the offsets of the
    218    corresponding matched substrings.
    219 
    220    EFLAGS specifies `execution flags' which affect matching: if
    221    REG_NOTBOL is set, then ^ does not match at the beginning of the
    222    string; if REG_NOTEOL is set, then $ does not match at the end.
    223 
    224    We return 0 if we find a match and REG_NOMATCH if not.  */
    225 
    226 int
    227 regexec (preg, string, nmatch, pmatch, eflags)
    228     const regex_t *_Restrict_ preg;
    229     const char *_Restrict_ string;
    230     size_t nmatch;
    231     regmatch_t pmatch[_Restrict_arr_];
    232     int eflags;
    233 {
    234   reg_errcode_t err;
    235   Idx start, length;
    236 #ifdef _LIBC
    237   re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
    238 #endif
    239 
    240   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
    241     return REG_BADPAT;
    242 
    243   if (eflags & REG_STARTEND)
    244     {
    245       start = pmatch[0].rm_so;
    246       length = pmatch[0].rm_eo;
    247     }
    248   else
    249     {
    250       start = 0;
    251       length = strlen (string);
    252     }
    253 
    254   __libc_lock_lock (dfa->lock);
    255   if (preg->no_sub)
    256     err = re_search_internal (preg, string, length, start, length,
    257 			      length, 0, NULL, eflags);
    258   else
    259     err = re_search_internal (preg, string, length, start, length,
    260 			      length, nmatch, pmatch, eflags);
    261   __libc_lock_unlock (dfa->lock);
    262   return err != REG_NOERROR;
    263 }
    264 
    265 #ifdef _LIBC
    266 # include <shlib-compat.h>
    267 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
    268 
    269 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
    270 __typeof__ (__regexec) __compat_regexec;
    271 
    272 int
    273 attribute_compat_text_section
    274 __compat_regexec (const regex_t *_Restrict_ preg,
    275 		  const char *_Restrict_ string, size_t nmatch,
    276 		  regmatch_t pmatch[], int eflags)
    277 {
    278   return regexec (preg, string, nmatch, pmatch,
    279 		  eflags & (REG_NOTBOL | REG_NOTEOL));
    280 }
    281 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
    282 # endif
    283 #endif
    284 
    285 /* Entry points for GNU code.  */
    286 
    287 /* re_match, re_search, re_match_2, re_search_2
    288 
    289    The former two functions operate on STRING with length LENGTH,
    290    while the later two operate on concatenation of STRING1 and STRING2
    291    with lengths LENGTH1 and LENGTH2, respectively.
    292 
    293    re_match() matches the compiled pattern in BUFP against the string,
    294    starting at index START.
    295 
    296    re_search() first tries matching at index START, then it tries to match
    297    starting from index START + 1, and so on.  The last start position tried
    298    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
    299    way as re_match().)
    300 
    301    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
    302    the first STOP characters of the concatenation of the strings should be
    303    concerned.
    304 
    305    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
    306    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
    307    computed relative to the concatenation, not relative to the individual
    308    strings.)
    309 
    310    On success, re_match* functions return the length of the match, re_search*
    311    return the position of the start of the match.  Return value -1 means no
    312    match was found and -2 indicates an internal error.  */
    313 
    314 regoff_t
    315 re_match (bufp, string, length, start, regs)
    316     struct re_pattern_buffer *bufp;
    317     const char *string;
    318     Idx length, start;
    319     struct re_registers *regs;
    320 {
    321   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
    322 }
    323 #ifdef _LIBC
    324 weak_alias (__re_match, re_match)
    325 #endif
    326 
    327 regoff_t
    328 re_search (bufp, string, length, start, range, regs)
    329     struct re_pattern_buffer *bufp;
    330     const char *string;
    331     Idx length, start;
    332     regoff_t range;
    333     struct re_registers *regs;
    334 {
    335   return re_search_stub (bufp, string, length, start, range, length, regs,
    336 			 false);
    337 }
    338 #ifdef _LIBC
    339 weak_alias (__re_search, re_search)
    340 #endif
    341 
    342 regoff_t
    343 re_match_2 (bufp, string1, length1, string2, length2, start, regs, stop)
    344     struct re_pattern_buffer *bufp;
    345     const char *string1, *string2;
    346     Idx length1, length2, start, stop;
    347     struct re_registers *regs;
    348 {
    349   return re_search_2_stub (bufp, string1, length1, string2, length2,
    350 			   start, 0, regs, stop, true);
    351 }
    352 #ifdef _LIBC
    353 weak_alias (__re_match_2, re_match_2)
    354 #endif
    355 
    356 regoff_t
    357 re_search_2 (bufp, string1, length1, string2, length2, start, range, regs, stop)
    358     struct re_pattern_buffer *bufp;
    359     const char *string1, *string2;
    360     Idx length1, length2, start, stop;
    361     regoff_t range;
    362     struct re_registers *regs;
    363 {
    364   return re_search_2_stub (bufp, string1, length1, string2, length2,
    365 			   start, range, regs, stop, false);
    366 }
    367 #ifdef _LIBC
    368 weak_alias (__re_search_2, re_search_2)
    369 #endif
    370 
    371 static regoff_t
    372 internal_function
    373 re_search_2_stub (struct re_pattern_buffer *bufp,
    374 		  const char *string1, Idx length1,
    375 		  const char *string2, Idx length2,
    376 		  Idx start, regoff_t range, struct re_registers *regs,
    377 		  Idx stop, bool ret_len)
    378 {
    379   const char *str;
    380   regoff_t rval;
    381   Idx len = length1 + length2;
    382   char *s = NULL;
    383 
    384   if (BE (length1 < 0 || length2 < 0 || stop < 0 || len < length1, 0))
    385     return -2;
    386 
    387   /* Concatenate the strings.  */
    388   if (length2 > 0)
    389     if (length1 > 0)
    390       {
    391 	s = re_malloc (char, len);
    392 
    393 	if (BE (s == NULL, 0))
    394 	  return -2;
    395 #ifdef _LIBC
    396 	memcpy (__mempcpy (s, string1, length1), string2, length2);
    397 #else
    398 	memcpy (s, string1, length1);
    399 	memcpy (s + length1, string2, length2);
    400 #endif
    401 	str = s;
    402       }
    403     else
    404       str = string2;
    405   else
    406     str = string1;
    407 
    408   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
    409 			 ret_len);
    410   re_free (s);
    411   return rval;
    412 }
    413 
    414 /* The parameters have the same meaning as those of re_search.
    415    Additional parameters:
    416    If RET_LEN is true the length of the match is returned (re_match style);
    417    otherwise the position of the match is returned.  */
    418 
    419 static regoff_t
    420 internal_function
    421 re_search_stub (struct re_pattern_buffer *bufp,
    422 		const char *string, Idx length,
    423 		Idx start, regoff_t range, Idx stop, struct re_registers *regs,
    424 		bool ret_len)
    425 {
    426   reg_errcode_t result;
    427   regmatch_t *pmatch;
    428   Idx nregs;
    429   regoff_t rval;
    430   int eflags = 0;
    431 #ifdef _LIBC
    432   re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
    433 #endif
    434   Idx last_start = start + range;
    435 
    436   /* Check for out-of-range.  */
    437   if (BE (start < 0 || start > length, 0))
    438     return -1;
    439   if (BE (length < last_start || (0 <= range && last_start < start), 0))
    440     last_start = length;
    441   else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0))
    442     last_start = 0;
    443 
    444   __libc_lock_lock (dfa->lock);
    445 
    446   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
    447   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
    448 
    449   /* Compile fastmap if we haven't yet.  */
    450   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
    451     re_compile_fastmap (bufp);
    452 
    453   if (BE (bufp->no_sub, 0))
    454     regs = NULL;
    455 
    456   /* We need at least 1 register.  */
    457   if (regs == NULL)
    458     nregs = 1;
    459   else if (BE (bufp->regs_allocated == REGS_FIXED
    460 	       && regs->num_regs <= bufp->re_nsub, 0))
    461     {
    462       nregs = regs->num_regs;
    463       if (BE (nregs < 1, 0))
    464 	{
    465 	  /* Nothing can be copied to regs.  */
    466 	  regs = NULL;
    467 	  nregs = 1;
    468 	}
    469     }
    470   else
    471     nregs = bufp->re_nsub + 1;
    472   pmatch = re_malloc (regmatch_t, nregs);
    473   if (BE (pmatch == NULL, 0))
    474     {
    475       rval = -2;
    476       goto out;
    477     }
    478 
    479   result = re_search_internal (bufp, string, length, start, last_start, stop,
    480 			       nregs, pmatch, eflags);
    481 
    482   rval = 0;
    483 
    484   /* I hope we needn't fill ther regs with -1's when no match was found.  */
    485   if (result != REG_NOERROR)
    486     rval = -1;
    487   else if (regs != NULL)
    488     {
    489       /* If caller wants register contents data back, copy them.  */
    490       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
    491 					   bufp->regs_allocated);
    492       if (BE (bufp->regs_allocated == REGS_UNALLOCATED, 0))
    493 	rval = -2;
    494     }
    495 
    496   if (BE (rval == 0, 1))
    497     {
    498       if (ret_len)
    499 	{
    500 	  assert (pmatch[0].rm_so == start);
    501 	  rval = pmatch[0].rm_eo - start;
    502 	}
    503       else
    504 	rval = pmatch[0].rm_so;
    505     }
    506   re_free (pmatch);
    507  out:
    508   __libc_lock_unlock (dfa->lock);
    509   return rval;
    510 }
    511 
    512 static unsigned int
    513 internal_function
    514 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
    515 	      int regs_allocated)
    516 {
    517   int rval = REGS_REALLOCATE;
    518   Idx i;
    519   Idx need_regs = nregs + 1;
    520   /* We need one extra element beyond `num_regs' for the `-1' marker GNU code
    521      uses.  */
    522 
    523   /* Have the register data arrays been allocated?  */
    524   if (regs_allocated == REGS_UNALLOCATED)
    525     { /* No.  So allocate them with malloc.  */
    526       regs->start = re_malloc (regoff_t, need_regs);
    527       if (BE (regs->start == NULL, 0))
    528 	return REGS_UNALLOCATED;
    529       regs->end = re_malloc (regoff_t, need_regs);
    530       if (BE (regs->end == NULL, 0))
    531 	{
    532 	  re_free (regs->start);
    533 	  return REGS_UNALLOCATED;
    534 	}
    535       regs->num_regs = need_regs;
    536     }
    537   else if (regs_allocated == REGS_REALLOCATE)
    538     { /* Yes.  If we need more elements than were already
    539 	 allocated, reallocate them.  If we need fewer, just
    540 	 leave it alone.  */
    541       if (BE (need_regs > regs->num_regs, 0))
    542 	{
    543 	  regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
    544 	  regoff_t *new_end;
    545 	  if (BE (new_start == NULL, 0))
    546 	    return REGS_UNALLOCATED;
    547 	  new_end = re_realloc (regs->end, regoff_t, need_regs);
    548 	  if (BE (new_end == NULL, 0))
    549 	    {
    550 	      re_free (new_start);
    551 	      return REGS_UNALLOCATED;
    552 	    }
    553 	  regs->start = new_start;
    554 	  regs->end = new_end;
    555 	  regs->num_regs = need_regs;
    556 	}
    557     }
    558   else
    559     {
    560       assert (regs_allocated == REGS_FIXED);
    561       /* This function may not be called with REGS_FIXED and nregs too big.  */
    562       assert (regs->num_regs >= nregs);
    563       rval = REGS_FIXED;
    564     }
    565 
    566   /* Copy the regs.  */
    567   for (i = 0; i < nregs; ++i)
    568     {
    569       regs->start[i] = pmatch[i].rm_so;
    570       regs->end[i] = pmatch[i].rm_eo;
    571     }
    572   for ( ; i < regs->num_regs; ++i)
    573     regs->start[i] = regs->end[i] = -1;
    574 
    575   return rval;
    576 }
    577 
    578 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
    579    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
    580    this memory for recording register information.  STARTS and ENDS
    581    must be allocated using the malloc library routine, and must each
    582    be at least NUM_REGS * sizeof (regoff_t) bytes long.
    583 
    584    If NUM_REGS == 0, then subsequent matches should allocate their own
    585    register data.
    586 
    587    Unless this function is called, the first search or match using
    588    PATTERN_BUFFER will allocate its own register data, without
    589    freeing the old data.  */
    590 
    591 void
    592 re_set_registers (bufp, regs, num_regs, starts, ends)
    593     struct re_pattern_buffer *bufp;
    594     struct re_registers *regs;
    595     __re_size_t num_regs;
    596     regoff_t *starts, *ends;
    597 {
    598   if (num_regs)
    599     {
    600       bufp->regs_allocated = REGS_REALLOCATE;
    601       regs->num_regs = num_regs;
    602       regs->start = starts;
    603       regs->end = ends;
    604     }
    605   else
    606     {
    607       bufp->regs_allocated = REGS_UNALLOCATED;
    608       regs->num_regs = 0;
    609       regs->start = regs->end = NULL;
    610     }
    611 }
    612 #ifdef _LIBC
    613 weak_alias (__re_set_registers, re_set_registers)
    614 #endif
    615 
    616 /* Entry points compatible with 4.2 BSD regex library.  We don't define
    618    them unless specifically requested.  */
    619 
    620 #if defined _REGEX_RE_COMP || defined _LIBC
    621 int
    622 # ifdef _LIBC
    623 weak_function
    624 # endif
    625 re_exec (s)
    626      const char *s;
    627 {
    628   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
    629 }
    630 #endif /* _REGEX_RE_COMP */
    631 
    632 /* Internal entry point.  */
    634 
    635 /* Searches for a compiled pattern PREG in the string STRING, whose
    636    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
    637    meaning as with regexec.  LAST_START is START + RANGE, where
    638    START and RANGE have the same meaning as with re_search.
    639    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
    640    otherwise return the error code.
    641    Note: We assume front end functions already check ranges.
    642    (0 <= LAST_START && LAST_START <= LENGTH)  */
    643 
    644 static reg_errcode_t
    645 internal_function
    646 re_search_internal (const regex_t *preg,
    647 		    const char *string, Idx length,
    648 		    Idx start, Idx last_start, Idx stop,
    649 		    size_t nmatch, regmatch_t pmatch[],
    650 		    int eflags)
    651 {
    652   reg_errcode_t err;
    653   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
    654   Idx left_lim, right_lim;
    655   int incr;
    656   bool fl_longest_match;
    657   int match_kind;
    658   Idx match_first;
    659   Idx match_last = REG_MISSING;
    660   Idx extra_nmatch;
    661   bool sb;
    662   int ch;
    663 #if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
    664   re_match_context_t mctx = { .dfa = dfa };
    665 #else
    666   re_match_context_t mctx;
    667 #endif
    668   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
    669 		    && start != last_start && !preg->can_be_null)
    670 		   ? preg->fastmap : NULL);
    671   RE_TRANSLATE_TYPE t = preg->translate;
    672 
    673 #if !(defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
    674   memset (&mctx, '\0', sizeof (re_match_context_t));
    675   mctx.dfa = dfa;
    676 #endif
    677 
    678   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
    679   nmatch -= extra_nmatch;
    680 
    681   /* Check if the DFA haven't been compiled.  */
    682   if (BE (preg->used == 0 || dfa->init_state == NULL
    683 	  || dfa->init_state_word == NULL || dfa->init_state_nl == NULL
    684 	  || dfa->init_state_begbuf == NULL, 0))
    685     return REG_NOMATCH;
    686 
    687 #ifdef DEBUG
    688   /* We assume front-end functions already check them.  */
    689   assert (0 <= last_start && last_start <= length);
    690 #endif
    691 
    692   /* If initial states with non-begbuf contexts have no elements,
    693      the regex must be anchored.  If preg->newline_anchor is set,
    694      we'll never use init_state_nl, so do not check it.  */
    695   if (dfa->init_state->nodes.nelem == 0
    696       && dfa->init_state_word->nodes.nelem == 0
    697       && (dfa->init_state_nl->nodes.nelem == 0
    698 	  || !preg->newline_anchor))
    699     {
    700       if (start != 0 && last_start != 0)
    701         return REG_NOMATCH;
    702       start = last_start = 0;
    703     }
    704 
    705   /* We must check the longest matching, if nmatch > 0.  */
    706   fl_longest_match = (nmatch != 0 || dfa->nbackref);
    707 
    708   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
    709 			    preg->translate, (preg->syntax & RE_ICASE) != 0,
    710 			    dfa);
    711   if (BE (err != REG_NOERROR, 0))
    712     goto free_return;
    713   mctx.input.stop = stop;
    714   mctx.input.raw_stop = stop;
    715   mctx.input.newline_anchor = preg->newline_anchor;
    716 
    717   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
    718   if (BE (err != REG_NOERROR, 0))
    719     goto free_return;
    720 
    721   /* We will log all the DFA states through which the dfa pass,
    722      if nmatch > 1, or this dfa has "multibyte node", which is a
    723      back-reference or a node which can accept multibyte character or
    724      multi character collating element.  */
    725   if (nmatch > 1 || dfa->has_mb_node)
    726     {
    727       /* Avoid overflow.  */
    728       if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0))
    729 	{
    730 	  err = REG_ESPACE;
    731 	  goto free_return;
    732 	}
    733 
    734       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
    735       if (BE (mctx.state_log == NULL, 0))
    736 	{
    737 	  err = REG_ESPACE;
    738 	  goto free_return;
    739 	}
    740     }
    741   else
    742     mctx.state_log = NULL;
    743 
    744   match_first = start;
    745   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
    746 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
    747 
    748   /* Check incrementally whether of not the input string match.  */
    749   incr = (last_start < start) ? -1 : 1;
    750   left_lim = (last_start < start) ? last_start : start;
    751   right_lim = (last_start < start) ? start : last_start;
    752   sb = dfa->mb_cur_max == 1;
    753   match_kind =
    754     (fastmap
    755      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
    756 	| (start <= last_start ? 2 : 0)
    757 	| (t != NULL ? 1 : 0))
    758      : 8);
    759 
    760   for (;; match_first += incr)
    761     {
    762       err = REG_NOMATCH;
    763       if (match_first < left_lim || right_lim < match_first)
    764 	goto free_return;
    765 
    766       /* Advance as rapidly as possible through the string, until we
    767 	 find a plausible place to start matching.  This may be done
    768 	 with varying efficiency, so there are various possibilities:
    769 	 only the most common of them are specialized, in order to
    770 	 save on code size.  We use a switch statement for speed.  */
    771       switch (match_kind)
    772 	{
    773 	case 8:
    774 	  /* No fastmap.  */
    775 	  break;
    776 
    777 	case 7:
    778 	  /* Fastmap with single-byte translation, match forward.  */
    779 	  while (BE (match_first < right_lim, 1)
    780 		 && !fastmap[t[(unsigned char) string[match_first]]])
    781 	    ++match_first;
    782 	  goto forward_match_found_start_or_reached_end;
    783 
    784 	case 6:
    785 	  /* Fastmap without translation, match forward.  */
    786 	  while (BE (match_first < right_lim, 1)
    787 		 && !fastmap[(unsigned char) string[match_first]])
    788 	    ++match_first;
    789 
    790 	forward_match_found_start_or_reached_end:
    791 	  if (BE (match_first == right_lim, 0))
    792 	    {
    793 	      ch = match_first >= length
    794 		       ? 0 : (unsigned char) string[match_first];
    795 	      if (!fastmap[t ? t[ch] : ch])
    796 		goto free_return;
    797 	    }
    798 	  break;
    799 
    800 	case 4:
    801 	case 5:
    802 	  /* Fastmap without multi-byte translation, match backwards.  */
    803 	  while (match_first >= left_lim)
    804 	    {
    805 	      ch = match_first >= length
    806 		       ? 0 : (unsigned char) string[match_first];
    807 	      if (fastmap[t ? t[ch] : ch])
    808 		break;
    809 	      --match_first;
    810 	    }
    811 	  if (match_first < left_lim)
    812 	    goto free_return;
    813 	  break;
    814 
    815 	default:
    816 	  /* In this case, we can't determine easily the current byte,
    817 	     since it might be a component byte of a multibyte
    818 	     character.  Then we use the constructed buffer instead.  */
    819 	  for (;;)
    820 	    {
    821 	      /* If MATCH_FIRST is out of the valid range, reconstruct the
    822 		 buffers.  */
    823 	      __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
    824 	      if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, 0))
    825 		{
    826 		  err = re_string_reconstruct (&mctx.input, match_first,
    827 					       eflags);
    828 		  if (BE (err != REG_NOERROR, 0))
    829 		    goto free_return;
    830 
    831 		  offset = match_first - mctx.input.raw_mbs_idx;
    832 		}
    833 	      /* If MATCH_FIRST is out of the buffer, leave it as '\0'.
    834 		 Note that MATCH_FIRST must not be smaller than 0.  */
    835 	      ch = (match_first >= length
    836 		    ? 0 : re_string_byte_at (&mctx.input, offset));
    837 	      if (fastmap[ch])
    838 		break;
    839 	      match_first += incr;
    840 	      if (match_first < left_lim || match_first > right_lim)
    841 	        {
    842 	          err = REG_NOMATCH;
    843 	          goto free_return;
    844 	        }
    845 	    }
    846 	  break;
    847 	}
    848 
    849       /* Reconstruct the buffers so that the matcher can assume that
    850 	 the matching starts from the beginning of the buffer.  */
    851       err = re_string_reconstruct (&mctx.input, match_first, eflags);
    852       if (BE (err != REG_NOERROR, 0))
    853 	goto free_return;
    854 
    855 #ifdef RE_ENABLE_I18N
    856      /* Don't consider this char as a possible match start if it part,
    857 	yet isn't the head, of a multibyte character.  */
    858       if (!sb && !re_string_first_byte (&mctx.input, 0))
    859 	continue;
    860 #endif
    861 
    862       /* It seems to be appropriate one, then use the matcher.  */
    863       /* We assume that the matching starts from 0.  */
    864       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
    865       match_last = check_matching (&mctx, fl_longest_match,
    866 				   start <= last_start ? &match_first : NULL);
    867       if (match_last != REG_MISSING)
    868 	{
    869 	  if (BE (match_last == REG_ERROR, 0))
    870 	    {
    871 	      err = REG_ESPACE;
    872 	      goto free_return;
    873 	    }
    874 	  else
    875 	    {
    876 	      mctx.match_last = match_last;
    877 	      if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
    878 		{
    879 		  re_dfastate_t *pstate = mctx.state_log[match_last];
    880 		  mctx.last_node = check_halt_state_context (&mctx, pstate,
    881 							     match_last);
    882 		}
    883 	      if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
    884 		  || dfa->nbackref)
    885 		{
    886 		  err = prune_impossible_nodes (&mctx);
    887 		  if (err == REG_NOERROR)
    888 		    break;
    889 		  if (BE (err != REG_NOMATCH, 0))
    890 		    goto free_return;
    891 		  match_last = REG_MISSING;
    892 		}
    893 	      else
    894 		break; /* We found a match.  */
    895 	    }
    896 	}
    897 
    898       match_ctx_clean (&mctx);
    899     }
    900 
    901 #ifdef DEBUG
    902   assert (match_last != REG_MISSING);
    903   assert (err == REG_NOERROR);
    904 #endif
    905 
    906   /* Set pmatch[] if we need.  */
    907   if (nmatch > 0)
    908     {
    909       Idx reg_idx;
    910 
    911       /* Initialize registers.  */
    912       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
    913 	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
    914 
    915       /* Set the points where matching start/end.  */
    916       pmatch[0].rm_so = 0;
    917       pmatch[0].rm_eo = mctx.match_last;
    918       /* FIXME: This function should fail if mctx.match_last exceeds
    919 	 the maximum possible regoff_t value.  We need a new error
    920 	 code REG_OVERFLOW.  */
    921 
    922       if (!preg->no_sub && nmatch > 1)
    923 	{
    924 	  err = set_regs (preg, &mctx, nmatch, pmatch,
    925 			  dfa->has_plural_match && dfa->nbackref > 0);
    926 	  if (BE (err != REG_NOERROR, 0))
    927 	    goto free_return;
    928 	}
    929 
    930       /* At last, add the offset to the each registers, since we slided
    931 	 the buffers so that we could assume that the matching starts
    932 	 from 0.  */
    933       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
    934 	if (pmatch[reg_idx].rm_so != -1)
    935 	  {
    936 #ifdef RE_ENABLE_I18N
    937 	    if (BE (mctx.input.offsets_needed != 0, 0))
    938 	      {
    939 		pmatch[reg_idx].rm_so =
    940 		  (pmatch[reg_idx].rm_so == mctx.input.valid_len
    941 		   ? mctx.input.valid_raw_len
    942 		   : mctx.input.offsets[pmatch[reg_idx].rm_so]);
    943 		pmatch[reg_idx].rm_eo =
    944 		  (pmatch[reg_idx].rm_eo == mctx.input.valid_len
    945 		   ? mctx.input.valid_raw_len
    946 		   : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
    947 	      }
    948 #else
    949 	    assert (mctx.input.offsets_needed == 0);
    950 #endif
    951 	    pmatch[reg_idx].rm_so += match_first;
    952 	    pmatch[reg_idx].rm_eo += match_first;
    953 	  }
    954       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
    955 	{
    956 	  pmatch[nmatch + reg_idx].rm_so = -1;
    957 	  pmatch[nmatch + reg_idx].rm_eo = -1;
    958 	}
    959 
    960       if (dfa->subexp_map)
    961         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
    962           if (dfa->subexp_map[reg_idx] != reg_idx)
    963             {
    964               pmatch[reg_idx + 1].rm_so
    965                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
    966               pmatch[reg_idx + 1].rm_eo
    967                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
    968             }
    969     }
    970 
    971  free_return:
    972   re_free (mctx.state_log);
    973   if (dfa->nbackref)
    974     match_ctx_free (&mctx);
    975   re_string_destruct (&mctx.input);
    976   return err;
    977 }
    978 
    979 static reg_errcode_t
    980 internal_function
    981 prune_impossible_nodes (re_match_context_t *mctx)
    982 {
    983   const re_dfa_t *const dfa = mctx->dfa;
    984   Idx halt_node, match_last;
    985   reg_errcode_t ret;
    986   re_dfastate_t **sifted_states;
    987   re_dfastate_t **lim_states = NULL;
    988   re_sift_context_t sctx;
    989 #ifdef DEBUG
    990   assert (mctx->state_log != NULL);
    991 #endif
    992   match_last = mctx->match_last;
    993   halt_node = mctx->last_node;
    994 
    995   /* Avoid overflow.  */
    996   if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0))
    997     return REG_ESPACE;
    998 
    999   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
   1000   if (BE (sifted_states == NULL, 0))
   1001     {
   1002       ret = REG_ESPACE;
   1003       goto free_return;
   1004     }
   1005   if (dfa->nbackref)
   1006     {
   1007       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
   1008       if (BE (lim_states == NULL, 0))
   1009 	{
   1010 	  ret = REG_ESPACE;
   1011 	  goto free_return;
   1012 	}
   1013       while (1)
   1014 	{
   1015 	  memset (lim_states, '\0',
   1016 		  sizeof (re_dfastate_t *) * (match_last + 1));
   1017 	  sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
   1018 			 match_last);
   1019 	  ret = sift_states_backward (mctx, &sctx);
   1020 	  re_node_set_free (&sctx.limits);
   1021 	  if (BE (ret != REG_NOERROR, 0))
   1022 	      goto free_return;
   1023 	  if (sifted_states[0] != NULL || lim_states[0] != NULL)
   1024 	    break;
   1025 	  do
   1026 	    {
   1027 	      --match_last;
   1028 	      if (! REG_VALID_INDEX (match_last))
   1029 		{
   1030 		  ret = REG_NOMATCH;
   1031 		  goto free_return;
   1032 		}
   1033 	    } while (mctx->state_log[match_last] == NULL
   1034 		     || !mctx->state_log[match_last]->halt);
   1035 	  halt_node = check_halt_state_context (mctx,
   1036 						mctx->state_log[match_last],
   1037 						match_last);
   1038 	}
   1039       ret = merge_state_array (dfa, sifted_states, lim_states,
   1040 			       match_last + 1);
   1041       re_free (lim_states);
   1042       lim_states = NULL;
   1043       if (BE (ret != REG_NOERROR, 0))
   1044 	goto free_return;
   1045     }
   1046   else
   1047     {
   1048       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
   1049       ret = sift_states_backward (mctx, &sctx);
   1050       re_node_set_free (&sctx.limits);
   1051       if (BE (ret != REG_NOERROR, 0))
   1052 	goto free_return;
   1053       if (sifted_states[0] == NULL)
   1054 	{
   1055 	  ret = REG_NOMATCH;
   1056 	  goto free_return;
   1057 	}
   1058     }
   1059   re_free (mctx->state_log);
   1060   mctx->state_log = sifted_states;
   1061   sifted_states = NULL;
   1062   mctx->last_node = halt_node;
   1063   mctx->match_last = match_last;
   1064   ret = REG_NOERROR;
   1065  free_return:
   1066   re_free (sifted_states);
   1067   re_free (lim_states);
   1068   return ret;
   1069 }
   1070 
   1071 /* Acquire an initial state and return it.
   1072    We must select appropriate initial state depending on the context,
   1073    since initial states may have constraints like "\<", "^", etc..  */
   1074 
   1075 static inline re_dfastate_t *
   1076 __attribute ((always_inline)) internal_function
   1077 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
   1078 			    Idx idx)
   1079 {
   1080   const re_dfa_t *const dfa = mctx->dfa;
   1081   if (dfa->init_state->has_constraint)
   1082     {
   1083       unsigned int context;
   1084       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
   1085       if (IS_WORD_CONTEXT (context))
   1086 	return dfa->init_state_word;
   1087       else if (IS_ORDINARY_CONTEXT (context))
   1088 	return dfa->init_state;
   1089       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
   1090 	return dfa->init_state_begbuf;
   1091       else if (IS_NEWLINE_CONTEXT (context))
   1092 	return dfa->init_state_nl;
   1093       else if (IS_BEGBUF_CONTEXT (context))
   1094 	{
   1095 	  /* It is relatively rare case, then calculate on demand.  */
   1096 	  return re_acquire_state_context (err, dfa,
   1097 					   dfa->init_state->entrance_nodes,
   1098 					   context);
   1099 	}
   1100       else
   1101 	/* Must not happen?  */
   1102 	return dfa->init_state;
   1103     }
   1104   else
   1105     return dfa->init_state;
   1106 }
   1107 
   1108 /* Check whether the regular expression match input string INPUT or not,
   1109    and return the index where the matching end.  Return REG_MISSING if
   1110    there is no match, and return REG_ERROR in case of an error.
   1111    FL_LONGEST_MATCH means we want the POSIX longest matching.
   1112    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
   1113    next place where we may want to try matching.
   1114    Note that the matcher assume that the maching starts from the current
   1115    index of the buffer.  */
   1116 
   1117 static Idx
   1118 internal_function
   1119 check_matching (re_match_context_t *mctx, bool fl_longest_match,
   1120 		Idx *p_match_first)
   1121 {
   1122   const re_dfa_t *const dfa = mctx->dfa;
   1123   reg_errcode_t err;
   1124   Idx match = 0;
   1125   Idx match_last = REG_MISSING;
   1126   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   1127   re_dfastate_t *cur_state;
   1128   bool at_init_state = p_match_first != NULL;
   1129   Idx next_start_idx = cur_str_idx;
   1130 
   1131   err = REG_NOERROR;
   1132   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
   1133   /* An initial state must not be NULL (invalid).  */
   1134   if (BE (cur_state == NULL, 0))
   1135     {
   1136       assert (err == REG_ESPACE);
   1137       return REG_ERROR;
   1138     }
   1139 
   1140   if (mctx->state_log != NULL)
   1141     {
   1142       mctx->state_log[cur_str_idx] = cur_state;
   1143 
   1144       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
   1145 	 later.  E.g. Processing back references.  */
   1146       if (BE (dfa->nbackref, 0))
   1147 	{
   1148 	  at_init_state = false;
   1149 	  err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
   1150 	  if (BE (err != REG_NOERROR, 0))
   1151 	    return err;
   1152 
   1153 	  if (cur_state->has_backref)
   1154 	    {
   1155 	      err = transit_state_bkref (mctx, &cur_state->nodes);
   1156 	      if (BE (err != REG_NOERROR, 0))
   1157 	        return err;
   1158 	    }
   1159 	}
   1160     }
   1161 
   1162   /* If the RE accepts NULL string.  */
   1163   if (BE (cur_state->halt, 0))
   1164     {
   1165       if (!cur_state->has_constraint
   1166 	  || check_halt_state_context (mctx, cur_state, cur_str_idx))
   1167 	{
   1168 	  if (!fl_longest_match)
   1169 	    return cur_str_idx;
   1170 	  else
   1171 	    {
   1172 	      match_last = cur_str_idx;
   1173 	      match = 1;
   1174 	    }
   1175 	}
   1176     }
   1177 
   1178   while (!re_string_eoi (&mctx->input))
   1179     {
   1180       re_dfastate_t *old_state = cur_state;
   1181       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
   1182 
   1183       if (BE (next_char_idx >= mctx->input.bufs_len, 0)
   1184           || (BE (next_char_idx >= mctx->input.valid_len, 0)
   1185               && mctx->input.valid_len < mctx->input.len))
   1186         {
   1187           err = extend_buffers (mctx);
   1188           if (BE (err != REG_NOERROR, 0))
   1189 	    {
   1190 	      assert (err == REG_ESPACE);
   1191 	      return REG_ERROR;
   1192 	    }
   1193         }
   1194 
   1195       cur_state = transit_state (&err, mctx, cur_state);
   1196       if (mctx->state_log != NULL)
   1197 	cur_state = merge_state_with_log (&err, mctx, cur_state);
   1198 
   1199       if (cur_state == NULL)
   1200 	{
   1201 	  /* Reached the invalid state or an error.  Try to recover a valid
   1202 	     state using the state log, if available and if we have not
   1203 	     already found a valid (even if not the longest) match.  */
   1204 	  if (BE (err != REG_NOERROR, 0))
   1205 	    return REG_ERROR;
   1206 
   1207 	  if (mctx->state_log == NULL
   1208 	      || (match && !fl_longest_match)
   1209 	      || (cur_state = find_recover_state (&err, mctx)) == NULL)
   1210 	    break;
   1211 	}
   1212 
   1213       if (BE (at_init_state, 0))
   1214 	{
   1215 	  if (old_state == cur_state)
   1216 	    next_start_idx = next_char_idx;
   1217 	  else
   1218 	    at_init_state = false;
   1219 	}
   1220 
   1221       if (cur_state->halt)
   1222 	{
   1223 	  /* Reached a halt state.
   1224 	     Check the halt state can satisfy the current context.  */
   1225 	  if (!cur_state->has_constraint
   1226 	      || check_halt_state_context (mctx, cur_state,
   1227 					   re_string_cur_idx (&mctx->input)))
   1228 	    {
   1229 	      /* We found an appropriate halt state.  */
   1230 	      match_last = re_string_cur_idx (&mctx->input);
   1231 	      match = 1;
   1232 
   1233 	      /* We found a match, do not modify match_first below.  */
   1234 	      p_match_first = NULL;
   1235 	      if (!fl_longest_match)
   1236 		break;
   1237 	    }
   1238 	}
   1239     }
   1240 
   1241   if (p_match_first)
   1242     *p_match_first += next_start_idx;
   1243 
   1244   return match_last;
   1245 }
   1246 
   1247 /* Check NODE match the current context.  */
   1248 
   1249 static bool
   1250 internal_function
   1251 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
   1252 {
   1253   re_token_type_t type = dfa->nodes[node].type;
   1254   unsigned int constraint = dfa->nodes[node].constraint;
   1255   if (type != END_OF_RE)
   1256     return false;
   1257   if (!constraint)
   1258     return true;
   1259   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
   1260     return false;
   1261   return true;
   1262 }
   1263 
   1264 /* Check the halt state STATE match the current context.
   1265    Return 0 if not match, if the node, STATE has, is a halt node and
   1266    match the context, return the node.  */
   1267 
   1268 static Idx
   1269 internal_function
   1270 check_halt_state_context (const re_match_context_t *mctx,
   1271 			  const re_dfastate_t *state, Idx idx)
   1272 {
   1273   Idx i;
   1274   unsigned int context;
   1275 #ifdef DEBUG
   1276   assert (state->halt);
   1277 #endif
   1278   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
   1279   for (i = 0; i < state->nodes.nelem; ++i)
   1280     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
   1281       return state->nodes.elems[i];
   1282   return 0;
   1283 }
   1284 
   1285 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
   1286    corresponding to the DFA).
   1287    Return the destination node, and update EPS_VIA_NODES;
   1288    return REG_MISSING in case of errors.  */
   1289 
   1290 static Idx
   1291 internal_function
   1292 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
   1293 		   Idx *pidx, Idx node, re_node_set *eps_via_nodes,
   1294 		   struct re_fail_stack_t *fs)
   1295 {
   1296   const re_dfa_t *const dfa = mctx->dfa;
   1297   Idx i;
   1298   bool ok;
   1299   if (IS_EPSILON_NODE (dfa->nodes[node].type))
   1300     {
   1301       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
   1302       re_node_set *edests = &dfa->edests[node];
   1303       Idx dest_node;
   1304       ok = re_node_set_insert (eps_via_nodes, node);
   1305       if (BE (! ok, 0))
   1306 	return REG_ERROR;
   1307       /* Pick up a valid destination, or return REG_MISSING if none
   1308 	 is found.  */
   1309       for (dest_node = REG_MISSING, i = 0; i < edests->nelem; ++i)
   1310 	{
   1311 	  Idx candidate = edests->elems[i];
   1312 	  if (!re_node_set_contains (cur_nodes, candidate))
   1313 	    continue;
   1314           if (dest_node == REG_MISSING)
   1315 	    dest_node = candidate;
   1316 
   1317           else
   1318 	    {
   1319 	      /* In order to avoid infinite loop like "(a*)*", return the second
   1320 	         epsilon-transition if the first was already considered.  */
   1321 	      if (re_node_set_contains (eps_via_nodes, dest_node))
   1322 	        return candidate;
   1323 
   1324 	      /* Otherwise, push the second epsilon-transition on the fail stack.  */
   1325 	      else if (fs != NULL
   1326 		       && push_fail_stack (fs, *pidx, candidate, nregs, regs,
   1327 				           eps_via_nodes))
   1328 		return REG_ERROR;
   1329 
   1330 	      /* We know we are going to exit.  */
   1331 	      break;
   1332 	    }
   1333 	}
   1334       return dest_node;
   1335     }
   1336   else
   1337     {
   1338       Idx naccepted = 0;
   1339       re_token_type_t type = dfa->nodes[node].type;
   1340 
   1341 #ifdef RE_ENABLE_I18N
   1342       if (dfa->nodes[node].accept_mb)
   1343 	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
   1344       else
   1345 #endif /* RE_ENABLE_I18N */
   1346       if (type == OP_BACK_REF)
   1347 	{
   1348 	  Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
   1349 	  naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
   1350 	  if (fs != NULL)
   1351 	    {
   1352 	      if (regs[subexp_idx].rm_so == -1 || regs[subexp_idx].rm_eo == -1)
   1353 		return REG_MISSING;
   1354 	      else if (naccepted)
   1355 		{
   1356 		  char *buf = (char *) re_string_get_buffer (&mctx->input);
   1357 		  if (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
   1358 			      naccepted) != 0)
   1359 		    return REG_MISSING;
   1360 		}
   1361 	    }
   1362 
   1363 	  if (naccepted == 0)
   1364 	    {
   1365 	      Idx dest_node;
   1366 	      ok = re_node_set_insert (eps_via_nodes, node);
   1367 	      if (BE (! ok, 0))
   1368 		return REG_ERROR;
   1369 	      dest_node = dfa->edests[node].elems[0];
   1370 	      if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
   1371 					dest_node))
   1372 		return dest_node;
   1373 	    }
   1374 	}
   1375 
   1376       if (naccepted != 0
   1377 	  || check_node_accept (mctx, dfa->nodes + node, *pidx))
   1378 	{
   1379 	  Idx dest_node = dfa->nexts[node];
   1380 	  *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
   1381 	  if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
   1382 		     || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
   1383 					       dest_node)))
   1384 	    return REG_MISSING;
   1385 	  re_node_set_empty (eps_via_nodes);
   1386 	  return dest_node;
   1387 	}
   1388     }
   1389   return REG_MISSING;
   1390 }
   1391 
   1392 static reg_errcode_t
   1393 internal_function
   1394 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
   1395 		 Idx nregs, regmatch_t *regs, re_node_set *eps_via_nodes)
   1396 {
   1397   reg_errcode_t err;
   1398   Idx num = fs->num++;
   1399   if (fs->num == fs->alloc)
   1400     {
   1401       struct re_fail_stack_ent_t *new_array;
   1402       new_array = realloc (fs->stack, (sizeof (struct re_fail_stack_ent_t)
   1403 				       * fs->alloc * 2));
   1404       if (new_array == NULL)
   1405 	return REG_ESPACE;
   1406       fs->alloc *= 2;
   1407       fs->stack = new_array;
   1408     }
   1409   fs->stack[num].idx = str_idx;
   1410   fs->stack[num].node = dest_node;
   1411   fs->stack[num].regs = re_malloc (regmatch_t, nregs);
   1412   if (fs->stack[num].regs == NULL)
   1413     return REG_ESPACE;
   1414   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
   1415   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
   1416   return err;
   1417 }
   1418 
   1419 static Idx
   1420 internal_function
   1421 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
   1422 		regmatch_t *regs, re_node_set *eps_via_nodes)
   1423 {
   1424   Idx num = --fs->num;
   1425   assert (REG_VALID_INDEX (num));
   1426   *pidx = fs->stack[num].idx;
   1427   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
   1428   re_node_set_free (eps_via_nodes);
   1429   re_free (fs->stack[num].regs);
   1430   *eps_via_nodes = fs->stack[num].eps_via_nodes;
   1431   return fs->stack[num].node;
   1432 }
   1433 
   1434 /* Set the positions where the subexpressions are starts/ends to registers
   1435    PMATCH.
   1436    Note: We assume that pmatch[0] is already set, and
   1437    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
   1438 
   1439 static reg_errcode_t
   1440 internal_function
   1441 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
   1442 	  regmatch_t *pmatch, bool fl_backtrack)
   1443 {
   1444   const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer;
   1445   Idx idx, cur_node;
   1446   re_node_set eps_via_nodes;
   1447   struct re_fail_stack_t *fs;
   1448   struct re_fail_stack_t fs_body = { 0, 2, NULL };
   1449   regmatch_t *prev_idx_match;
   1450   bool prev_idx_match_malloced = false;
   1451 
   1452 #ifdef DEBUG
   1453   assert (nmatch > 1);
   1454   assert (mctx->state_log != NULL);
   1455 #endif
   1456   if (fl_backtrack)
   1457     {
   1458       fs = &fs_body;
   1459       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
   1460       if (fs->stack == NULL)
   1461 	return REG_ESPACE;
   1462     }
   1463   else
   1464     fs = NULL;
   1465 
   1466   cur_node = dfa->init_node;
   1467   re_node_set_init_empty (&eps_via_nodes);
   1468 
   1469   if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
   1470     prev_idx_match = (regmatch_t *) alloca (nmatch * sizeof (regmatch_t));
   1471   else
   1472     {
   1473       prev_idx_match = re_malloc (regmatch_t, nmatch);
   1474       if (prev_idx_match == NULL)
   1475 	{
   1476 	  free_fail_stack_return (fs);
   1477 	  return REG_ESPACE;
   1478 	}
   1479       prev_idx_match_malloced = true;
   1480     }
   1481   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
   1482 
   1483   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
   1484     {
   1485       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
   1486 
   1487       if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
   1488 	{
   1489 	  Idx reg_idx;
   1490 	  if (fs)
   1491 	    {
   1492 	      for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
   1493 		if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
   1494 		  break;
   1495 	      if (reg_idx == nmatch)
   1496 		{
   1497 		  re_node_set_free (&eps_via_nodes);
   1498 		  if (prev_idx_match_malloced)
   1499 		    re_free (prev_idx_match);
   1500 		  return free_fail_stack_return (fs);
   1501 		}
   1502 	      cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
   1503 					 &eps_via_nodes);
   1504 	    }
   1505 	  else
   1506 	    {
   1507 	      re_node_set_free (&eps_via_nodes);
   1508 	      if (prev_idx_match_malloced)
   1509 		re_free (prev_idx_match);
   1510 	      return REG_NOERROR;
   1511 	    }
   1512 	}
   1513 
   1514       /* Proceed to next node.  */
   1515       cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
   1516 				    &eps_via_nodes, fs);
   1517 
   1518       if (BE (! REG_VALID_INDEX (cur_node), 0))
   1519 	{
   1520 	  if (BE (cur_node == REG_ERROR, 0))
   1521 	    {
   1522 	      re_node_set_free (&eps_via_nodes);
   1523 	      if (prev_idx_match_malloced)
   1524 		re_free (prev_idx_match);
   1525 	      free_fail_stack_return (fs);
   1526 	      return REG_ESPACE;
   1527 	    }
   1528 	  if (fs)
   1529 	    cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
   1530 				       &eps_via_nodes);
   1531 	  else
   1532 	    {
   1533 	      re_node_set_free (&eps_via_nodes);
   1534 	      if (prev_idx_match_malloced)
   1535 		re_free (prev_idx_match);
   1536 	      return REG_NOMATCH;
   1537 	    }
   1538 	}
   1539     }
   1540   re_node_set_free (&eps_via_nodes);
   1541   if (prev_idx_match_malloced)
   1542     re_free (prev_idx_match);
   1543   return free_fail_stack_return (fs);
   1544 }
   1545 
   1546 static reg_errcode_t
   1547 internal_function
   1548 free_fail_stack_return (struct re_fail_stack_t *fs)
   1549 {
   1550   if (fs)
   1551     {
   1552       Idx fs_idx;
   1553       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
   1554 	{
   1555 	  re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
   1556 	  re_free (fs->stack[fs_idx].regs);
   1557 	}
   1558       re_free (fs->stack);
   1559     }
   1560   return REG_NOERROR;
   1561 }
   1562 
   1563 static void
   1564 internal_function
   1565 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
   1566 	     regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
   1567 {
   1568   int type = dfa->nodes[cur_node].type;
   1569   if (type == OP_OPEN_SUBEXP)
   1570     {
   1571       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
   1572 
   1573       /* We are at the first node of this sub expression.  */
   1574       if (reg_num < nmatch)
   1575 	{
   1576 	  pmatch[reg_num].rm_so = cur_idx;
   1577 	  pmatch[reg_num].rm_eo = -1;
   1578 	}
   1579     }
   1580   else if (type == OP_CLOSE_SUBEXP)
   1581     {
   1582       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
   1583       if (reg_num < nmatch)
   1584 	{
   1585 	  /* We are at the last node of this sub expression.  */
   1586 	  if (pmatch[reg_num].rm_so < cur_idx)
   1587 	    {
   1588 	      pmatch[reg_num].rm_eo = cur_idx;
   1589 	      /* This is a non-empty match or we are not inside an optional
   1590 		 subexpression.  Accept this right away.  */
   1591 	      memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
   1592 	    }
   1593 	  else
   1594 	    {
   1595 	      if (dfa->nodes[cur_node].opt_subexp
   1596 		  && prev_idx_match[reg_num].rm_so != -1)
   1597 		/* We transited through an empty match for an optional
   1598 		   subexpression, like (a?)*, and this is not the subexp's
   1599 		   first match.  Copy back the old content of the registers
   1600 		   so that matches of an inner subexpression are undone as
   1601 		   well, like in ((a?))*.  */
   1602 		memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
   1603 	      else
   1604 		/* We completed a subexpression, but it may be part of
   1605 		   an optional one, so do not update PREV_IDX_MATCH.  */
   1606 		pmatch[reg_num].rm_eo = cur_idx;
   1607 	    }
   1608 	}
   1609     }
   1610 }
   1611 
   1612 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
   1613    and sift the nodes in each states according to the following rules.
   1614    Updated state_log will be wrote to STATE_LOG.
   1615 
   1616    Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if...
   1617      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
   1618 	If `a' isn't the LAST_NODE and `a' can't epsilon transit to
   1619 	the LAST_NODE, we throw away the node `a'.
   1620      2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts
   1621 	string `s' and transit to `b':
   1622 	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
   1623 	   away the node `a'.
   1624 	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
   1625 	    thrown away, we throw away the node `a'.
   1626      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
   1627 	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
   1628 	   node `a'.
   1629 	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
   1630 	    we throw away the node `a'.  */
   1631 
   1632 #define STATE_NODE_CONTAINS(state,node) \
   1633   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
   1634 
   1635 static reg_errcode_t
   1636 internal_function
   1637 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
   1638 {
   1639   reg_errcode_t err;
   1640   int null_cnt = 0;
   1641   Idx str_idx = sctx->last_str_idx;
   1642   re_node_set cur_dest;
   1643 
   1644 #ifdef DEBUG
   1645   assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
   1646 #endif
   1647 
   1648   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
   1649      transit to the last_node and the last_node itself.  */
   1650   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
   1651   if (BE (err != REG_NOERROR, 0))
   1652     return err;
   1653   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
   1654   if (BE (err != REG_NOERROR, 0))
   1655     goto free_return;
   1656 
   1657   /* Then check each states in the state_log.  */
   1658   while (str_idx > 0)
   1659     {
   1660       /* Update counters.  */
   1661       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
   1662       if (null_cnt > mctx->max_mb_elem_len)
   1663 	{
   1664 	  memset (sctx->sifted_states, '\0',
   1665 		  sizeof (re_dfastate_t *) * str_idx);
   1666 	  re_node_set_free (&cur_dest);
   1667 	  return REG_NOERROR;
   1668 	}
   1669       re_node_set_empty (&cur_dest);
   1670       --str_idx;
   1671 
   1672       if (mctx->state_log[str_idx])
   1673 	{
   1674 	  err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
   1675           if (BE (err != REG_NOERROR, 0))
   1676 	    goto free_return;
   1677 	}
   1678 
   1679       /* Add all the nodes which satisfy the following conditions:
   1680 	 - It can epsilon transit to a node in CUR_DEST.
   1681 	 - It is in CUR_SRC.
   1682 	 And update state_log.  */
   1683       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
   1684       if (BE (err != REG_NOERROR, 0))
   1685 	goto free_return;
   1686     }
   1687   err = REG_NOERROR;
   1688  free_return:
   1689   re_node_set_free (&cur_dest);
   1690   return err;
   1691 }
   1692 
   1693 static reg_errcode_t
   1694 internal_function
   1695 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
   1696 		     Idx str_idx, re_node_set *cur_dest)
   1697 {
   1698   const re_dfa_t *const dfa = mctx->dfa;
   1699   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
   1700   Idx i;
   1701 
   1702   /* Then build the next sifted state.
   1703      We build the next sifted state on `cur_dest', and update
   1704      `sifted_states[str_idx]' with `cur_dest'.
   1705      Note:
   1706      `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
   1707      `cur_src' points the node_set of the old `state_log[str_idx]'
   1708      (with the epsilon nodes pre-filtered out).  */
   1709   for (i = 0; i < cur_src->nelem; i++)
   1710     {
   1711       Idx prev_node = cur_src->elems[i];
   1712       int naccepted = 0;
   1713       bool ok;
   1714 
   1715 #ifdef DEBUG
   1716       re_token_type_t type = dfa->nodes[prev_node].type;
   1717       assert (!IS_EPSILON_NODE (type));
   1718 #endif
   1719 #ifdef RE_ENABLE_I18N
   1720       /* If the node may accept `multi byte'.  */
   1721       if (dfa->nodes[prev_node].accept_mb)
   1722 	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
   1723 					 str_idx, sctx->last_str_idx);
   1724 #endif /* RE_ENABLE_I18N */
   1725 
   1726       /* We don't check backreferences here.
   1727 	 See update_cur_sifted_state().  */
   1728       if (!naccepted
   1729 	  && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
   1730 	  && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
   1731 				  dfa->nexts[prev_node]))
   1732 	naccepted = 1;
   1733 
   1734       if (naccepted == 0)
   1735 	continue;
   1736 
   1737       if (sctx->limits.nelem)
   1738 	{
   1739 	  Idx to_idx = str_idx + naccepted;
   1740 	  if (check_dst_limits (mctx, &sctx->limits,
   1741 				dfa->nexts[prev_node], to_idx,
   1742 				prev_node, str_idx))
   1743 	    continue;
   1744 	}
   1745       ok = re_node_set_insert (cur_dest, prev_node);
   1746       if (BE (! ok, 0))
   1747 	return REG_ESPACE;
   1748     }
   1749 
   1750   return REG_NOERROR;
   1751 }
   1752 
   1753 /* Helper functions.  */
   1754 
   1755 static reg_errcode_t
   1756 internal_function
   1757 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
   1758 {
   1759   Idx top = mctx->state_log_top;
   1760 
   1761   if (next_state_log_idx >= mctx->input.bufs_len
   1762       || (next_state_log_idx >= mctx->input.valid_len
   1763 	  && mctx->input.valid_len < mctx->input.len))
   1764     {
   1765       reg_errcode_t err;
   1766       err = extend_buffers (mctx);
   1767       if (BE (err != REG_NOERROR, 0))
   1768 	return err;
   1769     }
   1770 
   1771   if (top < next_state_log_idx)
   1772     {
   1773       memset (mctx->state_log + top + 1, '\0',
   1774 	      sizeof (re_dfastate_t *) * (next_state_log_idx - top));
   1775       mctx->state_log_top = next_state_log_idx;
   1776     }
   1777   return REG_NOERROR;
   1778 }
   1779 
   1780 static reg_errcode_t
   1781 internal_function
   1782 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
   1783 		   re_dfastate_t **src, Idx num)
   1784 {
   1785   Idx st_idx;
   1786   reg_errcode_t err;
   1787   for (st_idx = 0; st_idx < num; ++st_idx)
   1788     {
   1789       if (dst[st_idx] == NULL)
   1790 	dst[st_idx] = src[st_idx];
   1791       else if (src[st_idx] != NULL)
   1792 	{
   1793 	  re_node_set merged_set;
   1794 	  err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
   1795 					&src[st_idx]->nodes);
   1796 	  if (BE (err != REG_NOERROR, 0))
   1797 	    return err;
   1798 	  dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
   1799 	  re_node_set_free (&merged_set);
   1800 	  if (BE (err != REG_NOERROR, 0))
   1801 	    return err;
   1802 	}
   1803     }
   1804   return REG_NOERROR;
   1805 }
   1806 
   1807 static reg_errcode_t
   1808 internal_function
   1809 update_cur_sifted_state (const re_match_context_t *mctx,
   1810 			 re_sift_context_t *sctx, Idx str_idx,
   1811 			 re_node_set *dest_nodes)
   1812 {
   1813   const re_dfa_t *const dfa = mctx->dfa;
   1814   reg_errcode_t err = REG_NOERROR;
   1815   const re_node_set *candidates;
   1816   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
   1817 		: &mctx->state_log[str_idx]->nodes);
   1818 
   1819   if (dest_nodes->nelem == 0)
   1820     sctx->sifted_states[str_idx] = NULL;
   1821   else
   1822     {
   1823       if (candidates)
   1824 	{
   1825 	  /* At first, add the nodes which can epsilon transit to a node in
   1826 	     DEST_NODE.  */
   1827 	  err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
   1828 	  if (BE (err != REG_NOERROR, 0))
   1829 	    return err;
   1830 
   1831 	  /* Then, check the limitations in the current sift_context.  */
   1832 	  if (sctx->limits.nelem)
   1833 	    {
   1834 	      err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
   1835 					 mctx->bkref_ents, str_idx);
   1836 	      if (BE (err != REG_NOERROR, 0))
   1837 		return err;
   1838 	    }
   1839 	}
   1840 
   1841       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
   1842       if (BE (err != REG_NOERROR, 0))
   1843 	return err;
   1844     }
   1845 
   1846   if (candidates && mctx->state_log[str_idx]->has_backref)
   1847     {
   1848       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
   1849       if (BE (err != REG_NOERROR, 0))
   1850 	return err;
   1851     }
   1852   return REG_NOERROR;
   1853 }
   1854 
   1855 static reg_errcode_t
   1856 internal_function
   1857 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
   1858 		       const re_node_set *candidates)
   1859 {
   1860   reg_errcode_t err = REG_NOERROR;
   1861   Idx i;
   1862 
   1863   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
   1864   if (BE (err != REG_NOERROR, 0))
   1865     return err;
   1866 
   1867   if (!state->inveclosure.alloc)
   1868     {
   1869       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
   1870       if (BE (err != REG_NOERROR, 0))
   1871         return REG_ESPACE;
   1872       for (i = 0; i < dest_nodes->nelem; i++)
   1873         re_node_set_merge (&state->inveclosure,
   1874 			   dfa->inveclosures + dest_nodes->elems[i]);
   1875     }
   1876   return re_node_set_add_intersect (dest_nodes, candidates,
   1877 				    &state->inveclosure);
   1878 }
   1879 
   1880 static reg_errcode_t
   1881 internal_function
   1882 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
   1883 		       const re_node_set *candidates)
   1884 {
   1885     Idx ecl_idx;
   1886     reg_errcode_t err;
   1887     re_node_set *inv_eclosure = dfa->inveclosures + node;
   1888     re_node_set except_nodes;
   1889     re_node_set_init_empty (&except_nodes);
   1890     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
   1891       {
   1892 	Idx cur_node = inv_eclosure->elems[ecl_idx];
   1893 	if (cur_node == node)
   1894 	  continue;
   1895 	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
   1896 	  {
   1897 	    Idx edst1 = dfa->edests[cur_node].elems[0];
   1898 	    Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
   1899 			 ? dfa->edests[cur_node].elems[1] : REG_MISSING);
   1900 	    if ((!re_node_set_contains (inv_eclosure, edst1)
   1901 		 && re_node_set_contains (dest_nodes, edst1))
   1902 		|| (REG_VALID_NONZERO_INDEX (edst2)
   1903 		    && !re_node_set_contains (inv_eclosure, edst2)
   1904 		    && re_node_set_contains (dest_nodes, edst2)))
   1905 	      {
   1906 		err = re_node_set_add_intersect (&except_nodes, candidates,
   1907 						 dfa->inveclosures + cur_node);
   1908 		if (BE (err != REG_NOERROR, 0))
   1909 		  {
   1910 		    re_node_set_free (&except_nodes);
   1911 		    return err;
   1912 		  }
   1913 	      }
   1914 	  }
   1915       }
   1916     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
   1917       {
   1918 	Idx cur_node = inv_eclosure->elems[ecl_idx];
   1919 	if (!re_node_set_contains (&except_nodes, cur_node))
   1920 	  {
   1921 	    Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
   1922 	    re_node_set_remove_at (dest_nodes, idx);
   1923 	  }
   1924       }
   1925     re_node_set_free (&except_nodes);
   1926     return REG_NOERROR;
   1927 }
   1928 
   1929 static bool
   1930 internal_function
   1931 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
   1932 		  Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
   1933 {
   1934   const re_dfa_t *const dfa = mctx->dfa;
   1935   Idx lim_idx, src_pos, dst_pos;
   1936 
   1937   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
   1938   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
   1939   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
   1940     {
   1941       Idx subexp_idx;
   1942       struct re_backref_cache_entry *ent;
   1943       ent = mctx->bkref_ents + limits->elems[lim_idx];
   1944       subexp_idx = dfa->nodes[ent->node].opr.idx;
   1945 
   1946       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
   1947 					   subexp_idx, dst_node, dst_idx,
   1948 					   dst_bkref_idx);
   1949       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
   1950 					   subexp_idx, src_node, src_idx,
   1951 					   src_bkref_idx);
   1952 
   1953       /* In case of:
   1954 	 <src> <dst> ( <subexp> )
   1955 	 ( <subexp> ) <src> <dst>
   1956 	 ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
   1957       if (src_pos == dst_pos)
   1958 	continue; /* This is unrelated limitation.  */
   1959       else
   1960 	return true;
   1961     }
   1962   return false;
   1963 }
   1964 
   1965 static int
   1966 internal_function
   1967 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
   1968 			     Idx subexp_idx, Idx from_node, Idx bkref_idx)
   1969 {
   1970   const re_dfa_t *const dfa = mctx->dfa;
   1971   const re_node_set *eclosures = dfa->eclosures + from_node;
   1972   Idx node_idx;
   1973 
   1974   /* Else, we are on the boundary: examine the nodes on the epsilon
   1975      closure.  */
   1976   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
   1977     {
   1978       Idx node = eclosures->elems[node_idx];
   1979       switch (dfa->nodes[node].type)
   1980 	{
   1981 	case OP_BACK_REF:
   1982 	  if (bkref_idx != REG_MISSING)
   1983 	    {
   1984 	      struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
   1985 	      do
   1986 	        {
   1987 		  Idx dst;
   1988 		  int cpos;
   1989 
   1990 		  if (ent->node != node)
   1991 		    continue;
   1992 
   1993 		  if (subexp_idx < BITSET_WORD_BITS
   1994 		      && !(ent->eps_reachable_subexps_map
   1995 			   & ((bitset_word_t) 1 << subexp_idx)))
   1996 		    continue;
   1997 
   1998 		  /* Recurse trying to reach the OP_OPEN_SUBEXP and
   1999 		     OP_CLOSE_SUBEXP cases below.  But, if the
   2000 		     destination node is the same node as the source
   2001 		     node, don't recurse because it would cause an
   2002 		     infinite loop: a regex that exhibits this behavior
   2003 		     is ()\1*\1*  */
   2004 		  dst = dfa->edests[node].elems[0];
   2005 		  if (dst == from_node)
   2006 		    {
   2007 		      if (boundaries & 1)
   2008 		        return -1;
   2009 		      else /* if (boundaries & 2) */
   2010 		        return 0;
   2011 		    }
   2012 
   2013 		  cpos =
   2014 		    check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
   2015 						 dst, bkref_idx);
   2016 		  if (cpos == -1 /* && (boundaries & 1) */)
   2017 		    return -1;
   2018 		  if (cpos == 0 && (boundaries & 2))
   2019 		    return 0;
   2020 
   2021 		  if (subexp_idx < BITSET_WORD_BITS)
   2022 		    ent->eps_reachable_subexps_map
   2023 		      &= ~((bitset_word_t) 1 << subexp_idx);
   2024 	        }
   2025 	      while (ent++->more);
   2026 	    }
   2027 	  break;
   2028 
   2029 	case OP_OPEN_SUBEXP:
   2030 	  if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
   2031 	    return -1;
   2032 	  break;
   2033 
   2034 	case OP_CLOSE_SUBEXP:
   2035 	  if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
   2036 	    return 0;
   2037 	  break;
   2038 
   2039 	default:
   2040 	    break;
   2041 	}
   2042     }
   2043 
   2044   return (boundaries & 2) ? 1 : 0;
   2045 }
   2046 
   2047 static int
   2048 internal_function
   2049 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
   2050 			   Idx subexp_idx, Idx from_node, Idx str_idx,
   2051 			   Idx bkref_idx)
   2052 {
   2053   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
   2054   int boundaries;
   2055 
   2056   /* If we are outside the range of the subexpression, return -1 or 1.  */
   2057   if (str_idx < lim->subexp_from)
   2058     return -1;
   2059 
   2060   if (lim->subexp_to < str_idx)
   2061     return 1;
   2062 
   2063   /* If we are within the subexpression, return 0.  */
   2064   boundaries = (str_idx == lim->subexp_from);
   2065   boundaries |= (str_idx == lim->subexp_to) << 1;
   2066   if (boundaries == 0)
   2067     return 0;
   2068 
   2069   /* Else, examine epsilon closure.  */
   2070   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
   2071 				      from_node, bkref_idx);
   2072 }
   2073 
   2074 /* Check the limitations of sub expressions LIMITS, and remove the nodes
   2075    which are against limitations from DEST_NODES. */
   2076 
   2077 static reg_errcode_t
   2078 internal_function
   2079 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
   2080 		     const re_node_set *candidates, re_node_set *limits,
   2081 		     struct re_backref_cache_entry *bkref_ents, Idx str_idx)
   2082 {
   2083   reg_errcode_t err;
   2084   Idx node_idx, lim_idx;
   2085 
   2086   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
   2087     {
   2088       Idx subexp_idx;
   2089       struct re_backref_cache_entry *ent;
   2090       ent = bkref_ents + limits->elems[lim_idx];
   2091 
   2092       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
   2093 	continue; /* This is unrelated limitation.  */
   2094 
   2095       subexp_idx = dfa->nodes[ent->node].opr.idx;
   2096       if (ent->subexp_to == str_idx)
   2097 	{
   2098 	  Idx ops_node = REG_MISSING;
   2099 	  Idx cls_node = REG_MISSING;
   2100 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2101 	    {
   2102 	      Idx node = dest_nodes->elems[node_idx];
   2103 	      re_token_type_t type = dfa->nodes[node].type;
   2104 	      if (type == OP_OPEN_SUBEXP
   2105 		  && subexp_idx == dfa->nodes[node].opr.idx)
   2106 		ops_node = node;
   2107 	      else if (type == OP_CLOSE_SUBEXP
   2108 		       && subexp_idx == dfa->nodes[node].opr.idx)
   2109 		cls_node = node;
   2110 	    }
   2111 
   2112 	  /* Check the limitation of the open subexpression.  */
   2113 	  /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
   2114 	  if (REG_VALID_INDEX (ops_node))
   2115 	    {
   2116 	      err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
   2117 					   candidates);
   2118 	      if (BE (err != REG_NOERROR, 0))
   2119 		return err;
   2120 	    }
   2121 
   2122 	  /* Check the limitation of the close subexpression.  */
   2123 	  if (REG_VALID_INDEX (cls_node))
   2124 	    for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2125 	      {
   2126 		Idx node = dest_nodes->elems[node_idx];
   2127 		if (!re_node_set_contains (dfa->inveclosures + node,
   2128 					   cls_node)
   2129 		    && !re_node_set_contains (dfa->eclosures + node,
   2130 					      cls_node))
   2131 		  {
   2132 		    /* It is against this limitation.
   2133 		       Remove it form the current sifted state.  */
   2134 		    err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
   2135 						 candidates);
   2136 		    if (BE (err != REG_NOERROR, 0))
   2137 		      return err;
   2138 		    --node_idx;
   2139 		  }
   2140 	      }
   2141 	}
   2142       else /* (ent->subexp_to != str_idx)  */
   2143 	{
   2144 	  for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
   2145 	    {
   2146 	      Idx node = dest_nodes->elems[node_idx];
   2147 	      re_token_type_t type = dfa->nodes[node].type;
   2148 	      if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
   2149 		{
   2150 		  if (subexp_idx != dfa->nodes[node].opr.idx)
   2151 		    continue;
   2152 		  /* It is against this limitation.
   2153 		     Remove it form the current sifted state.  */
   2154 		  err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
   2155 					       candidates);
   2156 		  if (BE (err != REG_NOERROR, 0))
   2157 		    return err;
   2158 		}
   2159 	    }
   2160 	}
   2161     }
   2162   return REG_NOERROR;
   2163 }
   2164 
   2165 static reg_errcode_t
   2166 internal_function
   2167 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
   2168 		   Idx str_idx, const re_node_set *candidates)
   2169 {
   2170   const re_dfa_t *const dfa = mctx->dfa;
   2171   reg_errcode_t err;
   2172   Idx node_idx, node;
   2173   re_sift_context_t local_sctx;
   2174   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
   2175 
   2176   if (first_idx == REG_MISSING)
   2177     return REG_NOERROR;
   2178 
   2179   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
   2180 
   2181   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
   2182     {
   2183       Idx enabled_idx;
   2184       re_token_type_t type;
   2185       struct re_backref_cache_entry *entry;
   2186       node = candidates->elems[node_idx];
   2187       type = dfa->nodes[node].type;
   2188       /* Avoid infinite loop for the REs like "()\1+".  */
   2189       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
   2190 	continue;
   2191       if (type != OP_BACK_REF)
   2192 	continue;
   2193 
   2194       entry = mctx->bkref_ents + first_idx;
   2195       enabled_idx = first_idx;
   2196       do
   2197 	{
   2198 	  Idx subexp_len;
   2199 	  Idx to_idx;
   2200 	  Idx dst_node;
   2201 	  bool ok;
   2202 	  re_dfastate_t *cur_state;
   2203 
   2204 	  if (entry->node != node)
   2205 	    continue;
   2206 	  subexp_len = entry->subexp_to - entry->subexp_from;
   2207 	  to_idx = str_idx + subexp_len;
   2208 	  dst_node = (subexp_len ? dfa->nexts[node]
   2209 		      : dfa->edests[node].elems[0]);
   2210 
   2211 	  if (to_idx > sctx->last_str_idx
   2212 	      || sctx->sifted_states[to_idx] == NULL
   2213 	      || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
   2214 	      || check_dst_limits (mctx, &sctx->limits, node,
   2215 				   str_idx, dst_node, to_idx))
   2216 	    continue;
   2217 
   2218 	  if (local_sctx.sifted_states == NULL)
   2219 	    {
   2220 	      local_sctx = *sctx;
   2221 	      err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
   2222 	      if (BE (err != REG_NOERROR, 0))
   2223 		goto free_return;
   2224 	    }
   2225 	  local_sctx.last_node = node;
   2226 	  local_sctx.last_str_idx = str_idx;
   2227 	  ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
   2228 	  if (BE (! ok, 0))
   2229 	    {
   2230 	      err = REG_ESPACE;
   2231 	      goto free_return;
   2232 	    }
   2233 	  cur_state = local_sctx.sifted_states[str_idx];
   2234 	  err = sift_states_backward (mctx, &local_sctx);
   2235 	  if (BE (err != REG_NOERROR, 0))
   2236 	    goto free_return;
   2237 	  if (sctx->limited_states != NULL)
   2238 	    {
   2239 	      err = merge_state_array (dfa, sctx->limited_states,
   2240 				       local_sctx.sifted_states,
   2241 				       str_idx + 1);
   2242 	      if (BE (err != REG_NOERROR, 0))
   2243 		goto free_return;
   2244 	    }
   2245 	  local_sctx.sifted_states[str_idx] = cur_state;
   2246 	  re_node_set_remove (&local_sctx.limits, enabled_idx);
   2247 
   2248 	  /* mctx->bkref_ents may have changed, reload the pointer.  */
   2249           entry = mctx->bkref_ents + enabled_idx;
   2250 	}
   2251       while (enabled_idx++, entry++->more);
   2252     }
   2253   err = REG_NOERROR;
   2254  free_return:
   2255   if (local_sctx.sifted_states != NULL)
   2256     {
   2257       re_node_set_free (&local_sctx.limits);
   2258     }
   2259 
   2260   return err;
   2261 }
   2262 
   2263 
   2264 #ifdef RE_ENABLE_I18N
   2265 static int
   2266 internal_function
   2267 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
   2268 		     Idx node_idx, Idx str_idx, Idx max_str_idx)
   2269 {
   2270   const re_dfa_t *const dfa = mctx->dfa;
   2271   int naccepted;
   2272   /* Check the node can accept `multi byte'.  */
   2273   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
   2274   if (naccepted > 0 && str_idx + naccepted <= max_str_idx &&
   2275       !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
   2276 			    dfa->nexts[node_idx]))
   2277     /* The node can't accept the `multi byte', or the
   2278        destination was already thrown away, then the node
   2279        could't accept the current input `multi byte'.   */
   2280     naccepted = 0;
   2281   /* Otherwise, it is sure that the node could accept
   2282      `naccepted' bytes input.  */
   2283   return naccepted;
   2284 }
   2285 #endif /* RE_ENABLE_I18N */
   2286 
   2287 
   2288 /* Functions for state transition.  */
   2290 
   2291 /* Return the next state to which the current state STATE will transit by
   2292    accepting the current input byte, and update STATE_LOG if necessary.
   2293    If STATE can accept a multibyte char/collating element/back reference
   2294    update the destination of STATE_LOG.  */
   2295 
   2296 static re_dfastate_t *
   2297 internal_function
   2298 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
   2299 	       re_dfastate_t *state)
   2300 {
   2301   re_dfastate_t **trtable;
   2302   unsigned char ch;
   2303 
   2304 #ifdef RE_ENABLE_I18N
   2305   /* If the current state can accept multibyte.  */
   2306   if (BE (state->accept_mb, 0))
   2307     {
   2308       *err = transit_state_mb (mctx, state);
   2309       if (BE (*err != REG_NOERROR, 0))
   2310 	return NULL;
   2311     }
   2312 #endif /* RE_ENABLE_I18N */
   2313 
   2314   /* Then decide the next state with the single byte.  */
   2315 #if 0
   2316   if (0)
   2317     /* don't use transition table  */
   2318     return transit_state_sb (err, mctx, state);
   2319 #endif
   2320 
   2321   /* Use transition table  */
   2322   ch = re_string_fetch_byte (&mctx->input);
   2323   for (;;)
   2324     {
   2325       trtable = state->trtable;
   2326       if (BE (trtable != NULL, 1))
   2327 	return trtable[ch];
   2328 
   2329       trtable = state->word_trtable;
   2330       if (BE (trtable != NULL, 1))
   2331         {
   2332 	  unsigned int context;
   2333 	  context
   2334 	    = re_string_context_at (&mctx->input,
   2335 				    re_string_cur_idx (&mctx->input) - 1,
   2336 				    mctx->eflags);
   2337 	  if (IS_WORD_CONTEXT (context))
   2338 	    return trtable[ch + SBC_MAX];
   2339 	  else
   2340 	    return trtable[ch];
   2341 	}
   2342 
   2343       if (!build_trtable (mctx->dfa, state))
   2344 	{
   2345 	  *err = REG_ESPACE;
   2346 	  return NULL;
   2347 	}
   2348 
   2349       /* Retry, we now have a transition table.  */
   2350     }
   2351 }
   2352 
   2353 /* Update the state_log if we need */
   2354 static re_dfastate_t *
   2355 internal_function
   2356 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
   2357 		      re_dfastate_t *next_state)
   2358 {
   2359   const re_dfa_t *const dfa = mctx->dfa;
   2360   Idx cur_idx = re_string_cur_idx (&mctx->input);
   2361 
   2362   if (cur_idx > mctx->state_log_top)
   2363     {
   2364       mctx->state_log[cur_idx] = next_state;
   2365       mctx->state_log_top = cur_idx;
   2366     }
   2367   else if (mctx->state_log[cur_idx] == 0)
   2368     {
   2369       mctx->state_log[cur_idx] = next_state;
   2370     }
   2371   else
   2372     {
   2373       re_dfastate_t *pstate;
   2374       unsigned int context;
   2375       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
   2376       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
   2377          the destination of a multibyte char/collating element/
   2378          back reference.  Then the next state is the union set of
   2379          these destinations and the results of the transition table.  */
   2380       pstate = mctx->state_log[cur_idx];
   2381       log_nodes = pstate->entrance_nodes;
   2382       if (next_state != NULL)
   2383         {
   2384           table_nodes = next_state->entrance_nodes;
   2385           *err = re_node_set_init_union (&next_nodes, table_nodes,
   2386 					     log_nodes);
   2387           if (BE (*err != REG_NOERROR, 0))
   2388 	    return NULL;
   2389         }
   2390       else
   2391         next_nodes = *log_nodes;
   2392       /* Note: We already add the nodes of the initial state,
   2393 	 then we don't need to add them here.  */
   2394 
   2395       context = re_string_context_at (&mctx->input,
   2396 				      re_string_cur_idx (&mctx->input) - 1,
   2397 				      mctx->eflags);
   2398       next_state = mctx->state_log[cur_idx]
   2399         = re_acquire_state_context (err, dfa, &next_nodes, context);
   2400       /* We don't need to check errors here, since the return value of
   2401          this function is next_state and ERR is already set.  */
   2402 
   2403       if (table_nodes != NULL)
   2404         re_node_set_free (&next_nodes);
   2405     }
   2406 
   2407   if (BE (dfa->nbackref, 0) && next_state != NULL)
   2408     {
   2409       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
   2410 	 later.  We must check them here, since the back references in the
   2411 	 next state might use them.  */
   2412       *err = check_subexp_matching_top (mctx, &next_state->nodes,
   2413 					cur_idx);
   2414       if (BE (*err != REG_NOERROR, 0))
   2415 	return NULL;
   2416 
   2417       /* If the next state has back references.  */
   2418       if (next_state->has_backref)
   2419 	{
   2420 	  *err = transit_state_bkref (mctx, &next_state->nodes);
   2421 	  if (BE (*err != REG_NOERROR, 0))
   2422 	    return NULL;
   2423 	  next_state = mctx->state_log[cur_idx];
   2424 	}
   2425     }
   2426 
   2427   return next_state;
   2428 }
   2429 
   2430 /* Skip bytes in the input that correspond to part of a
   2431    multi-byte match, then look in the log for a state
   2432    from which to restart matching.  */
   2433 static re_dfastate_t *
   2434 internal_function
   2435 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
   2436 {
   2437   re_dfastate_t *cur_state;
   2438   do
   2439     {
   2440       Idx max = mctx->state_log_top;
   2441       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   2442 
   2443       do
   2444 	{
   2445           if (++cur_str_idx > max)
   2446             return NULL;
   2447           re_string_skip_bytes (&mctx->input, 1);
   2448 	}
   2449       while (mctx->state_log[cur_str_idx] == NULL);
   2450 
   2451       cur_state = merge_state_with_log (err, mctx, NULL);
   2452     }
   2453   while (*err == REG_NOERROR && cur_state == NULL);
   2454   return cur_state;
   2455 }
   2456 
   2457 /* Helper functions for transit_state.  */
   2458 
   2459 /* From the node set CUR_NODES, pick up the nodes whose types are
   2460    OP_OPEN_SUBEXP and which have corresponding back references in the regular
   2461    expression. And register them to use them later for evaluating the
   2462    correspoding back references.  */
   2463 
   2464 static reg_errcode_t
   2465 internal_function
   2466 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
   2467 			   Idx str_idx)
   2468 {
   2469   const re_dfa_t *const dfa = mctx->dfa;
   2470   Idx node_idx;
   2471   reg_errcode_t err;
   2472 
   2473   /* TODO: This isn't efficient.
   2474 	   Because there might be more than one nodes whose types are
   2475 	   OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
   2476 	   nodes.
   2477 	   E.g. RE: (a){2}  */
   2478   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
   2479     {
   2480       Idx node = cur_nodes->elems[node_idx];
   2481       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
   2482 	  && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
   2483 	  && (dfa->used_bkref_map
   2484 	      & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
   2485 	{
   2486 	  err = match_ctx_add_subtop (mctx, node, str_idx);
   2487 	  if (BE (err != REG_NOERROR, 0))
   2488 	    return err;
   2489 	}
   2490     }
   2491   return REG_NOERROR;
   2492 }
   2493 
   2494 #if 0
   2495 /* Return the next state to which the current state STATE will transit by
   2496    accepting the current input byte.  */
   2497 
   2498 static re_dfastate_t *
   2499 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
   2500 		  re_dfastate_t *state)
   2501 {
   2502   const re_dfa_t *const dfa = mctx->dfa;
   2503   re_node_set next_nodes;
   2504   re_dfastate_t *next_state;
   2505   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
   2506   unsigned int context;
   2507 
   2508   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
   2509   if (BE (*err != REG_NOERROR, 0))
   2510     return NULL;
   2511   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
   2512     {
   2513       Idx cur_node = state->nodes.elems[node_cnt];
   2514       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
   2515 	{
   2516 	  *err = re_node_set_merge (&next_nodes,
   2517 				    dfa->eclosures + dfa->nexts[cur_node]);
   2518 	  if (BE (*err != REG_NOERROR, 0))
   2519 	    {
   2520 	      re_node_set_free (&next_nodes);
   2521 	      return NULL;
   2522 	    }
   2523 	}
   2524     }
   2525   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
   2526   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
   2527   /* We don't need to check errors here, since the return value of
   2528      this function is next_state and ERR is already set.  */
   2529 
   2530   re_node_set_free (&next_nodes);
   2531   re_string_skip_bytes (&mctx->input, 1);
   2532   return next_state;
   2533 }
   2534 #endif
   2535 
   2536 #ifdef RE_ENABLE_I18N
   2537 static reg_errcode_t
   2538 internal_function
   2539 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
   2540 {
   2541   const re_dfa_t *const dfa = mctx->dfa;
   2542   reg_errcode_t err;
   2543   Idx i;
   2544 
   2545   for (i = 0; i < pstate->nodes.nelem; ++i)
   2546     {
   2547       re_node_set dest_nodes, *new_nodes;
   2548       Idx cur_node_idx = pstate->nodes.elems[i];
   2549       int naccepted;
   2550       Idx dest_idx;
   2551       unsigned int context;
   2552       re_dfastate_t *dest_state;
   2553 
   2554       if (!dfa->nodes[cur_node_idx].accept_mb)
   2555         continue;
   2556 
   2557       if (dfa->nodes[cur_node_idx].constraint)
   2558 	{
   2559 	  context = re_string_context_at (&mctx->input,
   2560 					  re_string_cur_idx (&mctx->input),
   2561 					  mctx->eflags);
   2562 	  if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
   2563 					   context))
   2564 	    continue;
   2565 	}
   2566 
   2567       /* How many bytes the node can accept?  */
   2568       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
   2569 					   re_string_cur_idx (&mctx->input));
   2570       if (naccepted == 0)
   2571 	continue;
   2572 
   2573       /* The node can accepts `naccepted' bytes.  */
   2574       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
   2575       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
   2576 			       : mctx->max_mb_elem_len);
   2577       err = clean_state_log_if_needed (mctx, dest_idx);
   2578       if (BE (err != REG_NOERROR, 0))
   2579 	return err;
   2580 #ifdef DEBUG
   2581       assert (dfa->nexts[cur_node_idx] != REG_MISSING);
   2582 #endif
   2583       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
   2584 
   2585       dest_state = mctx->state_log[dest_idx];
   2586       if (dest_state == NULL)
   2587 	dest_nodes = *new_nodes;
   2588       else
   2589 	{
   2590 	  err = re_node_set_init_union (&dest_nodes,
   2591 					dest_state->entrance_nodes, new_nodes);
   2592 	  if (BE (err != REG_NOERROR, 0))
   2593 	    return err;
   2594 	}
   2595       context = re_string_context_at (&mctx->input, dest_idx - 1,
   2596 				      mctx->eflags);
   2597       mctx->state_log[dest_idx]
   2598 	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
   2599       if (dest_state != NULL)
   2600 	re_node_set_free (&dest_nodes);
   2601       if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, 0))
   2602 	return err;
   2603     }
   2604   return REG_NOERROR;
   2605 }
   2606 #endif /* RE_ENABLE_I18N */
   2607 
   2608 static reg_errcode_t
   2609 internal_function
   2610 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
   2611 {
   2612   const re_dfa_t *const dfa = mctx->dfa;
   2613   reg_errcode_t err;
   2614   Idx i;
   2615   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
   2616 
   2617   for (i = 0; i < nodes->nelem; ++i)
   2618     {
   2619       Idx dest_str_idx, prev_nelem, bkc_idx;
   2620       Idx node_idx = nodes->elems[i];
   2621       unsigned int context;
   2622       const re_token_t *node = dfa->nodes + node_idx;
   2623       re_node_set *new_dest_nodes;
   2624 
   2625       /* Check whether `node' is a backreference or not.  */
   2626       if (node->type != OP_BACK_REF)
   2627 	continue;
   2628 
   2629       if (node->constraint)
   2630 	{
   2631 	  context = re_string_context_at (&mctx->input, cur_str_idx,
   2632 					  mctx->eflags);
   2633 	  if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
   2634 	    continue;
   2635 	}
   2636 
   2637       /* `node' is a backreference.
   2638 	 Check the substring which the substring matched.  */
   2639       bkc_idx = mctx->nbkref_ents;
   2640       err = get_subexp (mctx, node_idx, cur_str_idx);
   2641       if (BE (err != REG_NOERROR, 0))
   2642 	goto free_return;
   2643 
   2644       /* And add the epsilon closures (which is `new_dest_nodes') of
   2645 	 the backreference to appropriate state_log.  */
   2646 #ifdef DEBUG
   2647       assert (dfa->nexts[node_idx] != REG_MISSING);
   2648 #endif
   2649       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
   2650 	{
   2651 	  Idx subexp_len;
   2652 	  re_dfastate_t *dest_state;
   2653 	  struct re_backref_cache_entry *bkref_ent;
   2654 	  bkref_ent = mctx->bkref_ents + bkc_idx;
   2655 	  if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
   2656 	    continue;
   2657 	  subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
   2658 	  new_dest_nodes = (subexp_len == 0
   2659 			    ? dfa->eclosures + dfa->edests[node_idx].elems[0]
   2660 			    : dfa->eclosures + dfa->nexts[node_idx]);
   2661 	  dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
   2662 			  - bkref_ent->subexp_from);
   2663 	  context = re_string_context_at (&mctx->input, dest_str_idx - 1,
   2664 					  mctx->eflags);
   2665 	  dest_state = mctx->state_log[dest_str_idx];
   2666 	  prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
   2667 			: mctx->state_log[cur_str_idx]->nodes.nelem);
   2668 	  /* Add `new_dest_node' to state_log.  */
   2669 	  if (dest_state == NULL)
   2670 	    {
   2671 	      mctx->state_log[dest_str_idx]
   2672 		= re_acquire_state_context (&err, dfa, new_dest_nodes,
   2673 					    context);
   2674 	      if (BE (mctx->state_log[dest_str_idx] == NULL
   2675 		      && err != REG_NOERROR, 0))
   2676 		goto free_return;
   2677 	    }
   2678 	  else
   2679 	    {
   2680 	      re_node_set dest_nodes;
   2681 	      err = re_node_set_init_union (&dest_nodes,
   2682 					    dest_state->entrance_nodes,
   2683 					    new_dest_nodes);
   2684 	      if (BE (err != REG_NOERROR, 0))
   2685 		{
   2686 		  re_node_set_free (&dest_nodes);
   2687 		  goto free_return;
   2688 		}
   2689 	      mctx->state_log[dest_str_idx]
   2690 		= re_acquire_state_context (&err, dfa, &dest_nodes, context);
   2691 	      re_node_set_free (&dest_nodes);
   2692 	      if (BE (mctx->state_log[dest_str_idx] == NULL
   2693 		      && err != REG_NOERROR, 0))
   2694 		goto free_return;
   2695 	    }
   2696 	  /* We need to check recursively if the backreference can epsilon
   2697 	     transit.  */
   2698 	  if (subexp_len == 0
   2699 	      && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
   2700 	    {
   2701 	      err = check_subexp_matching_top (mctx, new_dest_nodes,
   2702 					       cur_str_idx);
   2703 	      if (BE (err != REG_NOERROR, 0))
   2704 		goto free_return;
   2705 	      err = transit_state_bkref (mctx, new_dest_nodes);
   2706 	      if (BE (err != REG_NOERROR, 0))
   2707 		goto free_return;
   2708 	    }
   2709 	}
   2710     }
   2711   err = REG_NOERROR;
   2712  free_return:
   2713   return err;
   2714 }
   2715 
   2716 /* Enumerate all the candidates which the backreference BKREF_NODE can match
   2717    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
   2718    Note that we might collect inappropriate candidates here.
   2719    However, the cost of checking them strictly here is too high, then we
   2720    delay these checking for prune_impossible_nodes().  */
   2721 
   2722 static reg_errcode_t
   2723 internal_function
   2724 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
   2725 {
   2726   const re_dfa_t *const dfa = mctx->dfa;
   2727   Idx subexp_num, sub_top_idx;
   2728   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
   2729   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
   2730   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
   2731   if (cache_idx != REG_MISSING)
   2732     {
   2733       const struct re_backref_cache_entry *entry
   2734 	= mctx->bkref_ents + cache_idx;
   2735       do
   2736         if (entry->node == bkref_node)
   2737 	  return REG_NOERROR; /* We already checked it.  */
   2738       while (entry++->more);
   2739     }
   2740 
   2741   subexp_num = dfa->nodes[bkref_node].opr.idx;
   2742 
   2743   /* For each sub expression  */
   2744   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
   2745     {
   2746       reg_errcode_t err;
   2747       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
   2748       re_sub_match_last_t *sub_last;
   2749       Idx sub_last_idx, sl_str, bkref_str_off;
   2750 
   2751       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
   2752 	continue; /* It isn't related.  */
   2753 
   2754       sl_str = sub_top->str_idx;
   2755       bkref_str_off = bkref_str_idx;
   2756       /* At first, check the last node of sub expressions we already
   2757 	 evaluated.  */
   2758       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
   2759 	{
   2760 	  regoff_t sl_str_diff;
   2761 	  sub_last = sub_top->lasts[sub_last_idx];
   2762 	  sl_str_diff = sub_last->str_idx - sl_str;
   2763 	  /* The matched string by the sub expression match with the substring
   2764 	     at the back reference?  */
   2765 	  if (sl_str_diff > 0)
   2766 	    {
   2767 	      if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, 0))
   2768 		{
   2769 		  /* Not enough chars for a successful match.  */
   2770 		  if (bkref_str_off + sl_str_diff > mctx->input.len)
   2771 		    break;
   2772 
   2773 		  err = clean_state_log_if_needed (mctx,
   2774 						   bkref_str_off
   2775 						   + sl_str_diff);
   2776 		  if (BE (err != REG_NOERROR, 0))
   2777 		    return err;
   2778 		  buf = (const char *) re_string_get_buffer (&mctx->input);
   2779 		}
   2780 	      if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
   2781 		/* We don't need to search this sub expression any more.  */
   2782 		break;
   2783 	    }
   2784 	  bkref_str_off += sl_str_diff;
   2785 	  sl_str += sl_str_diff;
   2786 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
   2787 				bkref_str_idx);
   2788 
   2789 	  /* Reload buf, since the preceding call might have reallocated
   2790 	     the buffer.  */
   2791 	  buf = (const char *) re_string_get_buffer (&mctx->input);
   2792 
   2793 	  if (err == REG_NOMATCH)
   2794 	    continue;
   2795 	  if (BE (err != REG_NOERROR, 0))
   2796 	    return err;
   2797 	}
   2798 
   2799       if (sub_last_idx < sub_top->nlasts)
   2800 	continue;
   2801       if (sub_last_idx > 0)
   2802 	++sl_str;
   2803       /* Then, search for the other last nodes of the sub expression.  */
   2804       for (; sl_str <= bkref_str_idx; ++sl_str)
   2805 	{
   2806 	  Idx cls_node;
   2807 	  regoff_t sl_str_off;
   2808 	  const re_node_set *nodes;
   2809 	  sl_str_off = sl_str - sub_top->str_idx;
   2810 	  /* The matched string by the sub expression match with the substring
   2811 	     at the back reference?  */
   2812 	  if (sl_str_off > 0)
   2813 	    {
   2814 	      if (BE (bkref_str_off >= mctx->input.valid_len, 0))
   2815 		{
   2816 		  /* If we are at the end of the input, we cannot match.  */
   2817 		  if (bkref_str_off >= mctx->input.len)
   2818 		    break;
   2819 
   2820 		  err = extend_buffers (mctx);
   2821 		  if (BE (err != REG_NOERROR, 0))
   2822 		    return err;
   2823 
   2824 		  buf = (const char *) re_string_get_buffer (&mctx->input);
   2825 		}
   2826 	      if (buf [bkref_str_off++] != buf[sl_str - 1])
   2827 		break; /* We don't need to search this sub expression
   2828 			  any more.  */
   2829 	    }
   2830 	  if (mctx->state_log[sl_str] == NULL)
   2831 	    continue;
   2832 	  /* Does this state have a ')' of the sub expression?  */
   2833 	  nodes = &mctx->state_log[sl_str]->nodes;
   2834 	  cls_node = find_subexp_node (dfa, nodes, subexp_num,
   2835 				       OP_CLOSE_SUBEXP);
   2836 	  if (cls_node == REG_MISSING)
   2837 	    continue; /* No.  */
   2838 	  if (sub_top->path == NULL)
   2839 	    {
   2840 	      sub_top->path = calloc (sizeof (state_array_t),
   2841 				      sl_str - sub_top->str_idx + 1);
   2842 	      if (sub_top->path == NULL)
   2843 		return REG_ESPACE;
   2844 	    }
   2845 	  /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
   2846 	     in the current context?  */
   2847 	  err = check_arrival (mctx, sub_top->path, sub_top->node,
   2848 			       sub_top->str_idx, cls_node, sl_str,
   2849 			       OP_CLOSE_SUBEXP);
   2850 	  if (err == REG_NOMATCH)
   2851 	      continue;
   2852 	  if (BE (err != REG_NOERROR, 0))
   2853 	      return err;
   2854 	  sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
   2855 	  if (BE (sub_last == NULL, 0))
   2856 	    return REG_ESPACE;
   2857 	  err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
   2858 				bkref_str_idx);
   2859 	  if (err == REG_NOMATCH)
   2860 	    continue;
   2861 	}
   2862     }
   2863   return REG_NOERROR;
   2864 }
   2865 
   2866 /* Helper functions for get_subexp().  */
   2867 
   2868 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
   2869    If it can arrive, register the sub expression expressed with SUB_TOP
   2870    and SUB_LAST.  */
   2871 
   2872 static reg_errcode_t
   2873 internal_function
   2874 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
   2875 		re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
   2876 {
   2877   reg_errcode_t err;
   2878   Idx to_idx;
   2879   /* Can the subexpression arrive the back reference?  */
   2880   err = check_arrival (mctx, &sub_last->path, sub_last->node,
   2881 		       sub_last->str_idx, bkref_node, bkref_str,
   2882 		       OP_OPEN_SUBEXP);
   2883   if (err != REG_NOERROR)
   2884     return err;
   2885   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
   2886 			     sub_last->str_idx);
   2887   if (BE (err != REG_NOERROR, 0))
   2888     return err;
   2889   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
   2890   return clean_state_log_if_needed (mctx, to_idx);
   2891 }
   2892 
   2893 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
   2894    Search '(' if FL_OPEN, or search ')' otherwise.
   2895    TODO: This function isn't efficient...
   2896 	 Because there might be more than one nodes whose types are
   2897 	 OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
   2898 	 nodes.
   2899 	 E.g. RE: (a){2}  */
   2900 
   2901 static Idx
   2902 internal_function
   2903 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
   2904 		  Idx subexp_idx, int type)
   2905 {
   2906   Idx cls_idx;
   2907   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
   2908     {
   2909       Idx cls_node = nodes->elems[cls_idx];
   2910       const re_token_t *node = dfa->nodes + cls_node;
   2911       if (node->type == type
   2912 	  && node->opr.idx == subexp_idx)
   2913 	return cls_node;
   2914     }
   2915   return REG_MISSING;
   2916 }
   2917 
   2918 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
   2919    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
   2920    heavily reused.
   2921    Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise.  */
   2922 
   2923 static reg_errcode_t
   2924 internal_function
   2925 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
   2926 	       Idx top_str, Idx last_node, Idx last_str, int type)
   2927 {
   2928   const re_dfa_t *const dfa = mctx->dfa;
   2929   reg_errcode_t err = REG_NOERROR;
   2930   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
   2931   re_dfastate_t *cur_state = NULL;
   2932   re_node_set *cur_nodes, next_nodes;
   2933   re_dfastate_t **backup_state_log;
   2934   unsigned int context;
   2935 
   2936   subexp_num = dfa->nodes[top_node].opr.idx;
   2937   /* Extend the buffer if we need.  */
   2938   if (BE (path->alloc < last_str + mctx->max_mb_elem_len + 1, 0))
   2939     {
   2940       re_dfastate_t **new_array;
   2941       Idx old_alloc = path->alloc;
   2942       Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1;
   2943       if (BE (new_alloc < old_alloc, 0)
   2944 	  || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0))
   2945 	return REG_ESPACE;
   2946       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
   2947       if (BE (new_array == NULL, 0))
   2948 	return REG_ESPACE;
   2949       path->array = new_array;
   2950       path->alloc = new_alloc;
   2951       memset (new_array + old_alloc, '\0',
   2952 	      sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
   2953     }
   2954 
   2955   str_idx = path->next_idx ? path->next_idx : top_str;
   2956 
   2957   /* Temporary modify MCTX.  */
   2958   backup_state_log = mctx->state_log;
   2959   backup_cur_idx = mctx->input.cur_idx;
   2960   mctx->state_log = path->array;
   2961   mctx->input.cur_idx = str_idx;
   2962 
   2963   /* Setup initial node set.  */
   2964   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
   2965   if (str_idx == top_str)
   2966     {
   2967       err = re_node_set_init_1 (&next_nodes, top_node);
   2968       if (BE (err != REG_NOERROR, 0))
   2969 	return err;
   2970       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
   2971       if (BE (err != REG_NOERROR, 0))
   2972 	{
   2973 	  re_node_set_free (&next_nodes);
   2974 	  return err;
   2975 	}
   2976     }
   2977   else
   2978     {
   2979       cur_state = mctx->state_log[str_idx];
   2980       if (cur_state && cur_state->has_backref)
   2981 	{
   2982 	  err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
   2983 	  if (BE (err != REG_NOERROR, 0))
   2984 	    return err;
   2985 	}
   2986       else
   2987 	re_node_set_init_empty (&next_nodes);
   2988     }
   2989   if (str_idx == top_str || (cur_state && cur_state->has_backref))
   2990     {
   2991       if (next_nodes.nelem)
   2992 	{
   2993 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
   2994 				    subexp_num, type);
   2995 	  if (BE (err != REG_NOERROR, 0))
   2996 	    {
   2997 	      re_node_set_free (&next_nodes);
   2998 	      return err;
   2999 	    }
   3000 	}
   3001       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
   3002       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
   3003 	{
   3004 	  re_node_set_free (&next_nodes);
   3005 	  return err;
   3006 	}
   3007       mctx->state_log[str_idx] = cur_state;
   3008     }
   3009 
   3010   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
   3011     {
   3012       re_node_set_empty (&next_nodes);
   3013       if (mctx->state_log[str_idx + 1])
   3014 	{
   3015 	  err = re_node_set_merge (&next_nodes,
   3016 				   &mctx->state_log[str_idx + 1]->nodes);
   3017 	  if (BE (err != REG_NOERROR, 0))
   3018 	    {
   3019 	      re_node_set_free (&next_nodes);
   3020 	      return err;
   3021 	    }
   3022 	}
   3023       if (cur_state)
   3024 	{
   3025 	  err = check_arrival_add_next_nodes (mctx, str_idx,
   3026 					      &cur_state->non_eps_nodes,
   3027 					      &next_nodes);
   3028 	  if (BE (err != REG_NOERROR, 0))
   3029 	    {
   3030 	      re_node_set_free (&next_nodes);
   3031 	      return err;
   3032 	    }
   3033 	}
   3034       ++str_idx;
   3035       if (next_nodes.nelem)
   3036 	{
   3037 	  err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
   3038 	  if (BE (err != REG_NOERROR, 0))
   3039 	    {
   3040 	      re_node_set_free (&next_nodes);
   3041 	      return err;
   3042 	    }
   3043 	  err = expand_bkref_cache (mctx, &next_nodes, str_idx,
   3044 				    subexp_num, type);
   3045 	  if (BE (err != REG_NOERROR, 0))
   3046 	    {
   3047 	      re_node_set_free (&next_nodes);
   3048 	      return err;
   3049 	    }
   3050 	}
   3051       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
   3052       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
   3053       if (BE (cur_state == NULL && err != REG_NOERROR, 0))
   3054 	{
   3055 	  re_node_set_free (&next_nodes);
   3056 	  return err;
   3057 	}
   3058       mctx->state_log[str_idx] = cur_state;
   3059       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
   3060     }
   3061   re_node_set_free (&next_nodes);
   3062   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
   3063 	       : &mctx->state_log[last_str]->nodes);
   3064   path->next_idx = str_idx;
   3065 
   3066   /* Fix MCTX.  */
   3067   mctx->state_log = backup_state_log;
   3068   mctx->input.cur_idx = backup_cur_idx;
   3069 
   3070   /* Then check the current node set has the node LAST_NODE.  */
   3071   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
   3072     return REG_NOERROR;
   3073 
   3074   return REG_NOMATCH;
   3075 }
   3076 
   3077 /* Helper functions for check_arrival.  */
   3078 
   3079 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
   3080    to NEXT_NODES.
   3081    TODO: This function is similar to the functions transit_state*(),
   3082 	 however this function has many additional works.
   3083 	 Can't we unify them?  */
   3084 
   3085 static reg_errcode_t
   3086 internal_function
   3087 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
   3088 			      re_node_set *cur_nodes, re_node_set *next_nodes)
   3089 {
   3090   const re_dfa_t *const dfa = mctx->dfa;
   3091   bool ok;
   3092   Idx cur_idx;
   3093 #ifdef RE_ENABLE_I18N
   3094   reg_errcode_t err = REG_NOERROR;
   3095 #endif
   3096   re_node_set union_set;
   3097   re_node_set_init_empty (&union_set);
   3098   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
   3099     {
   3100       int naccepted = 0;
   3101       Idx cur_node = cur_nodes->elems[cur_idx];
   3102 #ifdef DEBUG
   3103       re_token_type_t type = dfa->nodes[cur_node].type;
   3104       assert (!IS_EPSILON_NODE (type));
   3105 #endif
   3106 #ifdef RE_ENABLE_I18N
   3107       /* If the node may accept `multi byte'.  */
   3108       if (dfa->nodes[cur_node].accept_mb)
   3109 	{
   3110 	  naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
   3111 					       str_idx);
   3112 	  if (naccepted > 1)
   3113 	    {
   3114 	      re_dfastate_t *dest_state;
   3115 	      Idx next_node = dfa->nexts[cur_node];
   3116 	      Idx next_idx = str_idx + naccepted;
   3117 	      dest_state = mctx->state_log[next_idx];
   3118 	      re_node_set_empty (&union_set);
   3119 	      if (dest_state)
   3120 		{
   3121 		  err = re_node_set_merge (&union_set, &dest_state->nodes);
   3122 		  if (BE (err != REG_NOERROR, 0))
   3123 		    {
   3124 		      re_node_set_free (&union_set);
   3125 		      return err;
   3126 		    }
   3127 		}
   3128 	      ok = re_node_set_insert (&union_set, next_node);
   3129 	      if (BE (! ok, 0))
   3130 		{
   3131 		  re_node_set_free (&union_set);
   3132 		  return REG_ESPACE;
   3133 		}
   3134 	      mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
   3135 							    &union_set);
   3136 	      if (BE (mctx->state_log[next_idx] == NULL
   3137 		      && err != REG_NOERROR, 0))
   3138 		{
   3139 		  re_node_set_free (&union_set);
   3140 		  return err;
   3141 		}
   3142 	    }
   3143 	}
   3144 #endif /* RE_ENABLE_I18N */
   3145       if (naccepted
   3146 	  || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
   3147 	{
   3148 	  ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
   3149 	  if (BE (! ok, 0))
   3150 	    {
   3151 	      re_node_set_free (&union_set);
   3152 	      return REG_ESPACE;
   3153 	    }
   3154 	}
   3155     }
   3156   re_node_set_free (&union_set);
   3157   return REG_NOERROR;
   3158 }
   3159 
   3160 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
   3161    CUR_NODES, however exclude the nodes which are:
   3162     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
   3163     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
   3164 */
   3165 
   3166 static reg_errcode_t
   3167 internal_function
   3168 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
   3169 			  Idx ex_subexp, int type)
   3170 {
   3171   reg_errcode_t err;
   3172   Idx idx, outside_node;
   3173   re_node_set new_nodes;
   3174 #ifdef DEBUG
   3175   assert (cur_nodes->nelem);
   3176 #endif
   3177   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
   3178   if (BE (err != REG_NOERROR, 0))
   3179     return err;
   3180   /* Create a new node set NEW_NODES with the nodes which are epsilon
   3181      closures of the node in CUR_NODES.  */
   3182 
   3183   for (idx = 0; idx < cur_nodes->nelem; ++idx)
   3184     {
   3185       Idx cur_node = cur_nodes->elems[idx];
   3186       const re_node_set *eclosure = dfa->eclosures + cur_node;
   3187       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
   3188       if (outside_node == REG_MISSING)
   3189 	{
   3190 	  /* There are no problematic nodes, just merge them.  */
   3191 	  err = re_node_set_merge (&new_nodes, eclosure);
   3192 	  if (BE (err != REG_NOERROR, 0))
   3193 	    {
   3194 	      re_node_set_free (&new_nodes);
   3195 	      return err;
   3196 	    }
   3197 	}
   3198       else
   3199 	{
   3200 	  /* There are problematic nodes, re-calculate incrementally.  */
   3201 	  err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
   3202 					      ex_subexp, type);
   3203 	  if (BE (err != REG_NOERROR, 0))
   3204 	    {
   3205 	      re_node_set_free (&new_nodes);
   3206 	      return err;
   3207 	    }
   3208 	}
   3209     }
   3210   re_node_set_free (cur_nodes);
   3211   *cur_nodes = new_nodes;
   3212   return REG_NOERROR;
   3213 }
   3214 
   3215 /* Helper function for check_arrival_expand_ecl.
   3216    Check incrementally the epsilon closure of TARGET, and if it isn't
   3217    problematic append it to DST_NODES.  */
   3218 
   3219 static reg_errcode_t
   3220 internal_function
   3221 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
   3222 			      Idx target, Idx ex_subexp, int type)
   3223 {
   3224   Idx cur_node;
   3225   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
   3226     {
   3227       bool ok;
   3228 
   3229       if (dfa->nodes[cur_node].type == type
   3230 	  && dfa->nodes[cur_node].opr.idx == ex_subexp)
   3231 	{
   3232 	  if (type == OP_CLOSE_SUBEXP)
   3233 	    {
   3234 	      ok = re_node_set_insert (dst_nodes, cur_node);
   3235 	      if (BE (! ok, 0))
   3236 		return REG_ESPACE;
   3237 	    }
   3238 	  break;
   3239 	}
   3240       ok = re_node_set_insert (dst_nodes, cur_node);
   3241       if (BE (! ok, 0))
   3242 	return REG_ESPACE;
   3243       if (dfa->edests[cur_node].nelem == 0)
   3244 	break;
   3245       if (dfa->edests[cur_node].nelem == 2)
   3246 	{
   3247 	  reg_errcode_t err;
   3248 	  err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
   3249 					      dfa->edests[cur_node].elems[1],
   3250 					      ex_subexp, type);
   3251 	  if (BE (err != REG_NOERROR, 0))
   3252 	    return err;
   3253 	}
   3254       cur_node = dfa->edests[cur_node].elems[0];
   3255     }
   3256   return REG_NOERROR;
   3257 }
   3258 
   3259 
   3260 /* For all the back references in the current state, calculate the
   3261    destination of the back references by the appropriate entry
   3262    in MCTX->BKREF_ENTS.  */
   3263 
   3264 static reg_errcode_t
   3265 internal_function
   3266 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
   3267 		    Idx cur_str, Idx subexp_num, int type)
   3268 {
   3269   const re_dfa_t *const dfa = mctx->dfa;
   3270   reg_errcode_t err;
   3271   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
   3272   struct re_backref_cache_entry *ent;
   3273 
   3274   if (cache_idx_start == REG_MISSING)
   3275     return REG_NOERROR;
   3276 
   3277  restart:
   3278   ent = mctx->bkref_ents + cache_idx_start;
   3279   do
   3280     {
   3281       Idx to_idx, next_node;
   3282 
   3283       /* Is this entry ENT is appropriate?  */
   3284       if (!re_node_set_contains (cur_nodes, ent->node))
   3285 	continue; /* No.  */
   3286 
   3287       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
   3288       /* Calculate the destination of the back reference, and append it
   3289 	 to MCTX->STATE_LOG.  */
   3290       if (to_idx == cur_str)
   3291 	{
   3292 	  /* The backreference did epsilon transit, we must re-check all the
   3293 	     node in the current state.  */
   3294 	  re_node_set new_dests;
   3295 	  reg_errcode_t err2, err3;
   3296 	  next_node = dfa->edests[ent->node].elems[0];
   3297 	  if (re_node_set_contains (cur_nodes, next_node))
   3298 	    continue;
   3299 	  err = re_node_set_init_1 (&new_dests, next_node);
   3300 	  err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
   3301 	  err3 = re_node_set_merge (cur_nodes, &new_dests);
   3302 	  re_node_set_free (&new_dests);
   3303 	  if (BE (err != REG_NOERROR || err2 != REG_NOERROR
   3304 		  || err3 != REG_NOERROR, 0))
   3305 	    {
   3306 	      err = (err != REG_NOERROR ? err
   3307 		     : (err2 != REG_NOERROR ? err2 : err3));
   3308 	      return err;
   3309 	    }
   3310 	  /* TODO: It is still inefficient...  */
   3311 	  goto restart;
   3312 	}
   3313       else
   3314 	{
   3315 	  re_node_set union_set;
   3316 	  next_node = dfa->nexts[ent->node];
   3317 	  if (mctx->state_log[to_idx])
   3318 	    {
   3319 	      bool ok;
   3320 	      if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
   3321 					next_node))
   3322 		continue;
   3323 	      err = re_node_set_init_copy (&union_set,
   3324 					   &mctx->state_log[to_idx]->nodes);
   3325 	      ok = re_node_set_insert (&union_set, next_node);
   3326 	      if (BE (err != REG_NOERROR || ! ok, 0))
   3327 		{
   3328 		  re_node_set_free (&union_set);
   3329 		  err = err != REG_NOERROR ? err : REG_ESPACE;
   3330 		  return err;
   3331 		}
   3332 	    }
   3333 	  else
   3334 	    {
   3335 	      err = re_node_set_init_1 (&union_set, next_node);
   3336 	      if (BE (err != REG_NOERROR, 0))
   3337 		return err;
   3338 	    }
   3339 	  mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
   3340 	  re_node_set_free (&union_set);
   3341 	  if (BE (mctx->state_log[to_idx] == NULL
   3342 		  && err != REG_NOERROR, 0))
   3343 	    return err;
   3344 	}
   3345     }
   3346   while (ent++->more);
   3347   return REG_NOERROR;
   3348 }
   3349 
   3350 /* Build transition table for the state.
   3351    Return true if successful.  */
   3352 
   3353 static bool
   3354 internal_function
   3355 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
   3356 {
   3357   reg_errcode_t err;
   3358   Idx i, j;
   3359   int ch;
   3360   bool need_word_trtable = false;
   3361   bitset_word_t elem, mask;
   3362   bool dests_node_malloced = false;
   3363   bool dest_states_malloced = false;
   3364   Idx ndests; /* Number of the destination states from `state'.  */
   3365   re_dfastate_t **trtable;
   3366   re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl;
   3367   re_node_set follows, *dests_node;
   3368   bitset_t *dests_ch;
   3369   bitset_t acceptable;
   3370 
   3371   struct dests_alloc
   3372   {
   3373     re_node_set dests_node[SBC_MAX];
   3374     bitset_t dests_ch[SBC_MAX];
   3375   } *dests_alloc;
   3376 
   3377   /* We build DFA states which corresponds to the destination nodes
   3378      from `state'.  `dests_node[i]' represents the nodes which i-th
   3379      destination state contains, and `dests_ch[i]' represents the
   3380      characters which i-th destination state accepts.  */
   3381   if (__libc_use_alloca (sizeof (struct dests_alloc)))
   3382     dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc));
   3383   else
   3384     {
   3385       dests_alloc = re_malloc (struct dests_alloc, 1);
   3386       if (BE (dests_alloc == NULL, 0))
   3387 	return false;
   3388       dests_node_malloced = true;
   3389     }
   3390   dests_node = dests_alloc->dests_node;
   3391   dests_ch = dests_alloc->dests_ch;
   3392 
   3393   /* Initialize transiton table.  */
   3394   state->word_trtable = state->trtable = NULL;
   3395 
   3396   /* At first, group all nodes belonging to `state' into several
   3397      destinations.  */
   3398   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
   3399   if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0))
   3400     {
   3401       if (dests_node_malloced)
   3402 	free (dests_alloc);
   3403       if (ndests == 0)
   3404 	{
   3405 	  state->trtable = (re_dfastate_t **)
   3406 	    calloc (sizeof (re_dfastate_t *), SBC_MAX);
   3407 	  return true;
   3408 	}
   3409       return false;
   3410     }
   3411 
   3412   err = re_node_set_alloc (&follows, ndests + 1);
   3413   if (BE (err != REG_NOERROR, 0))
   3414     goto out_free;
   3415 
   3416   /* Avoid arithmetic overflow in size calculation.  */
   3417   if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
   3418 	    / (3 * sizeof (re_dfastate_t *)))
   3419 	   < ndests),
   3420 	  0))
   3421     goto out_free;
   3422 
   3423   if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
   3424 			 + ndests * 3 * sizeof (re_dfastate_t *)))
   3425     dest_states = (re_dfastate_t **)
   3426       alloca (ndests * 3 * sizeof (re_dfastate_t *));
   3427   else
   3428     {
   3429       dest_states = (re_dfastate_t **)
   3430 	malloc (ndests * 3 * sizeof (re_dfastate_t *));
   3431       if (BE (dest_states == NULL, 0))
   3432 	{
   3433 out_free:
   3434 	  if (dest_states_malloced)
   3435 	    free (dest_states);
   3436 	  re_node_set_free (&follows);
   3437 	  for (i = 0; i < ndests; ++i)
   3438 	    re_node_set_free (dests_node + i);
   3439 	  if (dests_node_malloced)
   3440 	    free (dests_alloc);
   3441 	  return false;
   3442 	}
   3443       dest_states_malloced = true;
   3444     }
   3445   dest_states_word = dest_states + ndests;
   3446   dest_states_nl = dest_states_word + ndests;
   3447   bitset_empty (acceptable);
   3448 
   3449   /* Then build the states for all destinations.  */
   3450   for (i = 0; i < ndests; ++i)
   3451     {
   3452       Idx next_node;
   3453       re_node_set_empty (&follows);
   3454       /* Merge the follows of this destination states.  */
   3455       for (j = 0; j < dests_node[i].nelem; ++j)
   3456 	{
   3457 	  next_node = dfa->nexts[dests_node[i].elems[j]];
   3458 	  if (next_node != REG_MISSING)
   3459 	    {
   3460 	      err = re_node_set_merge (&follows, dfa->eclosures + next_node);
   3461 	      if (BE (err != REG_NOERROR, 0))
   3462 		goto out_free;
   3463 	    }
   3464 	}
   3465       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
   3466       if (BE (dest_states[i] == NULL && err != REG_NOERROR, 0))
   3467 	goto out_free;
   3468       /* If the new state has context constraint,
   3469 	 build appropriate states for these contexts.  */
   3470       if (dest_states[i]->has_constraint)
   3471 	{
   3472 	  dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
   3473 							  CONTEXT_WORD);
   3474 	  if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
   3475 	    goto out_free;
   3476 
   3477 	  if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
   3478 	    need_word_trtable = true;
   3479 
   3480 	  dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
   3481 							CONTEXT_NEWLINE);
   3482 	  if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, 0))
   3483 	    goto out_free;
   3484 	}
   3485       else
   3486 	{
   3487 	  dest_states_word[i] = dest_states[i];
   3488 	  dest_states_nl[i] = dest_states[i];
   3489 	}
   3490       bitset_merge (acceptable, dests_ch[i]);
   3491     }
   3492 
   3493   if (!BE (need_word_trtable, 0))
   3494     {
   3495       /* We don't care about whether the following character is a word
   3496 	 character, or we are in a single-byte character set so we can
   3497 	 discern by looking at the character code: allocate a
   3498 	 256-entry transition table.  */
   3499       trtable = state->trtable =
   3500 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
   3501       if (BE (trtable == NULL, 0))
   3502 	goto out_free;
   3503 
   3504       /* For all characters ch...:  */
   3505       for (i = 0; i < BITSET_WORDS; ++i)
   3506 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
   3507 	     elem;
   3508 	     mask <<= 1, elem >>= 1, ++ch)
   3509 	  if (BE (elem & 1, 0))
   3510 	    {
   3511 	      /* There must be exactly one destination which accepts
   3512 		 character ch.  See group_nodes_into_DFAstates.  */
   3513 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
   3514 		;
   3515 
   3516 	      /* j-th destination accepts the word character ch.  */
   3517 	      if (dfa->word_char[i] & mask)
   3518 		trtable[ch] = dest_states_word[j];
   3519 	      else
   3520 		trtable[ch] = dest_states[j];
   3521 	    }
   3522     }
   3523   else
   3524     {
   3525       /* We care about whether the following character is a word
   3526 	 character, and we are in a multi-byte character set: discern
   3527 	 by looking at the character code: build two 256-entry
   3528 	 transition tables, one starting at trtable[0] and one
   3529 	 starting at trtable[SBC_MAX].  */
   3530       trtable = state->word_trtable =
   3531 	(re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
   3532       if (BE (trtable == NULL, 0))
   3533 	goto out_free;
   3534 
   3535       /* For all characters ch...:  */
   3536       for (i = 0; i < BITSET_WORDS; ++i)
   3537 	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
   3538 	     elem;
   3539 	     mask <<= 1, elem >>= 1, ++ch)
   3540 	  if (BE (elem & 1, 0))
   3541 	    {
   3542 	      /* There must be exactly one destination which accepts
   3543 		 character ch.  See group_nodes_into_DFAstates.  */
   3544 	      for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
   3545 		;
   3546 
   3547 	      /* j-th destination accepts the word character ch.  */
   3548 	      trtable[ch] = dest_states[j];
   3549 	      trtable[ch + SBC_MAX] = dest_states_word[j];
   3550 	    }
   3551     }
   3552 
   3553   /* new line */
   3554   if (bitset_contain (acceptable, NEWLINE_CHAR))
   3555     {
   3556       /* The current state accepts newline character.  */
   3557       for (j = 0; j < ndests; ++j)
   3558 	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
   3559 	  {
   3560 	    /* k-th destination accepts newline character.  */
   3561 	    trtable[NEWLINE_CHAR] = dest_states_nl[j];
   3562 	    if (need_word_trtable)
   3563 	      trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
   3564 	    /* There must be only one destination which accepts
   3565 	       newline.  See group_nodes_into_DFAstates.  */
   3566 	    break;
   3567 	  }
   3568     }
   3569 
   3570   if (dest_states_malloced)
   3571     free (dest_states);
   3572 
   3573   re_node_set_free (&follows);
   3574   for (i = 0; i < ndests; ++i)
   3575     re_node_set_free (dests_node + i);
   3576 
   3577   if (dests_node_malloced)
   3578     free (dests_alloc);
   3579 
   3580   return true;
   3581 }
   3582 
   3583 /* Group all nodes belonging to STATE into several destinations.
   3584    Then for all destinations, set the nodes belonging to the destination
   3585    to DESTS_NODE[i] and set the characters accepted by the destination
   3586    to DEST_CH[i].  This function return the number of destinations.  */
   3587 
   3588 static Idx
   3589 internal_function
   3590 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
   3591 			    re_node_set *dests_node, bitset_t *dests_ch)
   3592 {
   3593   reg_errcode_t err;
   3594   bool ok;
   3595   Idx i, j, k;
   3596   Idx ndests; /* Number of the destinations from `state'.  */
   3597   bitset_t accepts; /* Characters a node can accept.  */
   3598   const re_node_set *cur_nodes = &state->nodes;
   3599   bitset_empty (accepts);
   3600   ndests = 0;
   3601 
   3602   /* For all the nodes belonging to `state',  */
   3603   for (i = 0; i < cur_nodes->nelem; ++i)
   3604     {
   3605       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
   3606       re_token_type_t type = node->type;
   3607       unsigned int constraint = node->constraint;
   3608 
   3609       /* Enumerate all single byte character this node can accept.  */
   3610       if (type == CHARACTER)
   3611 	bitset_set (accepts, node->opr.c);
   3612       else if (type == SIMPLE_BRACKET)
   3613 	{
   3614 	  bitset_merge (accepts, node->opr.sbcset);
   3615 	}
   3616       else if (type == OP_PERIOD)
   3617 	{
   3618 #ifdef RE_ENABLE_I18N
   3619 	  if (dfa->mb_cur_max > 1)
   3620 	    bitset_merge (accepts, dfa->sb_char);
   3621 	  else
   3622 #endif
   3623 	    bitset_set_all (accepts);
   3624 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
   3625 	    bitset_clear (accepts, '\n');
   3626 	  if (dfa->syntax & RE_DOT_NOT_NULL)
   3627 	    bitset_clear (accepts, '\0');
   3628 	}
   3629 #ifdef RE_ENABLE_I18N
   3630       else if (type == OP_UTF8_PERIOD)
   3631         {
   3632 	  if (ASCII_CHARS % BITSET_WORD_BITS == 0)
   3633 	    memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
   3634 	  else
   3635 	    bitset_merge (accepts, utf8_sb_map);
   3636 	  if (!(dfa->syntax & RE_DOT_NEWLINE))
   3637 	    bitset_clear (accepts, '\n');
   3638 	  if (dfa->syntax & RE_DOT_NOT_NULL)
   3639 	    bitset_clear (accepts, '\0');
   3640         }
   3641 #endif
   3642       else
   3643 	continue;
   3644 
   3645       /* Check the `accepts' and sift the characters which are not
   3646 	 match it the context.  */
   3647       if (constraint)
   3648 	{
   3649 	  if (constraint & NEXT_NEWLINE_CONSTRAINT)
   3650 	    {
   3651 	      bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
   3652 	      bitset_empty (accepts);
   3653 	      if (accepts_newline)
   3654 		bitset_set (accepts, NEWLINE_CHAR);
   3655 	      else
   3656 		continue;
   3657 	    }
   3658 	  if (constraint & NEXT_ENDBUF_CONSTRAINT)
   3659 	    {
   3660 	      bitset_empty (accepts);
   3661 	      continue;
   3662 	    }
   3663 
   3664 	  if (constraint & NEXT_WORD_CONSTRAINT)
   3665 	    {
   3666 	      bitset_word_t any_set = 0;
   3667 	      if (type == CHARACTER && !node->word_char)
   3668 		{
   3669 		  bitset_empty (accepts);
   3670 		  continue;
   3671 		}
   3672 #ifdef RE_ENABLE_I18N
   3673 	      if (dfa->mb_cur_max > 1)
   3674 		for (j = 0; j < BITSET_WORDS; ++j)
   3675 		  any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
   3676 	      else
   3677 #endif
   3678 		for (j = 0; j < BITSET_WORDS; ++j)
   3679 		  any_set |= (accepts[j] &= dfa->word_char[j]);
   3680 	      if (!any_set)
   3681 		continue;
   3682 	    }
   3683 	  if (constraint & NEXT_NOTWORD_CONSTRAINT)
   3684 	    {
   3685 	      bitset_word_t any_set = 0;
   3686 	      if (type == CHARACTER && node->word_char)
   3687 		{
   3688 		  bitset_empty (accepts);
   3689 		  continue;
   3690 		}
   3691 #ifdef RE_ENABLE_I18N
   3692 	      if (dfa->mb_cur_max > 1)
   3693 		for (j = 0; j < BITSET_WORDS; ++j)
   3694 		  any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
   3695 	      else
   3696 #endif
   3697 		for (j = 0; j < BITSET_WORDS; ++j)
   3698 		  any_set |= (accepts[j] &= ~dfa->word_char[j]);
   3699 	      if (!any_set)
   3700 		continue;
   3701 	    }
   3702 	}
   3703 
   3704       /* Then divide `accepts' into DFA states, or create a new
   3705 	 state.  Above, we make sure that accepts is not empty.  */
   3706       for (j = 0; j < ndests; ++j)
   3707 	{
   3708 	  bitset_t intersec; /* Intersection sets, see below.  */
   3709 	  bitset_t remains;
   3710 	  /* Flags, see below.  */
   3711 	  bitset_word_t has_intersec, not_subset, not_consumed;
   3712 
   3713 	  /* Optimization, skip if this state doesn't accept the character.  */
   3714 	  if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
   3715 	    continue;
   3716 
   3717 	  /* Enumerate the intersection set of this state and `accepts'.  */
   3718 	  has_intersec = 0;
   3719 	  for (k = 0; k < BITSET_WORDS; ++k)
   3720 	    has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
   3721 	  /* And skip if the intersection set is empty.  */
   3722 	  if (!has_intersec)
   3723 	    continue;
   3724 
   3725 	  /* Then check if this state is a subset of `accepts'.  */
   3726 	  not_subset = not_consumed = 0;
   3727 	  for (k = 0; k < BITSET_WORDS; ++k)
   3728 	    {
   3729 	      not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
   3730 	      not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
   3731 	    }
   3732 
   3733 	  /* If this state isn't a subset of `accepts', create a
   3734 	     new group state, which has the `remains'. */
   3735 	  if (not_subset)
   3736 	    {
   3737 	      bitset_copy (dests_ch[ndests], remains);
   3738 	      bitset_copy (dests_ch[j], intersec);
   3739 	      err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
   3740 	      if (BE (err != REG_NOERROR, 0))
   3741 		goto error_return;
   3742 	      ++ndests;
   3743 	    }
   3744 
   3745 	  /* Put the position in the current group. */
   3746 	  ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
   3747 	  if (BE (! ok, 0))
   3748 	    goto error_return;
   3749 
   3750 	  /* If all characters are consumed, go to next node. */
   3751 	  if (!not_consumed)
   3752 	    break;
   3753 	}
   3754       /* Some characters remain, create a new group. */
   3755       if (j == ndests)
   3756 	{
   3757 	  bitset_copy (dests_ch[ndests], accepts);
   3758 	  err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
   3759 	  if (BE (err != REG_NOERROR, 0))
   3760 	    goto error_return;
   3761 	  ++ndests;
   3762 	  bitset_empty (accepts);
   3763 	}
   3764     }
   3765   return ndests;
   3766  error_return:
   3767   for (j = 0; j < ndests; ++j)
   3768     re_node_set_free (dests_node + j);
   3769   return REG_MISSING;
   3770 }
   3771 
   3772 #ifdef RE_ENABLE_I18N
   3773 /* Check how many bytes the node `dfa->nodes[node_idx]' accepts.
   3774    Return the number of the bytes the node accepts.
   3775    STR_IDX is the current index of the input string.
   3776 
   3777    This function handles the nodes which can accept one character, or
   3778    one collating element like '.', '[a-z]', opposite to the other nodes
   3779    can only accept one byte.  */
   3780 
   3781 static int
   3782 internal_function
   3783 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
   3784 			 const re_string_t *input, Idx str_idx)
   3785 {
   3786   const re_token_t *node = dfa->nodes + node_idx;
   3787   int char_len, elem_len;
   3788   Idx i;
   3789 
   3790   if (BE (node->type == OP_UTF8_PERIOD, 0))
   3791     {
   3792       unsigned char c = re_string_byte_at (input, str_idx), d;
   3793       if (BE (c < 0xc2, 1))
   3794 	return 0;
   3795 
   3796       if (str_idx + 2 > input->len)
   3797 	return 0;
   3798 
   3799       d = re_string_byte_at (input, str_idx + 1);
   3800       if (c < 0xe0)
   3801 	return (d < 0x80 || d > 0xbf) ? 0 : 2;
   3802       else if (c < 0xf0)
   3803 	{
   3804 	  char_len = 3;
   3805 	  if (c == 0xe0 && d < 0xa0)
   3806 	    return 0;
   3807 	}
   3808       else if (c < 0xf8)
   3809 	{
   3810 	  char_len = 4;
   3811 	  if (c == 0xf0 && d < 0x90)
   3812 	    return 0;
   3813 	}
   3814       else if (c < 0xfc)
   3815 	{
   3816 	  char_len = 5;
   3817 	  if (c == 0xf8 && d < 0x88)
   3818 	    return 0;
   3819 	}
   3820       else if (c < 0xfe)
   3821 	{
   3822 	  char_len = 6;
   3823 	  if (c == 0xfc && d < 0x84)
   3824 	    return 0;
   3825 	}
   3826       else
   3827 	return 0;
   3828 
   3829       if (str_idx + char_len > input->len)
   3830 	return 0;
   3831 
   3832       for (i = 1; i < char_len; ++i)
   3833 	{
   3834 	  d = re_string_byte_at (input, str_idx + i);
   3835 	  if (d < 0x80 || d > 0xbf)
   3836 	    return 0;
   3837 	}
   3838       return char_len;
   3839     }
   3840 
   3841   char_len = re_string_char_size_at (input, str_idx);
   3842   if (node->type == OP_PERIOD)
   3843     {
   3844       if (char_len <= 1)
   3845         return 0;
   3846       /* FIXME: I don't think this if is needed, as both '\n'
   3847 	 and '\0' are char_len == 1.  */
   3848       /* '.' accepts any one character except the following two cases.  */
   3849       if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
   3850 	   re_string_byte_at (input, str_idx) == '\n') ||
   3851 	  ((dfa->syntax & RE_DOT_NOT_NULL) &&
   3852 	   re_string_byte_at (input, str_idx) == '\0'))
   3853 	return 0;
   3854       return char_len;
   3855     }
   3856 
   3857   elem_len = re_string_elem_size_at (input, str_idx);
   3858   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
   3859     return 0;
   3860 
   3861   if (node->type == COMPLEX_BRACKET)
   3862     {
   3863       const re_charset_t *cset = node->opr.mbcset;
   3864 # ifdef _LIBC
   3865       const unsigned char *pin
   3866 	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
   3867       Idx j;
   3868       uint32_t nrules;
   3869 # endif /* _LIBC */
   3870       int match_len = 0;
   3871       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
   3872 		    ? re_string_wchar_at (input, str_idx) : 0);
   3873 
   3874       /* match with multibyte character?  */
   3875       for (i = 0; i < cset->nmbchars; ++i)
   3876 	if (wc == cset->mbchars[i])
   3877 	  {
   3878 	    match_len = char_len;
   3879 	    goto check_node_accept_bytes_match;
   3880 	  }
   3881       /* match with character_class?  */
   3882       for (i = 0; i < cset->nchar_classes; ++i)
   3883 	{
   3884 	  wctype_t wt = cset->char_classes[i];
   3885 	  if (__iswctype (wc, wt))
   3886 	    {
   3887 	      match_len = char_len;
   3888 	      goto check_node_accept_bytes_match;
   3889 	    }
   3890 	}
   3891 
   3892 # ifdef _LIBC
   3893       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   3894       if (nrules != 0)
   3895 	{
   3896 	  unsigned int in_collseq = 0;
   3897 	  const int32_t *table, *indirect;
   3898 	  const unsigned char *weights, *extra;
   3899 	  const char *collseqwc;
   3900 	  int32_t idx;
   3901 	  /* This #include defines a local function!  */
   3902 #  include <locale/weight.h>
   3903 
   3904 	  /* match with collating_symbol?  */
   3905 	  if (cset->ncoll_syms)
   3906 	    extra = (const unsigned char *)
   3907 	      _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   3908 	  for (i = 0; i < cset->ncoll_syms; ++i)
   3909 	    {
   3910 	      const unsigned char *coll_sym = extra + cset->coll_syms[i];
   3911 	      /* Compare the length of input collating element and
   3912 		 the length of current collating element.  */
   3913 	      if (*coll_sym != elem_len)
   3914 		continue;
   3915 	      /* Compare each bytes.  */
   3916 	      for (j = 0; j < *coll_sym; j++)
   3917 		if (pin[j] != coll_sym[1 + j])
   3918 		  break;
   3919 	      if (j == *coll_sym)
   3920 		{
   3921 		  /* Match if every bytes is equal.  */
   3922 		  match_len = j;
   3923 		  goto check_node_accept_bytes_match;
   3924 		}
   3925 	    }
   3926 
   3927 	  if (cset->nranges)
   3928 	    {
   3929 	      if (elem_len <= char_len)
   3930 		{
   3931 		  collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
   3932 		  in_collseq = __collseq_table_lookup (collseqwc, wc);
   3933 		}
   3934 	      else
   3935 		in_collseq = find_collation_sequence_value (pin, elem_len);
   3936 	    }
   3937 	  /* match with range expression?  */
   3938 	  for (i = 0; i < cset->nranges; ++i)
   3939 	    if (cset->range_starts[i] <= in_collseq
   3940 		&& in_collseq <= cset->range_ends[i])
   3941 	      {
   3942 		match_len = elem_len;
   3943 		goto check_node_accept_bytes_match;
   3944 	      }
   3945 
   3946 	  /* match with equivalence_class?  */
   3947 	  if (cset->nequiv_classes)
   3948 	    {
   3949 	      const unsigned char *cp = pin;
   3950 	      table = (const int32_t *)
   3951 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
   3952 	      weights = (const unsigned char *)
   3953 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
   3954 	      extra = (const unsigned char *)
   3955 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
   3956 	      indirect = (const int32_t *)
   3957 		_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
   3958 	      idx = findidx (&cp);
   3959 	      if (idx > 0)
   3960 		for (i = 0; i < cset->nequiv_classes; ++i)
   3961 		  {
   3962 		    int32_t equiv_class_idx = cset->equiv_classes[i];
   3963 		    size_t weight_len = weights[idx];
   3964 		    if (weight_len == weights[equiv_class_idx])
   3965 		      {
   3966 			Idx cnt = 0;
   3967 			while (cnt <= weight_len
   3968 			       && (weights[equiv_class_idx + 1 + cnt]
   3969 				   == weights[idx + 1 + cnt]))
   3970 			  ++cnt;
   3971 			if (cnt > weight_len)
   3972 			  {
   3973 			    match_len = elem_len;
   3974 			    goto check_node_accept_bytes_match;
   3975 			  }
   3976 		      }
   3977 		  }
   3978 	    }
   3979 	}
   3980       else
   3981 # endif /* _LIBC */
   3982 	{
   3983 	  /* match with range expression?  */
   3984 #if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__)
   3985 	  wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'};
   3986 #else
   3987 	  wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'};
   3988 	  cmp_buf[2] = wc;
   3989 #endif
   3990 	  for (i = 0; i < cset->nranges; ++i)
   3991 	    {
   3992 	      cmp_buf[0] = cset->range_starts[i];
   3993 	      cmp_buf[4] = cset->range_ends[i];
   3994 	      if (wcscoll (cmp_buf, cmp_buf + 2) <= 0
   3995 		  && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0)
   3996 		{
   3997 		  match_len = char_len;
   3998 		  goto check_node_accept_bytes_match;
   3999 		}
   4000 	    }
   4001 	}
   4002     check_node_accept_bytes_match:
   4003       if (!cset->non_match)
   4004 	return match_len;
   4005       else
   4006 	{
   4007 	  if (match_len > 0)
   4008 	    return 0;
   4009 	  else
   4010 	    return (elem_len > char_len) ? elem_len : char_len;
   4011 	}
   4012     }
   4013   return 0;
   4014 }
   4015 
   4016 # ifdef _LIBC
   4017 static unsigned int
   4018 internal_function
   4019 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
   4020 {
   4021   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
   4022   if (nrules == 0)
   4023     {
   4024       if (mbs_len == 1)
   4025 	{
   4026 	  /* No valid character.  Match it as a single byte character.  */
   4027 	  const unsigned char *collseq = (const unsigned char *)
   4028 	    _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
   4029 	  return collseq[mbs[0]];
   4030 	}
   4031       return UINT_MAX;
   4032     }
   4033   else
   4034     {
   4035       int32_t idx;
   4036       const unsigned char *extra = (const unsigned char *)
   4037 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
   4038       int32_t extrasize = (const unsigned char *)
   4039 	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
   4040 
   4041       for (idx = 0; idx < extrasize;)
   4042 	{
   4043 	  int mbs_cnt;
   4044 	  bool found = false;
   4045 	  int32_t elem_mbs_len;
   4046 	  /* Skip the name of collating element name.  */
   4047 	  idx = idx + extra[idx] + 1;
   4048 	  elem_mbs_len = extra[idx++];
   4049 	  if (mbs_len == elem_mbs_len)
   4050 	    {
   4051 	      for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
   4052 		if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
   4053 		  break;
   4054 	      if (mbs_cnt == elem_mbs_len)
   4055 		/* Found the entry.  */
   4056 		found = true;
   4057 	    }
   4058 	  /* Skip the byte sequence of the collating element.  */
   4059 	  idx += elem_mbs_len;
   4060 	  /* Adjust for the alignment.  */
   4061 	  idx = (idx + 3) & ~3;
   4062 	  /* Skip the collation sequence value.  */
   4063 	  idx += sizeof (uint32_t);
   4064 	  /* Skip the wide char sequence of the collating element.  */
   4065 	  idx = idx + sizeof (uint32_t) * (extra[idx] + 1);
   4066 	  /* If we found the entry, return the sequence value.  */
   4067 	  if (found)
   4068 	    return *(uint32_t *) (extra + idx);
   4069 	  /* Skip the collation sequence value.  */
   4070 	  idx += sizeof (uint32_t);
   4071 	}
   4072       return UINT_MAX;
   4073     }
   4074 }
   4075 # endif /* _LIBC */
   4076 #endif /* RE_ENABLE_I18N */
   4077 
   4078 /* Check whether the node accepts the byte which is IDX-th
   4079    byte of the INPUT.  */
   4080 
   4081 static bool
   4082 internal_function
   4083 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
   4084 		   Idx idx)
   4085 {
   4086   unsigned char ch;
   4087   ch = re_string_byte_at (&mctx->input, idx);
   4088   switch (node->type)
   4089     {
   4090     case CHARACTER:
   4091       if (node->opr.c != ch)
   4092         return false;
   4093       break;
   4094 
   4095     case SIMPLE_BRACKET:
   4096       if (!bitset_contain (node->opr.sbcset, ch))
   4097         return false;
   4098       break;
   4099 
   4100 #ifdef RE_ENABLE_I18N
   4101     case OP_UTF8_PERIOD:
   4102       if (ch >= ASCII_CHARS)
   4103         return false;
   4104       /* FALLTHROUGH */
   4105 #endif
   4106     case OP_PERIOD:
   4107       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
   4108 	  || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
   4109 	return false;
   4110       break;
   4111 
   4112     default:
   4113       return false;
   4114     }
   4115 
   4116   if (node->constraint)
   4117     {
   4118       /* The node has constraints.  Check whether the current context
   4119 	 satisfies the constraints.  */
   4120       unsigned int context = re_string_context_at (&mctx->input, idx,
   4121 						   mctx->eflags);
   4122       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
   4123 	return false;
   4124     }
   4125 
   4126   return true;
   4127 }
   4128 
   4129 /* Extend the buffers, if the buffers have run out.  */
   4130 
   4131 static reg_errcode_t
   4132 internal_function
   4133 extend_buffers (re_match_context_t *mctx)
   4134 {
   4135   reg_errcode_t ret;
   4136   re_string_t *pstr = &mctx->input;
   4137 
   4138   /* Avoid overflow.  */
   4139   if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
   4140     return REG_ESPACE;
   4141 
   4142   /* Double the lengthes of the buffers.  */
   4143   ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
   4144   if (BE (ret != REG_NOERROR, 0))
   4145     return ret;
   4146 
   4147   if (mctx->state_log != NULL)
   4148     {
   4149       /* And double the length of state_log.  */
   4150       /* XXX We have no indication of the size of this buffer.  If this
   4151 	 allocation fail we have no indication that the state_log array
   4152 	 does not have the right size.  */
   4153       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
   4154 					      pstr->bufs_len + 1);
   4155       if (BE (new_array == NULL, 0))
   4156 	return REG_ESPACE;
   4157       mctx->state_log = new_array;
   4158     }
   4159 
   4160   /* Then reconstruct the buffers.  */
   4161   if (pstr->icase)
   4162     {
   4163 #ifdef RE_ENABLE_I18N
   4164       if (pstr->mb_cur_max > 1)
   4165 	{
   4166 	  ret = build_wcs_upper_buffer (pstr);
   4167 	  if (BE (ret != REG_NOERROR, 0))
   4168 	    return ret;
   4169 	}
   4170       else
   4171 #endif /* RE_ENABLE_I18N  */
   4172 	build_upper_buffer (pstr);
   4173     }
   4174   else
   4175     {
   4176 #ifdef RE_ENABLE_I18N
   4177       if (pstr->mb_cur_max > 1)
   4178 	build_wcs_buffer (pstr);
   4179       else
   4180 #endif /* RE_ENABLE_I18N  */
   4181 	{
   4182 	  if (pstr->trans != NULL)
   4183 	    re_string_translate_buffer (pstr);
   4184 	}
   4185     }
   4186   return REG_NOERROR;
   4187 }
   4188 
   4189 
   4190 /* Functions for matching context.  */
   4192 
   4193 /* Initialize MCTX.  */
   4194 
   4195 static reg_errcode_t
   4196 internal_function
   4197 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
   4198 {
   4199   mctx->eflags = eflags;
   4200   mctx->match_last = REG_MISSING;
   4201   if (n > 0)
   4202     {
   4203       /* Avoid overflow.  */
   4204       size_t max_object_size =
   4205 	MAX (sizeof (struct re_backref_cache_entry),
   4206 	     sizeof (re_sub_match_top_t *));
   4207       if (BE (SIZE_MAX / max_object_size < n, 0))
   4208 	return REG_ESPACE;
   4209 
   4210       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
   4211       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
   4212       if (BE (mctx->bkref_ents == NULL || mctx->sub_tops == NULL, 0))
   4213 	return REG_ESPACE;
   4214     }
   4215   /* Already zero-ed by the caller.
   4216      else
   4217        mctx->bkref_ents = NULL;
   4218      mctx->nbkref_ents = 0;
   4219      mctx->nsub_tops = 0;  */
   4220   mctx->abkref_ents = n;
   4221   mctx->max_mb_elem_len = 1;
   4222   mctx->asub_tops = n;
   4223   return REG_NOERROR;
   4224 }
   4225 
   4226 /* Clean the entries which depend on the current input in MCTX.
   4227    This function must be invoked when the matcher changes the start index
   4228    of the input, or changes the input string.  */
   4229 
   4230 static void
   4231 internal_function
   4232 match_ctx_clean (re_match_context_t *mctx)
   4233 {
   4234   Idx st_idx;
   4235   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
   4236     {
   4237       Idx sl_idx;
   4238       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
   4239       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
   4240 	{
   4241 	  re_sub_match_last_t *last = top->lasts[sl_idx];
   4242 	  re_free (last->path.array);
   4243 	  re_free (last);
   4244 	}
   4245       re_free (top->lasts);
   4246       if (top->path)
   4247 	{
   4248 	  re_free (top->path->array);
   4249 	  re_free (top->path);
   4250 	}
   4251       free (top);
   4252     }
   4253 
   4254   mctx->nsub_tops = 0;
   4255   mctx->nbkref_ents = 0;
   4256 }
   4257 
   4258 /* Free all the memory associated with MCTX.  */
   4259 
   4260 static void
   4261 internal_function
   4262 match_ctx_free (re_match_context_t *mctx)
   4263 {
   4264   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
   4265   match_ctx_clean (mctx);
   4266   re_free (mctx->sub_tops);
   4267   re_free (mctx->bkref_ents);
   4268 }
   4269 
   4270 /* Add a new backreference entry to MCTX.
   4271    Note that we assume that caller never call this function with duplicate
   4272    entry, and call with STR_IDX which isn't smaller than any existing entry.
   4273 */
   4274 
   4275 static reg_errcode_t
   4276 internal_function
   4277 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
   4278 		     Idx to)
   4279 {
   4280   if (mctx->nbkref_ents >= mctx->abkref_ents)
   4281     {
   4282       struct re_backref_cache_entry* new_entry;
   4283       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
   4284 			      mctx->abkref_ents * 2);
   4285       if (BE (new_entry == NULL, 0))
   4286 	{
   4287 	  re_free (mctx->bkref_ents);
   4288 	  return REG_ESPACE;
   4289 	}
   4290       mctx->bkref_ents = new_entry;
   4291       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
   4292 	      sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
   4293       mctx->abkref_ents *= 2;
   4294     }
   4295   if (mctx->nbkref_ents > 0
   4296       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
   4297     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
   4298 
   4299   mctx->bkref_ents[mctx->nbkref_ents].node = node;
   4300   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
   4301   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
   4302   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
   4303 
   4304   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
   4305      If bit N is clear, means that this entry won't epsilon-transition to
   4306      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
   4307      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
   4308      such node.
   4309 
   4310      A backreference does not epsilon-transition unless it is empty, so set
   4311      to all zeros if FROM != TO.  */
   4312   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
   4313     = (from == to ? -1 : 0);
   4314 
   4315   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
   4316   if (mctx->max_mb_elem_len < to - from)
   4317     mctx->max_mb_elem_len = to - from;
   4318   return REG_NOERROR;
   4319 }
   4320 
   4321 /* Return the first entry with the same str_idx, or REG_MISSING if none is
   4322    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
   4323 
   4324 static Idx
   4325 internal_function
   4326 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
   4327 {
   4328   Idx left, right, mid, last;
   4329   last = right = mctx->nbkref_ents;
   4330   for (left = 0; left < right;)
   4331     {
   4332       mid = (left + right) / 2;
   4333       if (mctx->bkref_ents[mid].str_idx < str_idx)
   4334 	left = mid + 1;
   4335       else
   4336 	right = mid;
   4337     }
   4338   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
   4339     return left;
   4340   else
   4341     return REG_MISSING;
   4342 }
   4343 
   4344 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
   4345    at STR_IDX.  */
   4346 
   4347 static reg_errcode_t
   4348 internal_function
   4349 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
   4350 {
   4351 #ifdef DEBUG
   4352   assert (mctx->sub_tops != NULL);
   4353   assert (mctx->asub_tops > 0);
   4354 #endif
   4355   if (BE (mctx->nsub_tops == mctx->asub_tops, 0))
   4356     {
   4357       Idx new_asub_tops = mctx->asub_tops * 2;
   4358       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
   4359 						   re_sub_match_top_t *,
   4360 						   new_asub_tops);
   4361       if (BE (new_array == NULL, 0))
   4362 	return REG_ESPACE;
   4363       mctx->sub_tops = new_array;
   4364       mctx->asub_tops = new_asub_tops;
   4365     }
   4366   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
   4367   if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, 0))
   4368     return REG_ESPACE;
   4369   mctx->sub_tops[mctx->nsub_tops]->node = node;
   4370   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
   4371   return REG_NOERROR;
   4372 }
   4373 
   4374 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
   4375    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.  */
   4376 
   4377 static re_sub_match_last_t *
   4378 internal_function
   4379 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
   4380 {
   4381   re_sub_match_last_t *new_entry;
   4382   if (BE (subtop->nlasts == subtop->alasts, 0))
   4383     {
   4384       Idx new_alasts = 2 * subtop->alasts + 1;
   4385       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
   4386 						    re_sub_match_last_t *,
   4387 						    new_alasts);
   4388       if (BE (new_array == NULL, 0))
   4389 	return NULL;
   4390       subtop->lasts = new_array;
   4391       subtop->alasts = new_alasts;
   4392     }
   4393   new_entry = calloc (1, sizeof (re_sub_match_last_t));
   4394   if (BE (new_entry != NULL, 1))
   4395     {
   4396       subtop->lasts[subtop->nlasts] = new_entry;
   4397       new_entry->node = node;
   4398       new_entry->str_idx = str_idx;
   4399       ++subtop->nlasts;
   4400     }
   4401   return new_entry;
   4402 }
   4403 
   4404 static void
   4405 internal_function
   4406 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
   4407 	       re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
   4408 {
   4409   sctx->sifted_states = sifted_sts;
   4410   sctx->limited_states = limited_sts;
   4411   sctx->last_node = last_node;
   4412   sctx->last_str_idx = last_str_idx;
   4413   re_node_set_init_empty (&sctx->limits);
   4414 }
   4415