Home | History | Annotate | Download | only in lib
      1 /* -*- buffer-read-only: t -*- vi: set ro: */
      2 /* DO NOT EDIT! GENERATED AUTOMATICALLY! */
      3 /* Extended regular expression matching and search library.
      4    Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
      5    Free Software Foundation, Inc.
      6    This file is part of the GNU C Library.
      7    Contributed by Isamu Hasegawa <isamu (at) yamato.ibm.com>.
      8 
      9    This program is free software; you can redistribute it and/or modify
     10    it under the terms of the GNU General Public License as published by
     11    the Free Software Foundation; either version 3, or (at your option)
     12    any later version.
     13 
     14    This program is distributed in the hope that it will be useful,
     15    but WITHOUT ANY WARRANTY; without even the implied warranty of
     16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17    GNU General Public License for more details.
     18 
     19    You should have received a copy of the GNU General Public License along
     20    with this program; if not, write to the Free Software Foundation,
     21    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
     22 
     23 static void re_string_construct_common (const char *str, Idx len,
     24 					re_string_t *pstr,
     25 					RE_TRANSLATE_TYPE trans, bool icase,
     26 					const re_dfa_t *dfa) internal_function;
     27 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
     28 					  const re_node_set *nodes,
     29 					  re_hashval_t hash) internal_function;
     30 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
     31 					  const re_node_set *nodes,
     32 					  unsigned int context,
     33 					  re_hashval_t hash) internal_function;
     34 
     35 /* Functions for string operation.  */
     37 
     38 /* This function allocate the buffers.  It is necessary to call
     39    re_string_reconstruct before using the object.  */
     40 
     41 static reg_errcode_t
     42 internal_function
     43 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
     44 		    RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
     45 {
     46   reg_errcode_t ret;
     47   Idx init_buf_len;
     48 
     49   /* Ensure at least one character fits into the buffers.  */
     50   if (init_len < dfa->mb_cur_max)
     51     init_len = dfa->mb_cur_max;
     52   init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
     53   re_string_construct_common (str, len, pstr, trans, icase, dfa);
     54 
     55   ret = re_string_realloc_buffers (pstr, init_buf_len);
     56   if (BE (ret != REG_NOERROR, 0))
     57     return ret;
     58 
     59   pstr->word_char = dfa->word_char;
     60   pstr->word_ops_used = dfa->word_ops_used;
     61   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
     62   pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
     63   pstr->valid_raw_len = pstr->valid_len;
     64   return REG_NOERROR;
     65 }
     66 
     67 /* This function allocate the buffers, and initialize them.  */
     68 
     69 static reg_errcode_t
     70 internal_function
     71 re_string_construct (re_string_t *pstr, const char *str, Idx len,
     72 		     RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
     73 {
     74   reg_errcode_t ret;
     75   memset (pstr, '\0', sizeof (re_string_t));
     76   re_string_construct_common (str, len, pstr, trans, icase, dfa);
     77 
     78   if (len > 0)
     79     {
     80       ret = re_string_realloc_buffers (pstr, len + 1);
     81       if (BE (ret != REG_NOERROR, 0))
     82 	return ret;
     83     }
     84   pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
     85 
     86   if (icase)
     87     {
     88 #ifdef RE_ENABLE_I18N
     89       if (dfa->mb_cur_max > 1)
     90 	{
     91 	  while (1)
     92 	    {
     93 	      ret = build_wcs_upper_buffer (pstr);
     94 	      if (BE (ret != REG_NOERROR, 0))
     95 		return ret;
     96 	      if (pstr->valid_raw_len >= len)
     97 		break;
     98 	      if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
     99 		break;
    100 	      ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
    101 	      if (BE (ret != REG_NOERROR, 0))
    102 		return ret;
    103 	    }
    104 	}
    105       else
    106 #endif /* RE_ENABLE_I18N  */
    107 	build_upper_buffer (pstr);
    108     }
    109   else
    110     {
    111 #ifdef RE_ENABLE_I18N
    112       if (dfa->mb_cur_max > 1)
    113 	build_wcs_buffer (pstr);
    114       else
    115 #endif /* RE_ENABLE_I18N  */
    116 	{
    117 	  if (trans != NULL)
    118 	    re_string_translate_buffer (pstr);
    119 	  else
    120 	    {
    121 	      pstr->valid_len = pstr->bufs_len;
    122 	      pstr->valid_raw_len = pstr->bufs_len;
    123 	    }
    124 	}
    125     }
    126 
    127   return REG_NOERROR;
    128 }
    129 
    130 /* Helper functions for re_string_allocate, and re_string_construct.  */
    131 
    132 static reg_errcode_t
    133 internal_function
    134 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
    135 {
    136 #ifdef RE_ENABLE_I18N
    137   if (pstr->mb_cur_max > 1)
    138     {
    139       wint_t *new_wcs;
    140 
    141       /* Avoid overflow.  */
    142       size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
    143       if (BE (SIZE_MAX / max_object_size < new_buf_len, 0))
    144 	return REG_ESPACE;
    145 
    146       new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
    147       if (BE (new_wcs == NULL, 0))
    148 	return REG_ESPACE;
    149       pstr->wcs = new_wcs;
    150       if (pstr->offsets != NULL)
    151 	{
    152 	  Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
    153 	  if (BE (new_offsets == NULL, 0))
    154 	    return REG_ESPACE;
    155 	  pstr->offsets = new_offsets;
    156 	}
    157     }
    158 #endif /* RE_ENABLE_I18N  */
    159   if (pstr->mbs_allocated)
    160     {
    161       unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
    162 					   new_buf_len);
    163       if (BE (new_mbs == NULL, 0))
    164 	return REG_ESPACE;
    165       pstr->mbs = new_mbs;
    166     }
    167   pstr->bufs_len = new_buf_len;
    168   return REG_NOERROR;
    169 }
    170 
    171 
    172 static void
    173 internal_function
    174 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
    175 			    RE_TRANSLATE_TYPE trans, bool icase,
    176 			    const re_dfa_t *dfa)
    177 {
    178   pstr->raw_mbs = (const unsigned char *) str;
    179   pstr->len = len;
    180   pstr->raw_len = len;
    181   pstr->trans = trans;
    182   pstr->icase = icase;
    183   pstr->mbs_allocated = (trans != NULL || icase);
    184   pstr->mb_cur_max = dfa->mb_cur_max;
    185   pstr->is_utf8 = dfa->is_utf8;
    186   pstr->map_notascii = dfa->map_notascii;
    187   pstr->stop = pstr->len;
    188   pstr->raw_stop = pstr->stop;
    189 }
    190 
    191 #ifdef RE_ENABLE_I18N
    192 
    193 /* Build wide character buffer PSTR->WCS.
    194    If the byte sequence of the string are:
    195      <mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
    196    Then wide character buffer will be:
    197      <wc1>   , WEOF    , <wc2>   , WEOF    , <wc3>
    198    We use WEOF for padding, they indicate that the position isn't
    199    a first byte of a multibyte character.
    200 
    201    Note that this function assumes PSTR->VALID_LEN elements are already
    202    built and starts from PSTR->VALID_LEN.  */
    203 
    204 static void
    205 internal_function
    206 build_wcs_buffer (re_string_t *pstr)
    207 {
    208 #ifdef _LIBC
    209   unsigned char buf[MB_LEN_MAX];
    210   assert (MB_LEN_MAX >= pstr->mb_cur_max);
    211 #else
    212   unsigned char buf[64];
    213 #endif
    214   mbstate_t prev_st;
    215   Idx byte_idx, end_idx, remain_len;
    216   size_t mbclen;
    217 
    218   /* Build the buffers from pstr->valid_len to either pstr->len or
    219      pstr->bufs_len.  */
    220   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    221   for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
    222     {
    223       wchar_t wc;
    224       const char *p;
    225 
    226       remain_len = end_idx - byte_idx;
    227       prev_st = pstr->cur_state;
    228       /* Apply the translation if we need.  */
    229       if (BE (pstr->trans != NULL, 0))
    230 	{
    231 	  int i, ch;
    232 
    233 	  for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
    234 	    {
    235 	      ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
    236 	      buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
    237 	    }
    238 	  p = (const char *) buf;
    239 	}
    240       else
    241 	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
    242       mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
    243       if (BE (mbclen == (size_t) -2, 0))
    244 	{
    245 	  /* The buffer doesn't have enough space, finish to build.  */
    246 	  pstr->cur_state = prev_st;
    247 	  break;
    248 	}
    249       else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
    250 	{
    251 	  /* We treat these cases as a singlebyte character.  */
    252 	  mbclen = 1;
    253 	  wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
    254 	  if (BE (pstr->trans != NULL, 0))
    255 	    wc = pstr->trans[wc];
    256 	  pstr->cur_state = prev_st;
    257 	}
    258 
    259       /* Write wide character and padding.  */
    260       pstr->wcs[byte_idx++] = wc;
    261       /* Write paddings.  */
    262       for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    263 	pstr->wcs[byte_idx++] = WEOF;
    264     }
    265   pstr->valid_len = byte_idx;
    266   pstr->valid_raw_len = byte_idx;
    267 }
    268 
    269 /* Build wide character buffer PSTR->WCS like build_wcs_buffer,
    270    but for REG_ICASE.  */
    271 
    272 static reg_errcode_t
    273 internal_function
    274 build_wcs_upper_buffer (re_string_t *pstr)
    275 {
    276   mbstate_t prev_st;
    277   Idx src_idx, byte_idx, end_idx, remain_len;
    278   size_t mbclen;
    279 #ifdef _LIBC
    280   char buf[MB_LEN_MAX];
    281   assert (MB_LEN_MAX >= pstr->mb_cur_max);
    282 #else
    283   char buf[64];
    284 #endif
    285 
    286   byte_idx = pstr->valid_len;
    287   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    288 
    289   /* The following optimization assumes that ASCII characters can be
    290      mapped to wide characters with a simple cast.  */
    291   if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
    292     {
    293       while (byte_idx < end_idx)
    294 	{
    295 	  wchar_t wc;
    296 
    297 	  if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
    298 	      && mbsinit (&pstr->cur_state))
    299 	    {
    300 	      /* In case of a singlebyte character.  */
    301 	      pstr->mbs[byte_idx]
    302 		= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
    303 	      /* The next step uses the assumption that wchar_t is encoded
    304 		 ASCII-safe: all ASCII values can be converted like this.  */
    305 	      pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
    306 	      ++byte_idx;
    307 	      continue;
    308 	    }
    309 
    310 	  remain_len = end_idx - byte_idx;
    311 	  prev_st = pstr->cur_state;
    312 	  mbclen = __mbrtowc (&wc,
    313 			      ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
    314 			       + byte_idx), remain_len, &pstr->cur_state);
    315 	  if (BE (mbclen < (size_t) -2, 1))
    316 	    {
    317 	      wchar_t wcu = wc;
    318 	      if (iswlower (wc))
    319 		{
    320 		  size_t mbcdlen;
    321 
    322 		  wcu = towupper (wc);
    323 		  mbcdlen = wcrtomb (buf, wcu, &prev_st);
    324 		  if (BE (mbclen == mbcdlen, 1))
    325 		    memcpy (pstr->mbs + byte_idx, buf, mbclen);
    326 		  else
    327 		    {
    328 		      src_idx = byte_idx;
    329 		      goto offsets_needed;
    330 		    }
    331 		}
    332 	      else
    333 		memcpy (pstr->mbs + byte_idx,
    334 			pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
    335 	      pstr->wcs[byte_idx++] = wcu;
    336 	      /* Write paddings.  */
    337 	      for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    338 		pstr->wcs[byte_idx++] = WEOF;
    339 	    }
    340 	  else if (mbclen == (size_t) -1 || mbclen == 0)
    341 	    {
    342 	      /* It is an invalid character or '\0'.  Just use the byte.  */
    343 	      int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
    344 	      pstr->mbs[byte_idx] = ch;
    345 	      /* And also cast it to wide char.  */
    346 	      pstr->wcs[byte_idx++] = (wchar_t) ch;
    347 	      if (BE (mbclen == (size_t) -1, 0))
    348 		pstr->cur_state = prev_st;
    349 	    }
    350 	  else
    351 	    {
    352 	      /* The buffer doesn't have enough space, finish to build.  */
    353 	      pstr->cur_state = prev_st;
    354 	      break;
    355 	    }
    356 	}
    357       pstr->valid_len = byte_idx;
    358       pstr->valid_raw_len = byte_idx;
    359       return REG_NOERROR;
    360     }
    361   else
    362     for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
    363       {
    364 	wchar_t wc;
    365 	const char *p;
    366       offsets_needed:
    367 	remain_len = end_idx - byte_idx;
    368 	prev_st = pstr->cur_state;
    369 	if (BE (pstr->trans != NULL, 0))
    370 	  {
    371 	    int i, ch;
    372 
    373 	    for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
    374 	      {
    375 		ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
    376 		buf[i] = pstr->trans[ch];
    377 	      }
    378 	    p = (const char *) buf;
    379 	  }
    380 	else
    381 	  p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
    382 	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
    383 	if (BE (mbclen < (size_t) -2, 1))
    384 	  {
    385 	    wchar_t wcu = wc;
    386 	    if (iswlower (wc))
    387 	      {
    388 		size_t mbcdlen;
    389 
    390 		wcu = towupper (wc);
    391 		mbcdlen = wcrtomb ((char *) buf, wcu, &prev_st);
    392 		if (BE (mbclen == mbcdlen, 1))
    393 		  memcpy (pstr->mbs + byte_idx, buf, mbclen);
    394 		else if (mbcdlen != (size_t) -1)
    395 		  {
    396 		    size_t i;
    397 
    398 		    if (byte_idx + mbcdlen > pstr->bufs_len)
    399 		      {
    400 			pstr->cur_state = prev_st;
    401 			break;
    402 		      }
    403 
    404 		    if (pstr->offsets == NULL)
    405 		      {
    406 			pstr->offsets = re_malloc (Idx, pstr->bufs_len);
    407 
    408 			if (pstr->offsets == NULL)
    409 			  return REG_ESPACE;
    410 		      }
    411 		    if (!pstr->offsets_needed)
    412 		      {
    413 			for (i = 0; i < (size_t) byte_idx; ++i)
    414 			  pstr->offsets[i] = i;
    415 			pstr->offsets_needed = 1;
    416 		      }
    417 
    418 		    memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
    419 		    pstr->wcs[byte_idx] = wcu;
    420 		    pstr->offsets[byte_idx] = src_idx;
    421 		    for (i = 1; i < mbcdlen; ++i)
    422 		      {
    423 			pstr->offsets[byte_idx + i]
    424 			  = src_idx + (i < mbclen ? i : mbclen - 1);
    425 			pstr->wcs[byte_idx + i] = WEOF;
    426 		      }
    427 		    pstr->len += mbcdlen - mbclen;
    428 		    if (pstr->raw_stop > src_idx)
    429 		      pstr->stop += mbcdlen - mbclen;
    430 		    end_idx = (pstr->bufs_len > pstr->len)
    431 			      ? pstr->len : pstr->bufs_len;
    432 		    byte_idx += mbcdlen;
    433 		    src_idx += mbclen;
    434 		    continue;
    435 		  }
    436                 else
    437                   memcpy (pstr->mbs + byte_idx, p, mbclen);
    438 	      }
    439 	    else
    440 	      memcpy (pstr->mbs + byte_idx, p, mbclen);
    441 
    442 	    if (BE (pstr->offsets_needed != 0, 0))
    443 	      {
    444 		size_t i;
    445 		for (i = 0; i < mbclen; ++i)
    446 		  pstr->offsets[byte_idx + i] = src_idx + i;
    447 	      }
    448 	    src_idx += mbclen;
    449 
    450 	    pstr->wcs[byte_idx++] = wcu;
    451 	    /* Write paddings.  */
    452 	    for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
    453 	      pstr->wcs[byte_idx++] = WEOF;
    454 	  }
    455 	else if (mbclen == (size_t) -1 || mbclen == 0)
    456 	  {
    457 	    /* It is an invalid character or '\0'.  Just use the byte.  */
    458 	    int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
    459 
    460 	    if (BE (pstr->trans != NULL, 0))
    461 	      ch = pstr->trans [ch];
    462 	    pstr->mbs[byte_idx] = ch;
    463 
    464 	    if (BE (pstr->offsets_needed != 0, 0))
    465 	      pstr->offsets[byte_idx] = src_idx;
    466 	    ++src_idx;
    467 
    468 	    /* And also cast it to wide char.  */
    469 	    pstr->wcs[byte_idx++] = (wchar_t) ch;
    470 	    if (BE (mbclen == (size_t) -1, 0))
    471 	      pstr->cur_state = prev_st;
    472 	  }
    473 	else
    474 	  {
    475 	    /* The buffer doesn't have enough space, finish to build.  */
    476 	    pstr->cur_state = prev_st;
    477 	    break;
    478 	  }
    479       }
    480   pstr->valid_len = byte_idx;
    481   pstr->valid_raw_len = src_idx;
    482   return REG_NOERROR;
    483 }
    484 
    485 /* Skip characters until the index becomes greater than NEW_RAW_IDX.
    486    Return the index.  */
    487 
    488 static Idx
    489 internal_function
    490 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
    491 {
    492   mbstate_t prev_st;
    493   Idx rawbuf_idx;
    494   size_t mbclen;
    495   wint_t wc = WEOF;
    496 
    497   /* Skip the characters which are not necessary to check.  */
    498   for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
    499        rawbuf_idx < new_raw_idx;)
    500     {
    501       wchar_t wc2;
    502       Idx remain_len;
    503       remain_len = pstr->len - rawbuf_idx;
    504       prev_st = pstr->cur_state;
    505       mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
    506 			  remain_len, &pstr->cur_state);
    507       if (BE (mbclen == (size_t) -2 || mbclen == (size_t) -1 || mbclen == 0, 0))
    508 	{
    509 	  /* We treat these cases as a single byte character.  */
    510 	  if (mbclen == 0 || remain_len == 0)
    511 	    wc = L'\0';
    512 	  else
    513 	    wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
    514 	  mbclen = 1;
    515 	  pstr->cur_state = prev_st;
    516 	}
    517       else
    518 	wc = wc2;
    519       /* Then proceed the next character.  */
    520       rawbuf_idx += mbclen;
    521     }
    522   *last_wc = wc;
    523   return rawbuf_idx;
    524 }
    525 #endif /* RE_ENABLE_I18N  */
    526 
    527 /* Build the buffer PSTR->MBS, and apply the translation if we need.
    528    This function is used in case of REG_ICASE.  */
    529 
    530 static void
    531 internal_function
    532 build_upper_buffer (re_string_t *pstr)
    533 {
    534   Idx char_idx, end_idx;
    535   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    536 
    537   for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
    538     {
    539       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
    540       if (BE (pstr->trans != NULL, 0))
    541 	ch = pstr->trans[ch];
    542       if (islower (ch))
    543 	pstr->mbs[char_idx] = toupper (ch);
    544       else
    545 	pstr->mbs[char_idx] = ch;
    546     }
    547   pstr->valid_len = char_idx;
    548   pstr->valid_raw_len = char_idx;
    549 }
    550 
    551 /* Apply TRANS to the buffer in PSTR.  */
    552 
    553 static void
    554 internal_function
    555 re_string_translate_buffer (re_string_t *pstr)
    556 {
    557   Idx buf_idx, end_idx;
    558   end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
    559 
    560   for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
    561     {
    562       int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
    563       pstr->mbs[buf_idx] = pstr->trans[ch];
    564     }
    565 
    566   pstr->valid_len = buf_idx;
    567   pstr->valid_raw_len = buf_idx;
    568 }
    569 
    570 /* This function re-construct the buffers.
    571    Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
    572    convert to upper case in case of REG_ICASE, apply translation.  */
    573 
    574 static reg_errcode_t
    575 internal_function
    576 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
    577 {
    578   Idx offset;
    579 
    580   if (BE (pstr->raw_mbs_idx <= idx, 0))
    581     offset = idx - pstr->raw_mbs_idx;
    582   else
    583     {
    584       /* Reset buffer.  */
    585 #ifdef RE_ENABLE_I18N
    586       if (pstr->mb_cur_max > 1)
    587 	memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
    588 #endif /* RE_ENABLE_I18N */
    589       pstr->len = pstr->raw_len;
    590       pstr->stop = pstr->raw_stop;
    591       pstr->valid_len = 0;
    592       pstr->raw_mbs_idx = 0;
    593       pstr->valid_raw_len = 0;
    594       pstr->offsets_needed = 0;
    595       pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
    596 			   : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
    597       if (!pstr->mbs_allocated)
    598 	pstr->mbs = (unsigned char *) pstr->raw_mbs;
    599       offset = idx;
    600     }
    601 
    602   if (BE (offset != 0, 1))
    603     {
    604       /* Should the already checked characters be kept?  */
    605       if (BE (offset < pstr->valid_raw_len, 1))
    606 	{
    607 	  /* Yes, move them to the front of the buffer.  */
    608 #ifdef RE_ENABLE_I18N
    609 	  if (BE (pstr->offsets_needed, 0))
    610 	    {
    611 	      Idx low = 0, high = pstr->valid_len, mid;
    612 	      do
    613 		{
    614 		  mid = (high + low) / 2;
    615 		  if (pstr->offsets[mid] > offset)
    616 		    high = mid;
    617 		  else if (pstr->offsets[mid] < offset)
    618 		    low = mid + 1;
    619 		  else
    620 		    break;
    621 		}
    622 	      while (low < high);
    623 	      if (pstr->offsets[mid] < offset)
    624 		++mid;
    625 	      pstr->tip_context = re_string_context_at (pstr, mid - 1,
    626 							eflags);
    627 	      /* This can be quite complicated, so handle specially
    628 		 only the common and easy case where the character with
    629 		 different length representation of lower and upper
    630 		 case is present at or after offset.  */
    631 	      if (pstr->valid_len > offset
    632 		  && mid == offset && pstr->offsets[mid] == offset)
    633 		{
    634 		  memmove (pstr->wcs, pstr->wcs + offset,
    635 			   (pstr->valid_len - offset) * sizeof (wint_t));
    636 		  memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
    637 		  pstr->valid_len -= offset;
    638 		  pstr->valid_raw_len -= offset;
    639 		  for (low = 0; low < pstr->valid_len; low++)
    640 		    pstr->offsets[low] = pstr->offsets[low + offset] - offset;
    641 		}
    642 	      else
    643 		{
    644 		  /* Otherwise, just find out how long the partial multibyte
    645 		     character at offset is and fill it with WEOF/255.  */
    646 		  pstr->len = pstr->raw_len - idx + offset;
    647 		  pstr->stop = pstr->raw_stop - idx + offset;
    648 		  pstr->offsets_needed = 0;
    649 		  while (mid > 0 && pstr->offsets[mid - 1] == offset)
    650 		    --mid;
    651 		  while (mid < pstr->valid_len)
    652 		    if (pstr->wcs[mid] != WEOF)
    653 		      break;
    654 		    else
    655 		      ++mid;
    656 		  if (mid == pstr->valid_len)
    657 		    pstr->valid_len = 0;
    658 		  else
    659 		    {
    660 		      pstr->valid_len = pstr->offsets[mid] - offset;
    661 		      if (pstr->valid_len)
    662 			{
    663 			  for (low = 0; low < pstr->valid_len; ++low)
    664 			    pstr->wcs[low] = WEOF;
    665 			  memset (pstr->mbs, 255, pstr->valid_len);
    666 			}
    667 		    }
    668 		  pstr->valid_raw_len = pstr->valid_len;
    669 		}
    670 	    }
    671 	  else
    672 #endif
    673 	    {
    674 	      pstr->tip_context = re_string_context_at (pstr, offset - 1,
    675 							eflags);
    676 #ifdef RE_ENABLE_I18N
    677 	      if (pstr->mb_cur_max > 1)
    678 		memmove (pstr->wcs, pstr->wcs + offset,
    679 			 (pstr->valid_len - offset) * sizeof (wint_t));
    680 #endif /* RE_ENABLE_I18N */
    681 	      if (BE (pstr->mbs_allocated, 0))
    682 		memmove (pstr->mbs, pstr->mbs + offset,
    683 			 pstr->valid_len - offset);
    684 	      pstr->valid_len -= offset;
    685 	      pstr->valid_raw_len -= offset;
    686 #if DEBUG
    687 	      assert (pstr->valid_len > 0);
    688 #endif
    689 	    }
    690 	}
    691       else
    692 	{
    693 #ifdef RE_ENABLE_I18N
    694 	  /* No, skip all characters until IDX.  */
    695 	  Idx prev_valid_len = pstr->valid_len;
    696 
    697 	  if (BE (pstr->offsets_needed, 0))
    698 	    {
    699 	      pstr->len = pstr->raw_len - idx + offset;
    700 	      pstr->stop = pstr->raw_stop - idx + offset;
    701 	      pstr->offsets_needed = 0;
    702 	    }
    703 #endif
    704 	  pstr->valid_len = 0;
    705 #ifdef RE_ENABLE_I18N
    706 	  if (pstr->mb_cur_max > 1)
    707 	    {
    708 	      Idx wcs_idx;
    709 	      wint_t wc = WEOF;
    710 
    711 	      if (pstr->is_utf8)
    712 		{
    713 		  const unsigned char *raw, *p, *end;
    714 
    715 		  /* Special case UTF-8.  Multi-byte chars start with any
    716 		     byte other than 0x80 - 0xbf.  */
    717 		  raw = pstr->raw_mbs + pstr->raw_mbs_idx;
    718 		  end = raw + (offset - pstr->mb_cur_max);
    719 		  if (end < pstr->raw_mbs)
    720 		    end = pstr->raw_mbs;
    721 		  p = raw + offset - 1;
    722 #ifdef _LIBC
    723 		  /* We know the wchar_t encoding is UCS4, so for the simple
    724 		     case, ASCII characters, skip the conversion step.  */
    725 		  if (isascii (*p) && BE (pstr->trans == NULL, 1))
    726 		    {
    727 		      memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
    728 		      /* pstr->valid_len = 0; */
    729 		      wc = (wchar_t) *p;
    730 		    }
    731 		  else
    732 #endif
    733 		    for (; p >= end; --p)
    734 		      if ((*p & 0xc0) != 0x80)
    735 			{
    736 			  mbstate_t cur_state;
    737 			  wchar_t wc2;
    738 			  Idx mlen = raw + pstr->len - p;
    739 			  unsigned char buf[6];
    740 			  size_t mbclen;
    741 
    742 			  if (BE (pstr->trans != NULL, 0))
    743 			    {
    744 			      int i = mlen < 6 ? mlen : 6;
    745 			      while (--i >= 0)
    746 				buf[i] = pstr->trans[p[i]];
    747 			    }
    748 			  /* XXX Don't use mbrtowc, we know which conversion
    749 			     to use (UTF-8 -> UCS4).  */
    750 			  memset (&cur_state, 0, sizeof (cur_state));
    751 			  mbclen = __mbrtowc (&wc2, (const char *) p, mlen,
    752 					      &cur_state);
    753 			  if (raw + offset - p <= mbclen
    754 			      && mbclen < (size_t) -2)
    755 			    {
    756 			      memset (&pstr->cur_state, '\0',
    757 				      sizeof (mbstate_t));
    758 			      pstr->valid_len = mbclen - (raw + offset - p);
    759 			      wc = wc2;
    760 			    }
    761 			  break;
    762 			}
    763 		}
    764 
    765 	      if (wc == WEOF)
    766 		pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
    767 	      if (wc == WEOF)
    768 		pstr->tip_context
    769 		  = re_string_context_at (pstr, prev_valid_len - 1, eflags);
    770 	      else
    771 		pstr->tip_context = ((BE (pstr->word_ops_used != 0, 0)
    772 				      && IS_WIDE_WORD_CHAR (wc))
    773 				     ? CONTEXT_WORD
    774 				     : ((IS_WIDE_NEWLINE (wc)
    775 					 && pstr->newline_anchor)
    776 					? CONTEXT_NEWLINE : 0));
    777 	      if (BE (pstr->valid_len, 0))
    778 		{
    779 		  for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
    780 		    pstr->wcs[wcs_idx] = WEOF;
    781 		  if (pstr->mbs_allocated)
    782 		    memset (pstr->mbs, 255, pstr->valid_len);
    783 		}
    784 	      pstr->valid_raw_len = pstr->valid_len;
    785 	    }
    786 	  else
    787 #endif /* RE_ENABLE_I18N */
    788 	    {
    789 	      int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
    790 	      pstr->valid_raw_len = 0;
    791 	      if (pstr->trans)
    792 		c = pstr->trans[c];
    793 	      pstr->tip_context = (bitset_contain (pstr->word_char, c)
    794 				   ? CONTEXT_WORD
    795 				   : ((IS_NEWLINE (c) && pstr->newline_anchor)
    796 				      ? CONTEXT_NEWLINE : 0));
    797 	    }
    798 	}
    799       if (!BE (pstr->mbs_allocated, 0))
    800 	pstr->mbs += offset;
    801     }
    802   pstr->raw_mbs_idx = idx;
    803   pstr->len -= offset;
    804   pstr->stop -= offset;
    805 
    806   /* Then build the buffers.  */
    807 #ifdef RE_ENABLE_I18N
    808   if (pstr->mb_cur_max > 1)
    809     {
    810       if (pstr->icase)
    811 	{
    812 	  reg_errcode_t ret = build_wcs_upper_buffer (pstr);
    813 	  if (BE (ret != REG_NOERROR, 0))
    814 	    return ret;
    815 	}
    816       else
    817 	build_wcs_buffer (pstr);
    818     }
    819   else
    820 #endif /* RE_ENABLE_I18N */
    821     if (BE (pstr->mbs_allocated, 0))
    822       {
    823 	if (pstr->icase)
    824 	  build_upper_buffer (pstr);
    825 	else if (pstr->trans != NULL)
    826 	  re_string_translate_buffer (pstr);
    827       }
    828     else
    829       pstr->valid_len = pstr->len;
    830 
    831   pstr->cur_idx = 0;
    832   return REG_NOERROR;
    833 }
    834 
    835 static unsigned char
    836 internal_function __attribute ((pure))
    837 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
    838 {
    839   int ch;
    840   Idx off;
    841 
    842   /* Handle the common (easiest) cases first.  */
    843   if (BE (!pstr->mbs_allocated, 1))
    844     return re_string_peek_byte (pstr, idx);
    845 
    846 #ifdef RE_ENABLE_I18N
    847   if (pstr->mb_cur_max > 1
    848       && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
    849     return re_string_peek_byte (pstr, idx);
    850 #endif
    851 
    852   off = pstr->cur_idx + idx;
    853 #ifdef RE_ENABLE_I18N
    854   if (pstr->offsets_needed)
    855     off = pstr->offsets[off];
    856 #endif
    857 
    858   ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
    859 
    860 #ifdef RE_ENABLE_I18N
    861   /* Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I
    862      this function returns CAPITAL LETTER I instead of first byte of
    863      DOTLESS SMALL LETTER I.  The latter would confuse the parser,
    864      since peek_byte_case doesn't advance cur_idx in any way.  */
    865   if (pstr->offsets_needed && !isascii (ch))
    866     return re_string_peek_byte (pstr, idx);
    867 #endif
    868 
    869   return ch;
    870 }
    871 
    872 static unsigned char
    873 internal_function __attribute ((pure))
    874 re_string_fetch_byte_case (re_string_t *pstr)
    875 {
    876   if (BE (!pstr->mbs_allocated, 1))
    877     return re_string_fetch_byte (pstr);
    878 
    879 #ifdef RE_ENABLE_I18N
    880   if (pstr->offsets_needed)
    881     {
    882       Idx off;
    883       int ch;
    884 
    885       /* For tr_TR.UTF-8 [[:islower:]] there is
    886 	 [[: CAPITAL LETTER I WITH DOT lower:]] in mbs.  Skip
    887 	 in that case the whole multi-byte character and return
    888 	 the original letter.  On the other side, with
    889 	 [[: DOTLESS SMALL LETTER I return [[:I, as doing
    890 	 anything else would complicate things too much.  */
    891 
    892       if (!re_string_first_byte (pstr, pstr->cur_idx))
    893 	return re_string_fetch_byte (pstr);
    894 
    895       off = pstr->offsets[pstr->cur_idx];
    896       ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
    897 
    898       if (! isascii (ch))
    899 	return re_string_fetch_byte (pstr);
    900 
    901       re_string_skip_bytes (pstr,
    902 			    re_string_char_size_at (pstr, pstr->cur_idx));
    903       return ch;
    904     }
    905 #endif
    906 
    907   return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
    908 }
    909 
    910 static void
    911 internal_function
    912 re_string_destruct (re_string_t *pstr)
    913 {
    914 #ifdef RE_ENABLE_I18N
    915   re_free (pstr->wcs);
    916   re_free (pstr->offsets);
    917 #endif /* RE_ENABLE_I18N  */
    918   if (pstr->mbs_allocated)
    919     re_free (pstr->mbs);
    920 }
    921 
    922 /* Return the context at IDX in INPUT.  */
    923 
    924 static unsigned int
    925 internal_function
    926 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
    927 {
    928   int c;
    929   if (BE (! REG_VALID_INDEX (idx), 0))
    930     /* In this case, we use the value stored in input->tip_context,
    931        since we can't know the character in input->mbs[-1] here.  */
    932     return input->tip_context;
    933   if (BE (idx == input->len, 0))
    934     return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
    935 	    : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
    936 #ifdef RE_ENABLE_I18N
    937   if (input->mb_cur_max > 1)
    938     {
    939       wint_t wc;
    940       Idx wc_idx = idx;
    941       while(input->wcs[wc_idx] == WEOF)
    942 	{
    943 #ifdef DEBUG
    944 	  /* It must not happen.  */
    945 	  assert (REG_VALID_INDEX (wc_idx));
    946 #endif
    947 	  --wc_idx;
    948 	  if (! REG_VALID_INDEX (wc_idx))
    949 	    return input->tip_context;
    950 	}
    951       wc = input->wcs[wc_idx];
    952       if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc))
    953 	return CONTEXT_WORD;
    954       return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
    955 	      ? CONTEXT_NEWLINE : 0);
    956     }
    957   else
    958 #endif
    959     {
    960       c = re_string_byte_at (input, idx);
    961       if (bitset_contain (input->word_char, c))
    962 	return CONTEXT_WORD;
    963       return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
    964     }
    965 }
    966 
    967 /* Functions for set operation.  */
    969 
    970 static reg_errcode_t
    971 internal_function
    972 re_node_set_alloc (re_node_set *set, Idx size)
    973 {
    974   set->alloc = size;
    975   set->nelem = 0;
    976   set->elems = re_malloc (Idx, size);
    977   if (BE (set->elems == NULL, 0))
    978     return REG_ESPACE;
    979   return REG_NOERROR;
    980 }
    981 
    982 static reg_errcode_t
    983 internal_function
    984 re_node_set_init_1 (re_node_set *set, Idx elem)
    985 {
    986   set->alloc = 1;
    987   set->nelem = 1;
    988   set->elems = re_malloc (Idx, 1);
    989   if (BE (set->elems == NULL, 0))
    990     {
    991       set->alloc = set->nelem = 0;
    992       return REG_ESPACE;
    993     }
    994   set->elems[0] = elem;
    995   return REG_NOERROR;
    996 }
    997 
    998 static reg_errcode_t
    999 internal_function
   1000 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
   1001 {
   1002   set->alloc = 2;
   1003   set->elems = re_malloc (Idx, 2);
   1004   if (BE (set->elems == NULL, 0))
   1005     return REG_ESPACE;
   1006   if (elem1 == elem2)
   1007     {
   1008       set->nelem = 1;
   1009       set->elems[0] = elem1;
   1010     }
   1011   else
   1012     {
   1013       set->nelem = 2;
   1014       if (elem1 < elem2)
   1015 	{
   1016 	  set->elems[0] = elem1;
   1017 	  set->elems[1] = elem2;
   1018 	}
   1019       else
   1020 	{
   1021 	  set->elems[0] = elem2;
   1022 	  set->elems[1] = elem1;
   1023 	}
   1024     }
   1025   return REG_NOERROR;
   1026 }
   1027 
   1028 static reg_errcode_t
   1029 internal_function
   1030 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
   1031 {
   1032   dest->nelem = src->nelem;
   1033   if (src->nelem > 0)
   1034     {
   1035       dest->alloc = dest->nelem;
   1036       dest->elems = re_malloc (Idx, dest->alloc);
   1037       if (BE (dest->elems == NULL, 0))
   1038 	{
   1039 	  dest->alloc = dest->nelem = 0;
   1040 	  return REG_ESPACE;
   1041 	}
   1042       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
   1043     }
   1044   else
   1045     re_node_set_init_empty (dest);
   1046   return REG_NOERROR;
   1047 }
   1048 
   1049 /* Calculate the intersection of the sets SRC1 and SRC2. And merge it to
   1050    DEST. Return value indicate the error code or REG_NOERROR if succeeded.
   1051    Note: We assume dest->elems is NULL, when dest->alloc is 0.  */
   1052 
   1053 static reg_errcode_t
   1054 internal_function
   1055 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
   1056 			   const re_node_set *src2)
   1057 {
   1058   Idx i1, i2, is, id, delta, sbase;
   1059   if (src1->nelem == 0 || src2->nelem == 0)
   1060     return REG_NOERROR;
   1061 
   1062   /* We need dest->nelem + 2 * elems_in_intersection; this is a
   1063      conservative estimate.  */
   1064   if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
   1065     {
   1066       Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
   1067       Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
   1068       if (BE (new_elems == NULL, 0))
   1069         return REG_ESPACE;
   1070       dest->elems = new_elems;
   1071       dest->alloc = new_alloc;
   1072     }
   1073 
   1074   /* Find the items in the intersection of SRC1 and SRC2, and copy
   1075      into the top of DEST those that are not already in DEST itself.  */
   1076   sbase = dest->nelem + src1->nelem + src2->nelem;
   1077   i1 = src1->nelem - 1;
   1078   i2 = src2->nelem - 1;
   1079   id = dest->nelem - 1;
   1080   for (;;)
   1081     {
   1082       if (src1->elems[i1] == src2->elems[i2])
   1083 	{
   1084 	  /* Try to find the item in DEST.  Maybe we could binary search?  */
   1085 	  while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1])
   1086 	    --id;
   1087 
   1088           if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1])
   1089             dest->elems[--sbase] = src1->elems[i1];
   1090 
   1091 	  if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2))
   1092 	    break;
   1093 	}
   1094 
   1095       /* Lower the highest of the two items.  */
   1096       else if (src1->elems[i1] < src2->elems[i2])
   1097 	{
   1098 	  if (! REG_VALID_INDEX (--i2))
   1099 	    break;
   1100 	}
   1101       else
   1102 	{
   1103 	  if (! REG_VALID_INDEX (--i1))
   1104 	    break;
   1105 	}
   1106     }
   1107 
   1108   id = dest->nelem - 1;
   1109   is = dest->nelem + src1->nelem + src2->nelem - 1;
   1110   delta = is - sbase + 1;
   1111 
   1112   /* Now copy.  When DELTA becomes zero, the remaining
   1113      DEST elements are already in place; this is more or
   1114      less the same loop that is in re_node_set_merge.  */
   1115   dest->nelem += delta;
   1116   if (delta > 0 && REG_VALID_INDEX (id))
   1117     for (;;)
   1118       {
   1119         if (dest->elems[is] > dest->elems[id])
   1120           {
   1121             /* Copy from the top.  */
   1122             dest->elems[id + delta--] = dest->elems[is--];
   1123             if (delta == 0)
   1124               break;
   1125           }
   1126         else
   1127           {
   1128             /* Slide from the bottom.  */
   1129             dest->elems[id + delta] = dest->elems[id];
   1130             if (! REG_VALID_INDEX (--id))
   1131               break;
   1132           }
   1133       }
   1134 
   1135   /* Copy remaining SRC elements.  */
   1136   memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
   1137 
   1138   return REG_NOERROR;
   1139 }
   1140 
   1141 /* Calculate the union set of the sets SRC1 and SRC2. And store it to
   1142    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
   1143 
   1144 static reg_errcode_t
   1145 internal_function
   1146 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
   1147 			const re_node_set *src2)
   1148 {
   1149   Idx i1, i2, id;
   1150   if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
   1151     {
   1152       dest->alloc = src1->nelem + src2->nelem;
   1153       dest->elems = re_malloc (Idx, dest->alloc);
   1154       if (BE (dest->elems == NULL, 0))
   1155 	return REG_ESPACE;
   1156     }
   1157   else
   1158     {
   1159       if (src1 != NULL && src1->nelem > 0)
   1160 	return re_node_set_init_copy (dest, src1);
   1161       else if (src2 != NULL && src2->nelem > 0)
   1162 	return re_node_set_init_copy (dest, src2);
   1163       else
   1164 	re_node_set_init_empty (dest);
   1165       return REG_NOERROR;
   1166     }
   1167   for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
   1168     {
   1169       if (src1->elems[i1] > src2->elems[i2])
   1170 	{
   1171 	  dest->elems[id++] = src2->elems[i2++];
   1172 	  continue;
   1173 	}
   1174       if (src1->elems[i1] == src2->elems[i2])
   1175 	++i2;
   1176       dest->elems[id++] = src1->elems[i1++];
   1177     }
   1178   if (i1 < src1->nelem)
   1179     {
   1180       memcpy (dest->elems + id, src1->elems + i1,
   1181 	     (src1->nelem - i1) * sizeof (Idx));
   1182       id += src1->nelem - i1;
   1183     }
   1184   else if (i2 < src2->nelem)
   1185     {
   1186       memcpy (dest->elems + id, src2->elems + i2,
   1187 	     (src2->nelem - i2) * sizeof (Idx));
   1188       id += src2->nelem - i2;
   1189     }
   1190   dest->nelem = id;
   1191   return REG_NOERROR;
   1192 }
   1193 
   1194 /* Calculate the union set of the sets DEST and SRC. And store it to
   1195    DEST. Return value indicate the error code or REG_NOERROR if succeeded.  */
   1196 
   1197 static reg_errcode_t
   1198 internal_function
   1199 re_node_set_merge (re_node_set *dest, const re_node_set *src)
   1200 {
   1201   Idx is, id, sbase, delta;
   1202   if (src == NULL || src->nelem == 0)
   1203     return REG_NOERROR;
   1204   if (dest->alloc < 2 * src->nelem + dest->nelem)
   1205     {
   1206       Idx new_alloc = 2 * (src->nelem + dest->alloc);
   1207       Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
   1208       if (BE (new_buffer == NULL, 0))
   1209 	return REG_ESPACE;
   1210       dest->elems = new_buffer;
   1211       dest->alloc = new_alloc;
   1212     }
   1213 
   1214   if (BE (dest->nelem == 0, 0))
   1215     {
   1216       dest->nelem = src->nelem;
   1217       memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
   1218       return REG_NOERROR;
   1219     }
   1220 
   1221   /* Copy into the top of DEST the items of SRC that are not
   1222      found in DEST.  Maybe we could binary search in DEST?  */
   1223   for (sbase = dest->nelem + 2 * src->nelem,
   1224        is = src->nelem - 1, id = dest->nelem - 1;
   1225        REG_VALID_INDEX (is) && REG_VALID_INDEX (id); )
   1226     {
   1227       if (dest->elems[id] == src->elems[is])
   1228         is--, id--;
   1229       else if (dest->elems[id] < src->elems[is])
   1230         dest->elems[--sbase] = src->elems[is--];
   1231       else /* if (dest->elems[id] > src->elems[is]) */
   1232         --id;
   1233     }
   1234 
   1235   if (REG_VALID_INDEX (is))
   1236     {
   1237       /* If DEST is exhausted, the remaining items of SRC must be unique.  */
   1238       sbase -= is + 1;
   1239       memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
   1240     }
   1241 
   1242   id = dest->nelem - 1;
   1243   is = dest->nelem + 2 * src->nelem - 1;
   1244   delta = is - sbase + 1;
   1245   if (delta == 0)
   1246     return REG_NOERROR;
   1247 
   1248   /* Now copy.  When DELTA becomes zero, the remaining
   1249      DEST elements are already in place.  */
   1250   dest->nelem += delta;
   1251   for (;;)
   1252     {
   1253       if (dest->elems[is] > dest->elems[id])
   1254         {
   1255 	  /* Copy from the top.  */
   1256           dest->elems[id + delta--] = dest->elems[is--];
   1257 	  if (delta == 0)
   1258 	    break;
   1259 	}
   1260       else
   1261         {
   1262           /* Slide from the bottom.  */
   1263           dest->elems[id + delta] = dest->elems[id];
   1264 	  if (! REG_VALID_INDEX (--id))
   1265 	    {
   1266 	      /* Copy remaining SRC elements.  */
   1267 	      memcpy (dest->elems, dest->elems + sbase,
   1268 	              delta * sizeof (Idx));
   1269 	      break;
   1270 	    }
   1271 	}
   1272     }
   1273 
   1274   return REG_NOERROR;
   1275 }
   1276 
   1277 /* Insert the new element ELEM to the re_node_set* SET.
   1278    SET should not already have ELEM.
   1279    Return true if successful.  */
   1280 
   1281 static bool
   1282 internal_function
   1283 re_node_set_insert (re_node_set *set, Idx elem)
   1284 {
   1285   Idx idx;
   1286   /* In case the set is empty.  */
   1287   if (set->alloc == 0)
   1288     return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1);
   1289 
   1290   if (BE (set->nelem, 0) == 0)
   1291     {
   1292       /* We already guaranteed above that set->alloc != 0.  */
   1293       set->elems[0] = elem;
   1294       ++set->nelem;
   1295       return true;
   1296     }
   1297 
   1298   /* Realloc if we need.  */
   1299   if (set->alloc == set->nelem)
   1300     {
   1301       Idx *new_elems;
   1302       set->alloc = set->alloc * 2;
   1303       new_elems = re_realloc (set->elems, Idx, set->alloc);
   1304       if (BE (new_elems == NULL, 0))
   1305 	return false;
   1306       set->elems = new_elems;
   1307     }
   1308 
   1309   /* Move the elements which follows the new element.  Test the
   1310      first element separately to skip a check in the inner loop.  */
   1311   if (elem < set->elems[0])
   1312     {
   1313       idx = 0;
   1314       for (idx = set->nelem; idx > 0; idx--)
   1315         set->elems[idx] = set->elems[idx - 1];
   1316     }
   1317   else
   1318     {
   1319       for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
   1320         set->elems[idx] = set->elems[idx - 1];
   1321     }
   1322 
   1323   /* Insert the new element.  */
   1324   set->elems[idx] = elem;
   1325   ++set->nelem;
   1326   return true;
   1327 }
   1328 
   1329 /* Insert the new element ELEM to the re_node_set* SET.
   1330    SET should not already have any element greater than or equal to ELEM.
   1331    Return true if successful.  */
   1332 
   1333 static bool
   1334 internal_function
   1335 re_node_set_insert_last (re_node_set *set, Idx elem)
   1336 {
   1337   /* Realloc if we need.  */
   1338   if (set->alloc == set->nelem)
   1339     {
   1340       Idx *new_elems;
   1341       set->alloc = (set->alloc + 1) * 2;
   1342       new_elems = re_realloc (set->elems, Idx, set->alloc);
   1343       if (BE (new_elems == NULL, 0))
   1344 	return false;
   1345       set->elems = new_elems;
   1346     }
   1347 
   1348   /* Insert the new element.  */
   1349   set->elems[set->nelem++] = elem;
   1350   return true;
   1351 }
   1352 
   1353 /* Compare two node sets SET1 and SET2.
   1354    Return true if SET1 and SET2 are equivalent.  */
   1355 
   1356 static bool
   1357 internal_function __attribute ((pure))
   1358 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
   1359 {
   1360   Idx i;
   1361   if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
   1362     return false;
   1363   for (i = set1->nelem ; REG_VALID_INDEX (--i) ; )
   1364     if (set1->elems[i] != set2->elems[i])
   1365       return false;
   1366   return true;
   1367 }
   1368 
   1369 /* Return (idx + 1) if SET contains the element ELEM, return 0 otherwise.  */
   1370 
   1371 static Idx
   1372 internal_function __attribute ((pure))
   1373 re_node_set_contains (const re_node_set *set, Idx elem)
   1374 {
   1375   __re_size_t idx, right, mid;
   1376   if (! REG_VALID_NONZERO_INDEX (set->nelem))
   1377     return 0;
   1378 
   1379   /* Binary search the element.  */
   1380   idx = 0;
   1381   right = set->nelem - 1;
   1382   while (idx < right)
   1383     {
   1384       mid = (idx + right) / 2;
   1385       if (set->elems[mid] < elem)
   1386 	idx = mid + 1;
   1387       else
   1388 	right = mid;
   1389     }
   1390   return set->elems[idx] == elem ? idx + 1 : 0;
   1391 }
   1392 
   1393 static void
   1394 internal_function
   1395 re_node_set_remove_at (re_node_set *set, Idx idx)
   1396 {
   1397   if (idx < 0 || idx >= set->nelem)
   1398     return;
   1399   --set->nelem;
   1400   for (; idx < set->nelem; idx++)
   1401     set->elems[idx] = set->elems[idx + 1];
   1402 }
   1403 
   1404 
   1406 /* Add the token TOKEN to dfa->nodes, and return the index of the token.
   1407    Or return REG_MISSING if an error occurred.  */
   1408 
   1409 static Idx
   1410 internal_function
   1411 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
   1412 {
   1413   if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
   1414     {
   1415       size_t new_nodes_alloc = dfa->nodes_alloc * 2;
   1416       Idx *new_nexts, *new_indices;
   1417       re_node_set *new_edests, *new_eclosures;
   1418       re_token_t *new_nodes;
   1419       size_t max_object_size =
   1420 	MAX (sizeof (re_token_t),
   1421 	     MAX (sizeof (re_node_set),
   1422 		  sizeof (Idx)));
   1423 
   1424       /* Avoid overflows.  */
   1425       if (BE (SIZE_MAX / 2 / max_object_size < dfa->nodes_alloc, 0))
   1426 	return REG_MISSING;
   1427 
   1428       new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
   1429       if (BE (new_nodes == NULL, 0))
   1430 	return REG_MISSING;
   1431       dfa->nodes = new_nodes;
   1432       new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
   1433       new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
   1434       new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
   1435       new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
   1436       if (BE (new_nexts == NULL || new_indices == NULL
   1437 	      || new_edests == NULL || new_eclosures == NULL, 0))
   1438 	return REG_MISSING;
   1439       dfa->nexts = new_nexts;
   1440       dfa->org_indices = new_indices;
   1441       dfa->edests = new_edests;
   1442       dfa->eclosures = new_eclosures;
   1443       dfa->nodes_alloc = new_nodes_alloc;
   1444     }
   1445   dfa->nodes[dfa->nodes_len] = token;
   1446   dfa->nodes[dfa->nodes_len].constraint = 0;
   1447 #ifdef RE_ENABLE_I18N
   1448   {
   1449   int type = token.type;
   1450   dfa->nodes[dfa->nodes_len].accept_mb =
   1451     (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
   1452   }
   1453 #endif
   1454   dfa->nexts[dfa->nodes_len] = REG_MISSING;
   1455   re_node_set_init_empty (dfa->edests + dfa->nodes_len);
   1456   re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
   1457   return dfa->nodes_len++;
   1458 }
   1459 
   1460 static inline re_hashval_t
   1461 internal_function
   1462 calc_state_hash (const re_node_set *nodes, unsigned int context)
   1463 {
   1464   re_hashval_t hash = nodes->nelem + context;
   1465   Idx i;
   1466   for (i = 0 ; i < nodes->nelem ; i++)
   1467     hash += nodes->elems[i];
   1468   return hash;
   1469 }
   1470 
   1471 /* Search for the state whose node_set is equivalent to NODES.
   1472    Return the pointer to the state, if we found it in the DFA.
   1473    Otherwise create the new one and return it.  In case of an error
   1474    return NULL and set the error code in ERR.
   1475    Note: - We assume NULL as the invalid state, then it is possible that
   1476 	   return value is NULL and ERR is REG_NOERROR.
   1477 	 - We never return non-NULL value in case of any errors, it is for
   1478 	   optimization.  */
   1479 
   1480 static re_dfastate_t *
   1481 internal_function
   1482 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
   1483 		  const re_node_set *nodes)
   1484 {
   1485   re_hashval_t hash;
   1486   re_dfastate_t *new_state;
   1487   struct re_state_table_entry *spot;
   1488   Idx i;
   1489 #ifdef lint
   1490   /* Suppress bogus uninitialized-variable warnings.  */
   1491   *err = REG_NOERROR;
   1492 #endif
   1493   if (BE (nodes->nelem == 0, 0))
   1494     {
   1495       *err = REG_NOERROR;
   1496       return NULL;
   1497     }
   1498   hash = calc_state_hash (nodes, 0);
   1499   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1500 
   1501   for (i = 0 ; i < spot->num ; i++)
   1502     {
   1503       re_dfastate_t *state = spot->array[i];
   1504       if (hash != state->hash)
   1505 	continue;
   1506       if (re_node_set_compare (&state->nodes, nodes))
   1507 	return state;
   1508     }
   1509 
   1510   /* There are no appropriate state in the dfa, create the new one.  */
   1511   new_state = create_ci_newstate (dfa, nodes, hash);
   1512   if (BE (new_state == NULL, 0))
   1513     *err = REG_ESPACE;
   1514 
   1515   return new_state;
   1516 }
   1517 
   1518 /* Search for the state whose node_set is equivalent to NODES and
   1519    whose context is equivalent to CONTEXT.
   1520    Return the pointer to the state, if we found it in the DFA.
   1521    Otherwise create the new one and return it.  In case of an error
   1522    return NULL and set the error code in ERR.
   1523    Note: - We assume NULL as the invalid state, then it is possible that
   1524 	   return value is NULL and ERR is REG_NOERROR.
   1525 	 - We never return non-NULL value in case of any errors, it is for
   1526 	   optimization.  */
   1527 
   1528 static re_dfastate_t *
   1529 internal_function
   1530 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
   1531 			  const re_node_set *nodes, unsigned int context)
   1532 {
   1533   re_hashval_t hash;
   1534   re_dfastate_t *new_state;
   1535   struct re_state_table_entry *spot;
   1536   Idx i;
   1537 #ifdef lint
   1538   /* Suppress bogus uninitialized-variable warnings.  */
   1539   *err = REG_NOERROR;
   1540 #endif
   1541   if (nodes->nelem == 0)
   1542     {
   1543       *err = REG_NOERROR;
   1544       return NULL;
   1545     }
   1546   hash = calc_state_hash (nodes, context);
   1547   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1548 
   1549   for (i = 0 ; i < spot->num ; i++)
   1550     {
   1551       re_dfastate_t *state = spot->array[i];
   1552       if (state->hash == hash
   1553 	  && state->context == context
   1554 	  && re_node_set_compare (state->entrance_nodes, nodes))
   1555 	return state;
   1556     }
   1557   /* There are no appropriate state in `dfa', create the new one.  */
   1558   new_state = create_cd_newstate (dfa, nodes, context, hash);
   1559   if (BE (new_state == NULL, 0))
   1560     *err = REG_ESPACE;
   1561 
   1562   return new_state;
   1563 }
   1564 
   1565 /* Finish initialization of the new state NEWSTATE, and using its hash value
   1566    HASH put in the appropriate bucket of DFA's state table.  Return value
   1567    indicates the error code if failed.  */
   1568 
   1569 static reg_errcode_t
   1570 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
   1571 		re_hashval_t hash)
   1572 {
   1573   struct re_state_table_entry *spot;
   1574   reg_errcode_t err;
   1575   Idx i;
   1576 
   1577   newstate->hash = hash;
   1578   err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
   1579   if (BE (err != REG_NOERROR, 0))
   1580     return REG_ESPACE;
   1581   for (i = 0; i < newstate->nodes.nelem; i++)
   1582     {
   1583       Idx elem = newstate->nodes.elems[i];
   1584       if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
   1585 	if (BE (! re_node_set_insert_last (&newstate->non_eps_nodes, elem), 0))
   1586 	  return REG_ESPACE;
   1587     }
   1588 
   1589   spot = dfa->state_table + (hash & dfa->state_hash_mask);
   1590   if (BE (spot->alloc <= spot->num, 0))
   1591     {
   1592       Idx new_alloc = 2 * spot->num + 2;
   1593       re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
   1594 					      new_alloc);
   1595       if (BE (new_array == NULL, 0))
   1596 	return REG_ESPACE;
   1597       spot->array = new_array;
   1598       spot->alloc = new_alloc;
   1599     }
   1600   spot->array[spot->num++] = newstate;
   1601   return REG_NOERROR;
   1602 }
   1603 
   1604 static void
   1605 free_state (re_dfastate_t *state)
   1606 {
   1607   re_node_set_free (&state->non_eps_nodes);
   1608   re_node_set_free (&state->inveclosure);
   1609   if (state->entrance_nodes != &state->nodes)
   1610     {
   1611       re_node_set_free (state->entrance_nodes);
   1612       re_free (state->entrance_nodes);
   1613     }
   1614   re_node_set_free (&state->nodes);
   1615   re_free (state->word_trtable);
   1616   re_free (state->trtable);
   1617   re_free (state);
   1618 }
   1619 
   1620 /* Create the new state which is independ of contexts.
   1621    Return the new state if succeeded, otherwise return NULL.  */
   1622 
   1623 static re_dfastate_t *
   1624 internal_function
   1625 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
   1626 		    re_hashval_t hash)
   1627 {
   1628   Idx i;
   1629   reg_errcode_t err;
   1630   re_dfastate_t *newstate;
   1631 
   1632   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
   1633   if (BE (newstate == NULL, 0))
   1634     return NULL;
   1635   err = re_node_set_init_copy (&newstate->nodes, nodes);
   1636   if (BE (err != REG_NOERROR, 0))
   1637     {
   1638       re_free (newstate);
   1639       return NULL;
   1640     }
   1641 
   1642   newstate->entrance_nodes = &newstate->nodes;
   1643   for (i = 0 ; i < nodes->nelem ; i++)
   1644     {
   1645       re_token_t *node = dfa->nodes + nodes->elems[i];
   1646       re_token_type_t type = node->type;
   1647       if (type == CHARACTER && !node->constraint)
   1648 	continue;
   1649 #ifdef RE_ENABLE_I18N
   1650       newstate->accept_mb |= node->accept_mb;
   1651 #endif /* RE_ENABLE_I18N */
   1652 
   1653       /* If the state has the halt node, the state is a halt state.  */
   1654       if (type == END_OF_RE)
   1655 	newstate->halt = 1;
   1656       else if (type == OP_BACK_REF)
   1657 	newstate->has_backref = 1;
   1658       else if (type == ANCHOR || node->constraint)
   1659 	newstate->has_constraint = 1;
   1660     }
   1661   err = register_state (dfa, newstate, hash);
   1662   if (BE (err != REG_NOERROR, 0))
   1663     {
   1664       free_state (newstate);
   1665       newstate = NULL;
   1666     }
   1667   return newstate;
   1668 }
   1669 
   1670 /* Create the new state which is depend on the context CONTEXT.
   1671    Return the new state if succeeded, otherwise return NULL.  */
   1672 
   1673 static re_dfastate_t *
   1674 internal_function
   1675 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
   1676 		    unsigned int context, re_hashval_t hash)
   1677 {
   1678   Idx i, nctx_nodes = 0;
   1679   reg_errcode_t err;
   1680   re_dfastate_t *newstate;
   1681 
   1682   newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
   1683   if (BE (newstate == NULL, 0))
   1684     return NULL;
   1685   err = re_node_set_init_copy (&newstate->nodes, nodes);
   1686   if (BE (err != REG_NOERROR, 0))
   1687     {
   1688       re_free (newstate);
   1689       return NULL;
   1690     }
   1691 
   1692   newstate->context = context;
   1693   newstate->entrance_nodes = &newstate->nodes;
   1694 
   1695   for (i = 0 ; i < nodes->nelem ; i++)
   1696     {
   1697       re_token_t *node = dfa->nodes + nodes->elems[i];
   1698       re_token_type_t type = node->type;
   1699       unsigned int constraint = node->constraint;
   1700 
   1701       if (type == CHARACTER && !constraint)
   1702 	continue;
   1703 #ifdef RE_ENABLE_I18N
   1704       newstate->accept_mb |= node->accept_mb;
   1705 #endif /* RE_ENABLE_I18N */
   1706 
   1707       /* If the state has the halt node, the state is a halt state.  */
   1708       if (type == END_OF_RE)
   1709 	newstate->halt = 1;
   1710       else if (type == OP_BACK_REF)
   1711 	newstate->has_backref = 1;
   1712 
   1713       if (constraint)
   1714 	{
   1715 	  if (newstate->entrance_nodes == &newstate->nodes)
   1716 	    {
   1717 	      newstate->entrance_nodes = re_malloc (re_node_set, 1);
   1718 	      if (BE (newstate->entrance_nodes == NULL, 0))
   1719 		{
   1720 		  free_state (newstate);
   1721 		  return NULL;
   1722 		}
   1723 	      re_node_set_init_copy (newstate->entrance_nodes, nodes);
   1724 	      nctx_nodes = 0;
   1725 	      newstate->has_constraint = 1;
   1726 	    }
   1727 
   1728 	  if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
   1729 	    {
   1730 	      re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
   1731 	      ++nctx_nodes;
   1732 	    }
   1733 	}
   1734     }
   1735   err = register_state (dfa, newstate, hash);
   1736   if (BE (err != REG_NOERROR, 0))
   1737     {
   1738       free_state (newstate);
   1739       newstate = NULL;
   1740     }
   1741   return  newstate;
   1742 }
   1743