Home | History | Annotate | Download | only in bits
      1 // class template regex -*- C++ -*-
      2 
      3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /**
     26  *  @file bits/regex_compiler.h
     27  *  This is an internal header file, included by other library headers.
     28  *  Do not attempt to use it directly. @headername{regex}
     29  */
     30 
     31 namespace std _GLIBCXX_VISIBILITY(default)
     32 {
     33 namespace __detail
     34 {
     35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     36 
     37   /**
     38    * @addtogroup regex-detail
     39    * @{
     40    */
     41 
     42   /// Base class for scanner.
     43   struct _Scanner_base
     44   {
     45     typedef unsigned int _StateT;
     46 
     47     static constexpr _StateT _S_state_at_start    = 1 << 0;
     48     static constexpr _StateT _S_state_in_brace    = 1 << 2;
     49     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
     50 
     51     virtual ~_Scanner_base() { };
     52   };
     53 
     54   /**
     55    * @brief struct _Scanner. Scans an input range for regex tokens.
     56    *
     57    * The %_Scanner class interprets the regular expression pattern in
     58    * the input range passed to its constructor as a sequence of parse
     59    * tokens passed to the regular expression compiler.  The sequence
     60    * of tokens provided depends on the flag settings passed to the
     61    * constructor: different regular expression grammars will interpret
     62    * the same input pattern in syntactically different ways.
     63    */
     64   template<typename _InputIterator>
     65     class _Scanner: public _Scanner_base
     66     {
     67     public:
     68       typedef _InputIterator                                        _IteratorT;
     69       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
     70       typedef std::basic_string<_CharT>                             _StringT;
     71       typedef regex_constants::syntax_option_type                   _FlagT;
     72       typedef const std::ctype<_CharT>                              _CtypeT;
     73 
     74       /// Token types returned from the scanner.
     75       enum _TokenT
     76       {
     77 	_S_token_anychar,
     78 	_S_token_backref,
     79 	_S_token_bracket_begin,
     80 	_S_token_bracket_end,
     81 	_S_token_inverse_class,
     82 	_S_token_char_class_name,
     83 	_S_token_closure0,
     84 	_S_token_closure1,
     85 	_S_token_collelem_multi,
     86 	_S_token_collelem_single,
     87 	_S_token_collsymbol,
     88 	_S_token_comma,
     89 	_S_token_dash,
     90 	_S_token_dup_count,
     91 	_S_token_eof,
     92 	_S_token_equiv_class_name,
     93 	_S_token_interval_begin,
     94 	_S_token_interval_end,
     95 	_S_token_line_begin,
     96 	_S_token_line_end,
     97 	_S_token_opt,
     98 	_S_token_or,
     99 	_S_token_ord_char,
    100 	_S_token_quoted_char,
    101 	_S_token_subexpr_begin,
    102 	_S_token_subexpr_end,
    103 	_S_token_word_begin,
    104 	_S_token_word_end,
    105 	_S_token_unknown
    106       };
    107 
    108       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
    109 	       std::locale __loc)
    110       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
    111         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
    112       { _M_advance(); }
    113 
    114       void
    115       _M_advance();
    116 
    117       _TokenT
    118       _M_token() const
    119       { return _M_curToken; }
    120 
    121       const _StringT&
    122       _M_value() const
    123       { return _M_curValue; }
    124 
    125 #ifdef _GLIBCXX_DEBUG
    126       std::ostream&
    127       _M_print(std::ostream&);
    128 #endif
    129 
    130     private:
    131       void
    132       _M_eat_escape();
    133 
    134       void
    135       _M_scan_in_brace();
    136 
    137       void
    138       _M_scan_in_bracket();
    139 
    140       void
    141       _M_eat_charclass();
    142 
    143       void
    144       _M_eat_equivclass();
    145 
    146       void
    147       _M_eat_collsymbol();
    148 
    149       _IteratorT  _M_current;
    150       _IteratorT  _M_end;
    151       _FlagT      _M_flags;
    152       _CtypeT&    _M_ctype;
    153       _TokenT     _M_curToken;
    154       _StringT    _M_curValue;
    155       _StateT     _M_state;
    156     };
    157 
    158   template<typename _InputIterator>
    159     void
    160     _Scanner<_InputIterator>::
    161     _M_advance()
    162     {
    163       if (_M_current == _M_end)
    164 	{
    165 	  _M_curToken = _S_token_eof;
    166 	  return;
    167 	}
    168 
    169       _CharT __c = *_M_current;
    170       if (_M_state & _S_state_in_bracket)
    171 	{
    172 	  _M_scan_in_bracket();
    173 	  return;
    174 	}
    175       if (_M_state & _S_state_in_brace)
    176 	{
    177 	  _M_scan_in_brace();
    178 	  return;
    179 	}
    180 #if 0
    181       // TODO: re-enable line anchors when _M_assertion is implemented.
    182       // See PR libstdc++/47724
    183       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
    184 	{
    185 	  _M_curToken = _S_token_line_begin;
    186 	  ++_M_current;
    187 	  return;
    188 	}
    189       else if (__c == _M_ctype.widen('$'))
    190 	{
    191 	  _M_curToken = _S_token_line_end;
    192 	  ++_M_current;
    193 	  return;
    194 	}
    195 #endif
    196       else if (__c == _M_ctype.widen('.'))
    197 	{
    198 	  _M_curToken = _S_token_anychar;
    199 	  ++_M_current;
    200 	  return;
    201 	}
    202       else if (__c == _M_ctype.widen('*'))
    203 	{
    204 	  _M_curToken = _S_token_closure0;
    205 	  ++_M_current;
    206 	  return;
    207 	}
    208       else if (__c == _M_ctype.widen('+'))
    209 	{
    210 	  _M_curToken = _S_token_closure1;
    211 	  ++_M_current;
    212 	  return;
    213 	}
    214       else if (__c == _M_ctype.widen('|'))
    215 	{
    216 	  _M_curToken = _S_token_or;
    217 	  ++_M_current;
    218 	  return;
    219 	}
    220       else if (__c == _M_ctype.widen('['))
    221 	{
    222 	  _M_curToken = _S_token_bracket_begin;
    223 	  _M_state |= (_S_state_in_bracket | _S_state_at_start);
    224 	  ++_M_current;
    225 	  return;
    226 	}
    227       else if (__c == _M_ctype.widen('\\'))
    228 	{
    229 	  _M_eat_escape();
    230 	  return;
    231 	}
    232       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
    233 	{
    234 	  if (__c == _M_ctype.widen('('))
    235 	    {
    236 	      _M_curToken = _S_token_subexpr_begin;
    237 	      ++_M_current;
    238 	      return;
    239 	    }
    240 	  else if (__c == _M_ctype.widen(')'))
    241 	    {
    242 	      _M_curToken = _S_token_subexpr_end;
    243 	      ++_M_current;
    244 	      return;
    245 	    }
    246 	  else if (__c == _M_ctype.widen('{'))
    247 	    {
    248 	      _M_curToken = _S_token_interval_begin;
    249 	      _M_state |= _S_state_in_brace;
    250 	      ++_M_current;
    251 	      return;
    252 	    }
    253 	}
    254 
    255       _M_curToken = _S_token_ord_char;
    256       _M_curValue.assign(1, __c);
    257       ++_M_current;
    258     }
    259 
    260 
    261   template<typename _InputIterator>
    262     void
    263     _Scanner<_InputIterator>::
    264     _M_scan_in_brace()
    265     {
    266       if (_M_ctype.is(_CtypeT::digit, *_M_current))
    267 	{
    268 	  _M_curToken = _S_token_dup_count;
    269 	  _M_curValue.assign(1, *_M_current);
    270 	  ++_M_current;
    271 	  while (_M_current != _M_end
    272 		 && _M_ctype.is(_CtypeT::digit, *_M_current))
    273 	    {
    274 	      _M_curValue += *_M_current;
    275 	      ++_M_current;
    276 	    }
    277 	  return;
    278 	}
    279       else if (*_M_current == _M_ctype.widen(','))
    280 	{
    281 	  _M_curToken = _S_token_comma;
    282 	  ++_M_current;
    283 	  return;
    284 	}
    285       if (_M_flags & (regex_constants::basic | regex_constants::grep))
    286 	{
    287 	  if (*_M_current == _M_ctype.widen('\\'))
    288 	    _M_eat_escape();
    289 	}
    290       else
    291 	{
    292 	  if (*_M_current == _M_ctype.widen('}'))
    293 	    {
    294 	      _M_curToken = _S_token_interval_end;
    295 	      _M_state &= ~_S_state_in_brace;
    296 	      ++_M_current;
    297 	      return;
    298 	    }
    299 	}
    300     }
    301 
    302   template<typename _InputIterator>
    303     void
    304     _Scanner<_InputIterator>::
    305     _M_scan_in_bracket()
    306     {
    307       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
    308 	{
    309 	  _M_curToken = _S_token_inverse_class;
    310 	  _M_state &= ~_S_state_at_start;
    311 	  ++_M_current;
    312 	  return;
    313 	}
    314       else if (*_M_current == _M_ctype.widen('['))
    315 	{
    316 	  ++_M_current;
    317 	  if (_M_current == _M_end)
    318 	    {
    319 	      _M_curToken = _S_token_eof;
    320 	      return;
    321 	    }
    322 
    323 	  if (*_M_current == _M_ctype.widen('.'))
    324 	    {
    325 	      _M_curToken = _S_token_collsymbol;
    326 	      _M_eat_collsymbol();
    327 	      return;
    328 	    }
    329 	  else if (*_M_current == _M_ctype.widen(':'))
    330 	    {
    331 	      _M_curToken = _S_token_char_class_name;
    332 	      _M_eat_charclass();
    333 	      return;
    334 	    }
    335 	  else if (*_M_current == _M_ctype.widen('='))
    336 	    {
    337 	      _M_curToken = _S_token_equiv_class_name;
    338 	      _M_eat_equivclass();
    339 	      return;
    340 	    }
    341 	}
    342       else if (*_M_current == _M_ctype.widen('-'))
    343 	{
    344 	  _M_curToken = _S_token_dash;
    345 	  ++_M_current;
    346 	  return;
    347 	}
    348       else if (*_M_current == _M_ctype.widen(']'))
    349 	{
    350 	  if (!(_M_flags & regex_constants::ECMAScript)
    351 	      || !(_M_state & _S_state_at_start))
    352 	    {
    353 	      // special case: only if  _not_ chr first after
    354 	      // '[' or '[^' and if not ECMAscript
    355 	      _M_curToken = _S_token_bracket_end;
    356 	      ++_M_current;
    357 	      return;
    358 	    }
    359 	}
    360       _M_curToken = _S_token_collelem_single;
    361       _M_curValue.assign(1, *_M_current);
    362       ++_M_current;
    363     }
    364 
    365   template<typename _InputIterator>
    366     void
    367     _Scanner<_InputIterator>::
    368     _M_eat_escape()
    369     {
    370       ++_M_current;
    371       if (_M_current == _M_end)
    372 	{
    373 	  _M_curToken = _S_token_eof;
    374 	  return;
    375 	}
    376       _CharT __c = *_M_current;
    377       ++_M_current;
    378 
    379       if (__c == _M_ctype.widen('('))
    380 	{
    381 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
    382 	    {
    383 	      _M_curToken = _S_token_ord_char;
    384 	      _M_curValue.assign(1, __c);
    385 	    }
    386 	  else
    387 	    _M_curToken = _S_token_subexpr_begin;
    388 	}
    389       else if (__c == _M_ctype.widen(')'))
    390 	{
    391 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
    392 	    {
    393 	      _M_curToken = _S_token_ord_char;
    394 	      _M_curValue.assign(1, __c);
    395 	    }
    396 	  else
    397 	    _M_curToken = _S_token_subexpr_end;
    398 	}
    399       else if (__c == _M_ctype.widen('{'))
    400 	{
    401 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
    402 	    {
    403 	      _M_curToken = _S_token_ord_char;
    404 	      _M_curValue.assign(1, __c);
    405 	    }
    406 	  else
    407 	    {
    408 	      _M_curToken = _S_token_interval_begin;
    409 	      _M_state |= _S_state_in_brace;
    410 	    }
    411 	}
    412       else if (__c == _M_ctype.widen('}'))
    413 	{
    414 	  if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
    415 	    {
    416 	      _M_curToken = _S_token_ord_char;
    417 	      _M_curValue.assign(1, __c);
    418 	    }
    419 	  else
    420 	    {
    421 	      if (!(_M_state && _S_state_in_brace))
    422 		__throw_regex_error(regex_constants::error_badbrace);
    423 	      _M_state &= ~_S_state_in_brace;
    424 	      _M_curToken = _S_token_interval_end;
    425 	    }
    426 	}
    427       else if (__c == _M_ctype.widen('x'))
    428 	{
    429 	  ++_M_current;
    430 	  if (_M_current == _M_end)
    431 	    {
    432 	      _M_curToken = _S_token_eof;
    433 	      return;
    434 	    }
    435 	  if (_M_ctype.is(_CtypeT::digit, *_M_current))
    436 	    {
    437 	      _M_curValue.assign(1, *_M_current);
    438 	      ++_M_current;
    439 	      if (_M_current == _M_end)
    440 		{
    441 		  _M_curToken = _S_token_eof;
    442 		  return;
    443 		}
    444 	      if (_M_ctype.is(_CtypeT::digit, *_M_current))
    445 		{
    446 		  _M_curValue += *_M_current;
    447 		  ++_M_current;
    448 		  return;
    449 		}
    450 	    }
    451 	}
    452       else if (__c == _M_ctype.widen('^')
    453 	       || __c == _M_ctype.widen('.')
    454 	       || __c == _M_ctype.widen('*')
    455 	       || __c == _M_ctype.widen('$')
    456 	       || __c == _M_ctype.widen('\\'))
    457 	{
    458 	  _M_curToken = _S_token_ord_char;
    459 	  _M_curValue.assign(1, __c);
    460 	}
    461       else if (_M_ctype.is(_CtypeT::digit, __c))
    462 	{
    463 	  _M_curToken = _S_token_backref;
    464 	  _M_curValue.assign(1, __c);
    465 	}
    466       else
    467 	__throw_regex_error(regex_constants::error_escape);
    468     }
    469 
    470 
    471   // Eats a character class or throwns an exception.
    472   // current point to ':' delimiter on entry, char after ']' on return
    473   template<typename _InputIterator>
    474     void
    475     _Scanner<_InputIterator>::
    476     _M_eat_charclass()
    477     {
    478       ++_M_current; // skip ':'
    479       if (_M_current == _M_end)
    480 	__throw_regex_error(regex_constants::error_ctype);
    481       for (_M_curValue.clear();
    482 	   _M_current != _M_end && *_M_current != _M_ctype.widen(':');
    483 	   ++_M_current)
    484 	_M_curValue += *_M_current;
    485       if (_M_current == _M_end)
    486 	__throw_regex_error(regex_constants::error_ctype);
    487       ++_M_current; // skip ':'
    488       if (*_M_current != _M_ctype.widen(']'))
    489 	__throw_regex_error(regex_constants::error_ctype);
    490       ++_M_current; // skip ']'
    491     }
    492 
    493 
    494   template<typename _InputIterator>
    495     void
    496     _Scanner<_InputIterator>::
    497     _M_eat_equivclass()
    498     {
    499       ++_M_current; // skip '='
    500       if (_M_current == _M_end)
    501 	__throw_regex_error(regex_constants::error_collate);
    502       for (_M_curValue.clear();
    503 	   _M_current != _M_end && *_M_current != _M_ctype.widen('=');
    504 	   ++_M_current)
    505 	_M_curValue += *_M_current;
    506       if (_M_current == _M_end)
    507 	__throw_regex_error(regex_constants::error_collate);
    508       ++_M_current; // skip '='
    509       if (*_M_current != _M_ctype.widen(']'))
    510 	__throw_regex_error(regex_constants::error_collate);
    511       ++_M_current; // skip ']'
    512     }
    513 
    514 
    515   template<typename _InputIterator>
    516     void
    517     _Scanner<_InputIterator>::
    518     _M_eat_collsymbol()
    519     {
    520       ++_M_current; // skip '.'
    521       if (_M_current == _M_end)
    522 	__throw_regex_error(regex_constants::error_collate);
    523       for (_M_curValue.clear();
    524 	   _M_current != _M_end && *_M_current != _M_ctype.widen('.');
    525 	   ++_M_current)
    526 	_M_curValue += *_M_current;
    527       if (_M_current == _M_end)
    528 	__throw_regex_error(regex_constants::error_collate);
    529       ++_M_current; // skip '.'
    530       if (*_M_current != _M_ctype.widen(']'))
    531 	__throw_regex_error(regex_constants::error_collate);
    532       ++_M_current; // skip ']'
    533     }
    534 
    535 #ifdef _GLIBCXX_DEBUG
    536   template<typename _InputIterator>
    537     std::ostream&
    538     _Scanner<_InputIterator>::
    539     _M_print(std::ostream& ostr)
    540     {
    541       switch (_M_curToken)
    542       {
    543 	case _S_token_anychar:
    544 	  ostr << "any-character\n";
    545 	  break;
    546 	case _S_token_backref:
    547 	  ostr << "backref\n";
    548 	  break;
    549 	case _S_token_bracket_begin:
    550 	  ostr << "bracket-begin\n";
    551 	  break;
    552 	case _S_token_bracket_end:
    553 	  ostr << "bracket-end\n";
    554 	  break;
    555 	case _S_token_char_class_name:
    556 	  ostr << "char-class-name \"" << _M_curValue << "\"\n";
    557 	  break;
    558 	case _S_token_closure0:
    559 	  ostr << "closure0\n";
    560 	  break;
    561 	case _S_token_closure1:
    562 	  ostr << "closure1\n";
    563 	  break;
    564 	case _S_token_collelem_multi:
    565 	  ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
    566 	  break;
    567 	case _S_token_collelem_single:
    568 	  ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
    569 	  break;
    570 	case _S_token_collsymbol:
    571 	  ostr << "collsymbol \"" << _M_curValue << "\"\n";
    572 	  break;
    573 	case _S_token_comma:
    574 	  ostr << "comma\n";
    575 	  break;
    576 	case _S_token_dash:
    577 	  ostr << "dash\n";
    578 	  break;
    579 	case _S_token_dup_count:
    580 	  ostr << "dup count: " << _M_curValue << "\n";
    581 	  break;
    582 	case _S_token_eof:
    583 	  ostr << "EOF\n";
    584 	  break;
    585 	case _S_token_equiv_class_name:
    586 	  ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
    587 	  break;
    588 	case _S_token_interval_begin:
    589 	  ostr << "interval begin\n";
    590 	  break;
    591 	case _S_token_interval_end:
    592 	  ostr << "interval end\n";
    593 	  break;
    594 	case _S_token_line_begin:
    595 	  ostr << "line begin\n";
    596 	  break;
    597 	case _S_token_line_end:
    598 	  ostr << "line end\n";
    599 	  break;
    600 	case _S_token_opt:
    601 	  ostr << "opt\n";
    602 	  break;
    603 	case _S_token_or:
    604 	  ostr << "or\n";
    605 	  break;
    606 	case _S_token_ord_char:
    607 	  ostr << "ordinary character: \"" << _M_value() << "\"\n";
    608 	  break;
    609 	case _S_token_quoted_char:
    610 	  ostr << "quoted char\n";
    611 	  break;
    612 	case _S_token_subexpr_begin:
    613 	  ostr << "subexpr begin\n";
    614 	  break;
    615 	case _S_token_subexpr_end:
    616 	  ostr << "subexpr end\n";
    617 	  break;
    618 	case _S_token_word_begin:
    619 	  ostr << "word begin\n";
    620 	  break;
    621 	case _S_token_word_end:
    622 	  ostr << "word end\n";
    623 	  break;
    624 	case _S_token_unknown:
    625 	  ostr << "-- unknown token --\n";
    626 	  break;
    627       }
    628       return ostr;
    629     }
    630 #endif
    631 
    632   /// Builds an NFA from an input iterator interval.
    633   template<typename _InIter, typename _TraitsT>
    634     class _Compiler
    635     {
    636     public:
    637       typedef _InIter                                            _IterT;
    638       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
    639       typedef std::basic_string<_CharT>                          _StringT;
    640       typedef regex_constants::syntax_option_type                _FlagT;
    641 
    642       _Compiler(const _InIter& __b, const _InIter& __e,
    643 		_TraitsT& __traits, _FlagT __flags);
    644 
    645       const _Nfa&
    646       _M_nfa() const
    647       { return _M_state_store; }
    648 
    649     private:
    650       typedef _Scanner<_InIter>                              _ScannerT;
    651       typedef typename _ScannerT::_TokenT                    _TokenT;
    652       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
    653       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
    654 
    655       // accepts a specific token or returns false.
    656       bool
    657       _M_match_token(_TokenT __token);
    658 
    659       void
    660       _M_disjunction();
    661 
    662       bool
    663       _M_alternative();
    664 
    665       bool
    666       _M_term();
    667 
    668       bool
    669       _M_assertion();
    670 
    671       bool
    672       _M_quantifier();
    673 
    674       bool
    675       _M_atom();
    676 
    677       bool
    678       _M_bracket_expression();
    679 
    680       bool
    681       _M_bracket_list(_RMatcherT& __matcher);
    682 
    683       bool
    684       _M_follow_list(_RMatcherT& __matcher);
    685 
    686       bool
    687       _M_follow_list2(_RMatcherT& __matcher);
    688 
    689       bool
    690       _M_expression_term(_RMatcherT& __matcher);
    691 
    692       bool
    693       _M_range_expression(_RMatcherT& __matcher);
    694 
    695       bool
    696       _M_start_range(_RMatcherT& __matcher);
    697 
    698       bool
    699       _M_collating_symbol(_RMatcherT& __matcher);
    700 
    701       bool
    702       _M_equivalence_class(_RMatcherT& __matcher);
    703 
    704       bool
    705       _M_character_class(_RMatcherT& __matcher);
    706 
    707       int
    708       _M_cur_int_value(int __radix);
    709 
    710       _TraitsT&      _M_traits;
    711       _ScannerT      _M_scanner;
    712       _StringT       _M_cur_value;
    713       _Nfa           _M_state_store;
    714       _StackT        _M_stack;
    715     };
    716 
    717   template<typename _InIter, typename _TraitsT>
    718     _Compiler<_InIter, _TraitsT>::
    719     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
    720 	      _Compiler<_InIter, _TraitsT>::_FlagT __flags)
    721     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
    722       _M_state_store(__flags)
    723     {
    724       typedef _StartTagger<_InIter, _TraitsT> _Start;
    725       typedef _EndTagger<_InIter, _TraitsT> _End;
    726 
    727       _StateSeq __r(_M_state_store,
    728       		    _M_state_store._M_insert_subexpr_begin(_Start(0)));
    729       _M_disjunction();
    730       if (!_M_stack.empty())
    731 	{
    732 	  __r._M_append(_M_stack.top());
    733 	  _M_stack.pop();
    734 	}
    735       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
    736       __r._M_append(_M_state_store._M_insert_accept());
    737     }
    738 
    739   template<typename _InIter, typename _TraitsT>
    740     bool
    741     _Compiler<_InIter, _TraitsT>::
    742     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
    743     {
    744       if (token == _M_scanner._M_token())
    745 	{
    746 	  _M_cur_value = _M_scanner._M_value();
    747 	  _M_scanner._M_advance();
    748 	  return true;
    749 	}
    750       return false;
    751     }
    752 
    753   template<typename _InIter, typename _TraitsT>
    754     void
    755     _Compiler<_InIter, _TraitsT>::
    756     _M_disjunction()
    757     {
    758       this->_M_alternative();
    759       if (_M_match_token(_ScannerT::_S_token_or))
    760 	{
    761 	  _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
    762 	  this->_M_disjunction();
    763 	  _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
    764 	  _M_stack.push(_StateSeq(__alt1, __alt2));
    765 	}
    766     }
    767 
    768   template<typename _InIter, typename _TraitsT>
    769     bool
    770     _Compiler<_InIter, _TraitsT>::
    771     _M_alternative()
    772     {
    773       if (this->_M_term())
    774 	{
    775 	  _StateSeq __re = _M_stack.top(); _M_stack.pop();
    776 	  this->_M_alternative();
    777 	  if (!_M_stack.empty())
    778 	    {
    779 	      __re._M_append(_M_stack.top());
    780 	      _M_stack.pop();
    781 	    }
    782 	  _M_stack.push(__re);
    783 	  return true;
    784 	}
    785       return false;
    786     }
    787 
    788   template<typename _InIter, typename _TraitsT>
    789     bool
    790     _Compiler<_InIter, _TraitsT>::
    791     _M_term()
    792     {
    793       if (this->_M_assertion())
    794 	return true;
    795       if (this->_M_atom())
    796 	{
    797 	  this->_M_quantifier();
    798 	  return true;
    799 	}
    800       return false;
    801     }
    802 
    803   template<typename _InIter, typename _TraitsT>
    804     bool
    805     _Compiler<_InIter, _TraitsT>::
    806     _M_assertion()
    807     {
    808       if (_M_match_token(_ScannerT::_S_token_line_begin))
    809 	{
    810 	  // __m.push(_Matcher::_S_opcode_line_begin);
    811 	  return true;
    812 	}
    813       if (_M_match_token(_ScannerT::_S_token_line_end))
    814 	{
    815 	  // __m.push(_Matcher::_S_opcode_line_end);
    816 	  return true;
    817 	}
    818       if (_M_match_token(_ScannerT::_S_token_word_begin))
    819 	{
    820 	  // __m.push(_Matcher::_S_opcode_word_begin);
    821 	  return true;
    822 	}
    823       if (_M_match_token(_ScannerT::_S_token_word_end))
    824 	{
    825 	  // __m.push(_Matcher::_S_opcode_word_end);
    826 	  return true;
    827 	}
    828       return false;
    829     }
    830 
    831   template<typename _InIter, typename _TraitsT>
    832     bool
    833     _Compiler<_InIter, _TraitsT>::
    834     _M_quantifier()
    835     {
    836       if (_M_match_token(_ScannerT::_S_token_closure0))
    837 	{
    838 	  if (_M_stack.empty())
    839 	    __throw_regex_error(regex_constants::error_badrepeat);
    840 	  _StateSeq __r(_M_stack.top(), -1);
    841 	  __r._M_append(__r._M_front());
    842 	  _M_stack.pop();
    843 	  _M_stack.push(__r);
    844 	  return true;
    845 	}
    846       if (_M_match_token(_ScannerT::_S_token_closure1))
    847 	{
    848 	  if (_M_stack.empty())
    849 	    __throw_regex_error(regex_constants::error_badrepeat);
    850 	  _StateSeq __r(_M_state_store,
    851 			_M_state_store.
    852 			_M_insert_alt(_S_invalid_state_id,
    853 				      _M_stack.top()._M_front()));
    854 	  _M_stack.top()._M_append(__r);
    855 	  return true;
    856 	}
    857       if (_M_match_token(_ScannerT::_S_token_opt))
    858 	{
    859 	  if (_M_stack.empty())
    860 	  __throw_regex_error(regex_constants::error_badrepeat);
    861 	  _StateSeq __r(_M_stack.top(), -1);
    862 	  _M_stack.pop();
    863 	  _M_stack.push(__r);
    864 	  return true;
    865 	}
    866       if (_M_match_token(_ScannerT::_S_token_interval_begin))
    867 	{
    868 	  if (_M_stack.empty())
    869 	    __throw_regex_error(regex_constants::error_badrepeat);
    870 	  if (!_M_match_token(_ScannerT::_S_token_dup_count))
    871 	    __throw_regex_error(regex_constants::error_badbrace);
    872 	  _StateSeq __r(_M_stack.top());
    873 	  int __min_rep = _M_cur_int_value(10);
    874 	  for (int __i = 1; __i < __min_rep; ++__i)
    875 	    _M_stack.top()._M_append(__r._M_clone());
    876 	  if (_M_match_token(_ScannerT::_S_token_comma))
    877 	    if (_M_match_token(_ScannerT::_S_token_dup_count))
    878 	      {
    879 		int __n = _M_cur_int_value(10) - __min_rep;
    880 		if (__n < 0)
    881 		  __throw_regex_error(regex_constants::error_badbrace);
    882 		for (int __i = 0; __i < __n; ++__i)
    883 		  {
    884 		    _StateSeq __r(_M_state_store,
    885 				  _M_state_store.
    886 				  _M_insert_alt(_S_invalid_state_id,
    887 						_M_stack.top()._M_front()));
    888 		    _M_stack.top()._M_append(__r);
    889 		  }
    890 	      }
    891 	    else
    892 	      {
    893 		_StateSeq __r(_M_stack.top(), -1);
    894 		__r._M_push_back(__r._M_front());
    895 		_M_stack.pop();
    896 		_M_stack.push(__r);
    897 	      }
    898 	  if (!_M_match_token(_ScannerT::_S_token_interval_end))
    899 	    __throw_regex_error(regex_constants::error_brace);
    900 	  return true;
    901 	}
    902       return false;
    903     }
    904 
    905   template<typename _InIter, typename _TraitsT>
    906     bool
    907     _Compiler<_InIter, _TraitsT>::
    908     _M_atom()
    909     {
    910       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
    911       typedef _StartTagger<_InIter, _TraitsT> _Start;
    912       typedef _EndTagger<_InIter, _TraitsT> _End;
    913 
    914       if (_M_match_token(_ScannerT::_S_token_anychar))
    915 	{
    916 	  _M_stack.push(_StateSeq(_M_state_store,
    917                                   _M_state_store._M_insert_matcher
    918                                   (_AnyMatcher)));
    919 	  return true;
    920 	}
    921       if (_M_match_token(_ScannerT::_S_token_ord_char))
    922 	{
    923 	  _M_stack.push(_StateSeq(_M_state_store,
    924                                   _M_state_store._M_insert_matcher
    925                                   (_CMatcher(_M_cur_value[0], _M_traits))));
    926 	  return true;
    927 	}
    928       if (_M_match_token(_ScannerT::_S_token_quoted_char))
    929 	{
    930 	  // note that in the ECMA grammar, this case covers backrefs.
    931 	  _M_stack.push(_StateSeq(_M_state_store,
    932 				  _M_state_store._M_insert_matcher
    933 				  (_CMatcher(_M_cur_value[0], _M_traits))));
    934 	  return true;
    935 	}
    936       if (_M_match_token(_ScannerT::_S_token_backref))
    937 	{
    938 	  // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
    939 	  return true;
    940 	}
    941       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
    942 	{
    943 	  int __mark = _M_state_store._M_sub_count();
    944 	  _StateSeq __r(_M_state_store,
    945 			_M_state_store.
    946 			_M_insert_subexpr_begin(_Start(__mark)));
    947 	  this->_M_disjunction();
    948 	  if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
    949 	    __throw_regex_error(regex_constants::error_paren);
    950 	  if (!_M_stack.empty())
    951 	    {
    952 	      __r._M_append(_M_stack.top());
    953 	      _M_stack.pop();
    954 	    }
    955 	  __r._M_append(_M_state_store._M_insert_subexpr_end
    956 			(__mark, _End(__mark)));
    957 	  _M_stack.push(__r);
    958 	  return true;
    959 	}
    960       return _M_bracket_expression();
    961     }
    962 
    963   template<typename _InIter, typename _TraitsT>
    964     bool
    965     _Compiler<_InIter, _TraitsT>::
    966     _M_bracket_expression()
    967     {
    968       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
    969 	{
    970 	  _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
    971 			       _M_traits);
    972 	  if (!_M_bracket_list(__matcher)
    973 	      || !_M_match_token(_ScannerT::_S_token_bracket_end))
    974 	    __throw_regex_error(regex_constants::error_brack);
    975 	  _M_stack.push(_StateSeq(_M_state_store,
    976 				  _M_state_store._M_insert_matcher(__matcher)));
    977 	  return true;
    978 	}
    979       return false;
    980     }
    981 
    982   // If the dash is the last character in the bracket expression, it is not
    983   // special.
    984   template<typename _InIter, typename _TraitsT>
    985     bool
    986     _Compiler<_InIter, _TraitsT>::
    987     _M_bracket_list(_RMatcherT& __matcher)
    988     {
    989       if (_M_follow_list(__matcher))
    990 	{
    991 	  if (_M_match_token(_ScannerT::_S_token_dash))
    992 	    __matcher._M_add_char(_M_cur_value[0]);
    993 	  return true;
    994 	}
    995       return false;
    996     }
    997 
    998   template<typename _InIter, typename _TraitsT>
    999     bool
   1000     _Compiler<_InIter, _TraitsT>::
   1001     _M_follow_list(_RMatcherT& __matcher)
   1002     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
   1003 
   1004   template<typename _InIter, typename _TraitsT>
   1005     bool
   1006     _Compiler<_InIter, _TraitsT>::
   1007     _M_follow_list2(_RMatcherT& __matcher)
   1008     {
   1009       if (_M_expression_term(__matcher))
   1010 	return _M_follow_list2(__matcher);
   1011       return true;
   1012     }
   1013 
   1014   template<typename _InIter, typename _TraitsT>
   1015     bool
   1016     _Compiler<_InIter, _TraitsT>::
   1017     _M_expression_term(_RMatcherT& __matcher)
   1018     {
   1019       return (_M_collating_symbol(__matcher)
   1020 	      || _M_character_class(__matcher)
   1021 	      || _M_equivalence_class(__matcher)
   1022 	      || (_M_start_range(__matcher)
   1023 		  && _M_range_expression(__matcher)));
   1024     }
   1025 
   1026   template<typename _InIter, typename _TraitsT>
   1027     bool
   1028     _Compiler<_InIter, _TraitsT>::
   1029     _M_range_expression(_RMatcherT& __matcher)
   1030     {
   1031       if (!_M_collating_symbol(__matcher))
   1032 	if (!_M_match_token(_ScannerT::_S_token_dash))
   1033 	  __throw_regex_error(regex_constants::error_range);
   1034       __matcher._M_make_range();
   1035       return true;
   1036     }
   1037 
   1038   template<typename _InIter, typename _TraitsT>
   1039     bool
   1040     _Compiler<_InIter, _TraitsT>::
   1041     _M_start_range(_RMatcherT& __matcher)
   1042     { return _M_match_token(_ScannerT::_S_token_dash); }
   1043 
   1044   template<typename _InIter, typename _TraitsT>
   1045     bool
   1046     _Compiler<_InIter, _TraitsT>::
   1047     _M_collating_symbol(_RMatcherT& __matcher)
   1048     {
   1049       if (_M_match_token(_ScannerT::_S_token_collelem_single))
   1050 	{
   1051 	  __matcher._M_add_char(_M_cur_value[0]);
   1052 	  return true;
   1053 	}
   1054       if (_M_match_token(_ScannerT::_S_token_collsymbol))
   1055 	{
   1056 	  __matcher._M_add_collating_element(_M_cur_value);
   1057 	  return true;
   1058 	}
   1059       return false;
   1060     }
   1061 
   1062   template<typename _InIter, typename _TraitsT>
   1063     bool
   1064     _Compiler<_InIter, _TraitsT>::
   1065     _M_equivalence_class(_RMatcherT& __matcher)
   1066     {
   1067       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
   1068 	{
   1069 	  __matcher._M_add_equivalence_class(_M_cur_value);
   1070 	  return true;
   1071 	}
   1072       return false;
   1073     }
   1074 
   1075   template<typename _InIter, typename _TraitsT>
   1076     bool
   1077     _Compiler<_InIter, _TraitsT>::
   1078     _M_character_class(_RMatcherT& __matcher)
   1079     {
   1080       if (_M_match_token(_ScannerT::_S_token_char_class_name))
   1081 	{
   1082 	  __matcher._M_add_character_class(_M_cur_value);
   1083 	  return true;
   1084 	}
   1085       return false;
   1086     }
   1087 
   1088   template<typename _InIter, typename _TraitsT>
   1089     int
   1090     _Compiler<_InIter, _TraitsT>::
   1091     _M_cur_int_value(int __radix)
   1092     {
   1093       int __v = 0;
   1094       for (typename _StringT::size_type __i = 0;
   1095 	   __i < _M_cur_value.length(); ++__i)
   1096 	__v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
   1097       return __v;
   1098     }
   1099 
   1100   template<typename _InIter, typename _TraitsT>
   1101     _AutomatonPtr
   1102     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
   1103 	      regex_constants::syntax_option_type __f)
   1104     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
   1105                                         __f)._M_nfa())); }
   1106 
   1107  //@} regex-detail
   1108 _GLIBCXX_END_NAMESPACE_VERSION
   1109 } // namespace __detail
   1110 } // namespace std
   1111