Home | History | Annotate | Download | only in bits
      1 // class template regex -*- C++ -*-
      2 
      3 // Copyright (C) 2013-2014 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /**
     26  *  @file bits/regex_automaton.h
     27  *  This is an internal header file, included by other library headers.
     28  *  Do not attempt to use it directly. @headername{regex}
     29  */
     30 
     31 namespace std _GLIBCXX_VISIBILITY(default)
     32 {
     33 namespace __detail
     34 {
     35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     36 
     37   /**
     38    *  @defgroup regex-detail Base and Implementation Classes
     39    *  @ingroup regex
     40    *  @{
     41    */
     42 
     43   typedef long _StateIdT;
     44   static const _StateIdT _S_invalid_state_id  = -1;
     45 
     46   template<typename _CharT>
     47     using _Matcher = std::function<bool (_CharT)>;
     48 
     49   /// Operation codes that define the type of transitions within the base NFA
     50   /// that represents the regular expression.
     51   enum _Opcode : int
     52   {
     53       _S_opcode_unknown,
     54       _S_opcode_alternative,
     55       _S_opcode_backref,
     56       _S_opcode_line_begin_assertion,
     57       _S_opcode_line_end_assertion,
     58       _S_opcode_word_boundary,
     59       _S_opcode_subexpr_lookahead,
     60       _S_opcode_subexpr_begin,
     61       _S_opcode_subexpr_end,
     62       _S_opcode_dummy,
     63       _S_opcode_match,
     64       _S_opcode_accept,
     65   };
     66 
     67   struct _State_base
     68   {
     69     _Opcode      _M_opcode;           // type of outgoing transition
     70     _StateIdT    _M_next;             // outgoing transition
     71     union // Since they are mutually exclusive.
     72     {
     73       size_t _M_subexpr;        // for _S_opcode_subexpr_*
     74       size_t _M_backref_index;  // for _S_opcode_backref
     75       struct
     76       {
     77 	// for _S_opcode_alternative.
     78 	_StateIdT  _M_quant_index;
     79 	// for _S_opcode_alternative or _S_opcode_subexpr_lookahead
     80 	_StateIdT  _M_alt;
     81 	// for _S_opcode_word_boundary or _S_opcode_subexpr_lookahead or
     82 	// quantifiers (ungreedy if set true)
     83 	bool       _M_neg;
     84       };
     85     };
     86 
     87     explicit _State_base(_Opcode __opcode)
     88     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
     89     { }
     90 
     91   protected:
     92     ~_State_base() = default;
     93 
     94   public:
     95 #ifdef _GLIBCXX_DEBUG
     96     std::ostream&
     97     _M_print(std::ostream& ostr) const;
     98 
     99     // Prints graphviz dot commands for state.
    100     std::ostream&
    101     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
    102 #endif
    103   };
    104 
    105   template<typename _TraitsT>
    106     struct _State : _State_base
    107     {
    108       typedef _Matcher<typename _TraitsT::char_type> _MatcherT;
    109 
    110       _MatcherT      _M_matches;        // for _S_opcode_match
    111 
    112       explicit _State(_Opcode __opcode) : _State_base(__opcode) { }
    113     };
    114 
    115   struct _NFA_base
    116   {
    117     typedef size_t                              _SizeT;
    118     typedef regex_constants::syntax_option_type _FlagT;
    119 
    120     explicit
    121     _NFA_base(_FlagT __f)
    122     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0),
    123     _M_quant_count(0), _M_has_backref(false)
    124     { }
    125 
    126     _NFA_base(_NFA_base&&) = default;
    127 
    128   protected:
    129     ~_NFA_base() = default;
    130 
    131   public:
    132     _FlagT
    133     _M_options() const
    134     { return _M_flags; }
    135 
    136     _StateIdT
    137     _M_start() const
    138     { return _M_start_state; }
    139 
    140     _SizeT
    141     _M_sub_count() const
    142     { return _M_subexpr_count; }
    143 
    144     std::vector<size_t>       _M_paren_stack;
    145     _FlagT                    _M_flags;
    146     _StateIdT                 _M_start_state;
    147     _SizeT                    _M_subexpr_count;
    148     _SizeT                    _M_quant_count;
    149     bool                      _M_has_backref;
    150   };
    151 
    152   template<typename _TraitsT>
    153     struct _NFA
    154     : _NFA_base, std::vector<_State<_TraitsT>>
    155     {
    156       typedef _State<_TraitsT>				_StateT;
    157       typedef _Matcher<typename _TraitsT::char_type>	_MatcherT;
    158 
    159       using _NFA_base::_NFA_base;
    160 
    161       // for performance reasons _NFA objects should only be moved not copied
    162       _NFA(const _NFA&) = delete;
    163       _NFA(_NFA&&) = default;
    164 
    165       _StateIdT
    166       _M_insert_accept()
    167       {
    168 	auto __ret = _M_insert_state(_StateT(_S_opcode_accept));
    169 	return __ret;
    170       }
    171 
    172       _StateIdT
    173       _M_insert_alt(_StateIdT __next, _StateIdT __alt, bool __neg)
    174       {
    175 	_StateT __tmp(_S_opcode_alternative);
    176 	// It labels every quantifier to make greedy comparison easier in BFS
    177 	// approach.
    178 	__tmp._M_quant_index = this->_M_quant_count++;
    179 	__tmp._M_next = __next;
    180 	__tmp._M_alt = __alt;
    181 	__tmp._M_neg = __neg;
    182 	return _M_insert_state(std::move(__tmp));
    183       }
    184 
    185       _StateIdT
    186       _M_insert_matcher(_MatcherT __m)
    187       {
    188 	_StateT __tmp(_S_opcode_match);
    189 	__tmp._M_matches = std::move(__m);
    190 	return _M_insert_state(std::move(__tmp));
    191       }
    192 
    193       _StateIdT
    194       _M_insert_subexpr_begin()
    195       {
    196 	auto __id = this->_M_subexpr_count++;
    197 	this->_M_paren_stack.push_back(__id);
    198 	_StateT __tmp(_S_opcode_subexpr_begin);
    199 	__tmp._M_subexpr = __id;
    200 	return _M_insert_state(std::move(__tmp));
    201       }
    202 
    203       _StateIdT
    204       _M_insert_subexpr_end()
    205       {
    206 	_StateT __tmp(_S_opcode_subexpr_end);
    207 	__tmp._M_subexpr = this->_M_paren_stack.back();
    208 	this->_M_paren_stack.pop_back();
    209 	return _M_insert_state(std::move(__tmp));
    210       }
    211 
    212       _StateIdT
    213       _M_insert_backref(size_t __index);
    214 
    215       _StateIdT
    216       _M_insert_line_begin()
    217       { return _M_insert_state(_StateT(_S_opcode_line_begin_assertion)); }
    218 
    219       _StateIdT
    220       _M_insert_line_end()
    221       { return _M_insert_state(_StateT(_S_opcode_line_end_assertion)); }
    222 
    223       _StateIdT
    224       _M_insert_word_bound(bool __neg)
    225       {
    226 	_StateT __tmp(_S_opcode_word_boundary);
    227 	__tmp._M_neg = __neg;
    228 	return _M_insert_state(std::move(__tmp));
    229       }
    230 
    231       _StateIdT
    232       _M_insert_lookahead(_StateIdT __alt, bool __neg)
    233       {
    234 	_StateT __tmp(_S_opcode_subexpr_lookahead);
    235 	__tmp._M_alt = __alt;
    236 	__tmp._M_neg = __neg;
    237 	return _M_insert_state(std::move(__tmp));
    238       }
    239 
    240       _StateIdT
    241       _M_insert_dummy()
    242       { return _M_insert_state(_StateT(_S_opcode_dummy)); }
    243 
    244       _StateIdT
    245       _M_insert_state(_StateT __s)
    246       {
    247 	this->push_back(std::move(__s));
    248 	return this->size()-1;
    249       }
    250 
    251       // Eliminate dummy node in this NFA to make it compact.
    252       void
    253       _M_eliminate_dummy();
    254 
    255 #ifdef _GLIBCXX_DEBUG
    256       std::ostream&
    257       _M_dot(std::ostream& __ostr) const;
    258 #endif
    259     };
    260 
    261   /// Describes a sequence of one or more %_State, its current start
    262   /// and end(s).  This structure contains fragments of an NFA during
    263   /// construction.
    264   template<typename _TraitsT>
    265     class _StateSeq
    266     {
    267     public:
    268       typedef _NFA<_TraitsT> _RegexT;
    269 
    270     public:
    271       _StateSeq(_RegexT& __nfa, _StateIdT __s)
    272       : _M_nfa(__nfa), _M_start(__s), _M_end(__s)
    273       { }
    274 
    275       _StateSeq(_RegexT& __nfa, _StateIdT __s, _StateIdT __end)
    276       : _M_nfa(__nfa), _M_start(__s), _M_end(__end)
    277       { }
    278 
    279       // Append a state on *this and change *this to the new sequence.
    280       void
    281       _M_append(_StateIdT __id)
    282       {
    283 	_M_nfa[_M_end]._M_next = __id;
    284 	_M_end = __id;
    285       }
    286 
    287       // Append a sequence on *this and change *this to the new sequence.
    288       void
    289       _M_append(const _StateSeq& __s)
    290       {
    291 	_M_nfa[_M_end]._M_next = __s._M_start;
    292 	_M_end = __s._M_end;
    293       }
    294 
    295       // Clones an entire sequence.
    296       _StateSeq
    297       _M_clone();
    298 
    299     public:
    300       _RegexT&  _M_nfa;
    301       _StateIdT _M_start;
    302       _StateIdT _M_end;
    303     };
    304 
    305  //@} regex-detail
    306 _GLIBCXX_END_NAMESPACE_VERSION
    307 } // namespace __detail
    308 } // namespace std
    309 
    310 #include <bits/regex_automaton.tcc>
    311