Home | History | Annotate | Download | only in bits
      1 // class template regex -*- C++ -*-
      2 
      3 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /**
     26  *  @file bits/regex_nfa.h
     27  *  This is an internal header file, included by other library headers.
     28  *  Do not attempt to use it directly. @headername{regex}
     29  */
     30 
     31 namespace std _GLIBCXX_VISIBILITY(default)
     32 {
     33 namespace __detail
     34 {
     35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     36 
     37   /**
     38    * @addtogroup regex-detail
     39    * @{
     40    */
     41 
     42   /// Base class for, um, automata.  Could be an NFA or a DFA.  Your choice.
     43   class _Automaton
     44   {
     45   public:
     46     typedef unsigned int _SizeT;
     47 
     48   public:
     49     virtual
     50     ~_Automaton() { }
     51 
     52     virtual _SizeT
     53     _M_sub_count() const = 0;
     54 
     55 #ifdef _GLIBCXX_DEBUG
     56     virtual std::ostream&
     57     _M_dot(std::ostream& __ostr) const = 0;
     58 #endif
     59   };
     60 
     61   /// Generic shared pointer to an automaton.
     62   typedef std::shared_ptr<_Automaton> _AutomatonPtr;
     63 
     64   /// Operation codes that define the type of transitions within the base NFA
     65   /// that represents the regular expression.
     66   enum _Opcode
     67   {
     68       _S_opcode_unknown       =   0,
     69       _S_opcode_alternative   =   1,
     70       _S_opcode_subexpr_begin =   4,
     71       _S_opcode_subexpr_end   =   5,
     72       _S_opcode_match         = 100,
     73       _S_opcode_accept        = 255
     74   };
     75 
     76   /// Provides a generic facade for a templated match_results.
     77   struct _Results
     78   {
     79     virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0;
     80     virtual void _M_set_matched(int __i, bool __is_matched) = 0;
     81   };
     82 
     83   /// Tags current state (for subexpr begin/end).
     84   typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger;
     85 
     86   /// Start state tag.
     87   template<typename _FwdIterT, typename _TraitsT>
     88     struct _StartTagger
     89     {
     90       explicit
     91       _StartTagger(int __i)
     92       : _M_index(__i)
     93       { }
     94 
     95       void
     96       operator()(const _PatternCursor& __pc, _Results& __r)
     97       { __r._M_set_pos(_M_index, 0, __pc); }
     98 
     99       int       _M_index;
    100     };
    101 
    102   /// End state tag.
    103   template<typename _FwdIterT, typename _TraitsT>
    104     struct _EndTagger
    105     {
    106       explicit
    107       _EndTagger(int __i)
    108       : _M_index(__i)
    109       { }
    110 
    111       void
    112       operator()(const _PatternCursor& __pc, _Results& __r)
    113       { __r._M_set_pos(_M_index, 1, __pc); }
    114 
    115       int       _M_index;
    116       _FwdIterT _M_pos;
    117     };
    118 
    119   /// Indicates if current state matches cursor current.
    120   typedef std::function<bool (const _PatternCursor&)> _Matcher;
    121 
    122   /// Matches any character
    123   inline bool
    124   _AnyMatcher(const _PatternCursor&)
    125   { return true; }
    126 
    127   /// Matches a single character
    128   template<typename _InIterT, typename _TraitsT>
    129     struct _CharMatcher
    130     {
    131       typedef typename _TraitsT::char_type char_type;
    132 
    133       explicit
    134       _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT())
    135       : _M_traits(__t), _M_c(_M_traits.translate(__c))
    136       { }
    137 
    138       bool
    139       operator()(const _PatternCursor& __pc) const
    140       {
    141 	typedef const _SpecializedCursor<_InIterT>& _CursorT;
    142 	_CursorT __c = static_cast<_CursorT>(__pc);
    143 	return _M_traits.translate(__c._M_current()) == _M_c;
    144       }
    145 
    146       const _TraitsT& _M_traits;
    147       char_type       _M_c;
    148     };
    149 
    150   /// Matches a character range (bracket expression)
    151   template<typename _InIterT, typename _TraitsT>
    152     struct _RangeMatcher
    153     {
    154       typedef typename _TraitsT::char_type _CharT;
    155       typedef std::basic_string<_CharT>    _StringT;
    156 
    157       explicit
    158       _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT())
    159       : _M_traits(__t), _M_is_non_matching(__is_non_matching)
    160       { }
    161 
    162       bool
    163       operator()(const _PatternCursor& __pc) const
    164       {
    165 	typedef const _SpecializedCursor<_InIterT>& _CursorT;
    166 	_CursorT __c = static_cast<_CursorT>(__pc);
    167 	return true;
    168       }
    169 
    170       void
    171       _M_add_char(_CharT __c)
    172       { }
    173 
    174       void
    175       _M_add_collating_element(const _StringT& __s)
    176       { }
    177 
    178       void
    179       _M_add_equivalence_class(const _StringT& __s)
    180       { }
    181 
    182       void
    183       _M_add_character_class(const _StringT& __s)
    184       { }
    185 
    186       void
    187       _M_make_range()
    188       { }
    189 
    190       const _TraitsT& _M_traits;
    191       bool            _M_is_non_matching;
    192     };
    193 
    194   /// Identifies a state in the NFA.
    195   typedef int _StateIdT;
    196 
    197   /// The special case in which a state identifier is not an index.
    198   static const _StateIdT _S_invalid_state_id  = -1;
    199 
    200 
    201   /**
    202    * @brief struct _State
    203    *
    204    * An individual state in an NFA
    205    *
    206    * In this case a "state" is an entry in the NFA definition coupled
    207    * with its outgoing transition(s).  All states have a single outgoing
    208    * transition, except for accepting states (which have no outgoing
    209    * transitions) and alt states, which have two outgoing transitions.
    210    */
    211   struct _State
    212   {
    213     typedef int  _OpcodeT;
    214 
    215     _OpcodeT     _M_opcode;    // type of outgoing transition
    216     _StateIdT    _M_next;      // outgoing transition
    217     _StateIdT    _M_alt;       // for _S_opcode_alternative
    218     unsigned int _M_subexpr;   // for _S_opcode_subexpr_*
    219     _Tagger      _M_tagger;    // for _S_opcode_subexpr_*
    220     _Matcher     _M_matches;   // for _S_opcode_match
    221 
    222     explicit _State(_OpcodeT __opcode)
    223     : _M_opcode(__opcode), _M_next(_S_invalid_state_id)
    224     { }
    225 
    226     _State(const _Matcher& __m)
    227     : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m)
    228     { }
    229 
    230     _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t)
    231     : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s),
    232       _M_tagger(__t)
    233     { }
    234 
    235     _State(_StateIdT __next, _StateIdT __alt)
    236     : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt)
    237     { }
    238 
    239 #ifdef _GLIBCXX_DEBUG
    240     std::ostream&
    241     _M_print(std::ostream& ostr) const;
    242 
    243     // Prints graphviz dot commands for state.
    244     std::ostream&
    245     _M_dot(std::ostream& __ostr, _StateIdT __id) const;
    246 #endif
    247   };
    248 
    249 
    250   /// The Grep Matcher works on sets of states.  Here are sets of states.
    251   typedef std::set<_StateIdT> _StateSet;
    252 
    253   /**
    254    * @brief struct _Nfa
    255    *
    256    * A collection of all states making up an NFA.
    257    *
    258    * An NFA is a 4-tuple M = (K, S, s, F), where
    259    *    K is a finite set of states,
    260    *    S is the alphabet of the NFA,
    261    *    s is the initial state,
    262    *    F is a set of final (accepting) states.
    263    *
    264    * This NFA class is templated on S, a type that will hold values of the
    265    * underlying alphabet (without regard to semantics of that alphabet).  The
    266    * other elements of the tuple are generated during construction of the NFA
    267    * and are available through accessor member functions.
    268    */
    269   class _Nfa
    270   : public _Automaton, public std::vector<_State>
    271   {
    272   public:
    273     typedef _State                              _StateT;
    274     typedef unsigned int                        _SizeT;
    275     typedef regex_constants::syntax_option_type _FlagT;
    276 
    277     _Nfa(_FlagT __f)
    278     : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0)
    279     { }
    280 
    281     ~_Nfa()
    282     { }
    283 
    284     _FlagT
    285     _M_options() const
    286     { return _M_flags; }
    287 
    288     _StateIdT
    289     _M_start() const
    290     { return _M_start_state; }
    291 
    292     const _StateSet&
    293     _M_final_states() const
    294     { return _M_accepting_states; }
    295 
    296     _SizeT
    297     _M_sub_count() const
    298     { return _M_subexpr_count; }
    299 
    300     _StateIdT
    301     _M_insert_accept()
    302     {
    303       this->push_back(_StateT(_S_opcode_accept));
    304       _M_accepting_states.insert(this->size()-1);
    305       return this->size()-1;
    306     }
    307 
    308     _StateIdT
    309     _M_insert_alt(_StateIdT __next, _StateIdT __alt)
    310     {
    311       this->push_back(_StateT(__next, __alt));
    312       return this->size()-1;
    313     }
    314 
    315     _StateIdT
    316     _M_insert_matcher(_Matcher __m)
    317     {
    318       this->push_back(_StateT(__m));
    319       return this->size()-1;
    320     }
    321 
    322     _StateIdT
    323     _M_insert_subexpr_begin(const _Tagger& __t)
    324     {
    325       this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++,
    326 			      __t));
    327       return this->size()-1;
    328     }
    329 
    330     _StateIdT
    331     _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t)
    332     {
    333       this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t));
    334       return this->size()-1;
    335     }
    336 
    337 #ifdef _GLIBCXX_DEBUG
    338     std::ostream&
    339     _M_dot(std::ostream& __ostr) const;
    340 #endif
    341 
    342   private:
    343     _FlagT     _M_flags;
    344     _StateIdT  _M_start_state;
    345     _StateSet  _M_accepting_states;
    346     _SizeT     _M_subexpr_count;
    347   };
    348 
    349   /// Describes a sequence of one or more %_State, its current start
    350   /// and end(s).  This structure contains fragments of an NFA during
    351   /// construction.
    352   class _StateSeq
    353   {
    354   public:
    355     // Constructs a single-node sequence
    356     _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id)
    357     : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e)
    358     { }
    359     // Constructs a split sequence from two other sequencces
    360     _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2)
    361     : _M_nfa(__e1._M_nfa),
    362       _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)),
    363       _M_end1(__e1._M_end1), _M_end2(__e2._M_end1)
    364     { }
    365 
    366     // Constructs a split sequence from a single sequence
    367     _StateSeq(const _StateSeq& __e, _StateIdT __id)
    368     : _M_nfa(__e._M_nfa),
    369       _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)),
    370       _M_end1(__id), _M_end2(__e._M_end1)
    371     { }
    372 
    373     // Constructs a copy of a %_StateSeq
    374     _StateSeq(const _StateSeq& __rhs)
    375     : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start),
    376       _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2)
    377     { }
    378 
    379 
    380     _StateSeq& operator=(const _StateSeq& __rhs);
    381 
    382     _StateIdT
    383     _M_front() const
    384     { return _M_start; }
    385 
    386     // Extends a sequence by one.
    387     void
    388     _M_push_back(_StateIdT __id);
    389 
    390     // Extends and maybe joins a sequence.
    391     void
    392     _M_append(_StateIdT __id);
    393 
    394     void
    395     _M_append(_StateSeq& __rhs);
    396 
    397     // Clones an entire sequence.
    398     _StateIdT
    399     _M_clone();
    400 
    401   private:
    402     _Nfa&     _M_nfa;
    403     _StateIdT _M_start;
    404     _StateIdT _M_end1;
    405     _StateIdT _M_end2;
    406 
    407   };
    408 
    409  //@} regex-detail
    410 _GLIBCXX_END_NAMESPACE_VERSION
    411 } // namespace __detail
    412 } // namespace std
    413 
    414 #include <bits/regex_nfa.tcc>
    415 
    416