1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2010-2013 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex_nfa.h 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 namespace __detail 34 { 35 _GLIBCXX_BEGIN_NAMESPACE_VERSION 36 37 /** 38 * @addtogroup regex-detail 39 * @{ 40 */ 41 42 /// Base class for, um, automata. Could be an NFA or a DFA. Your choice. 43 class _Automaton 44 { 45 public: 46 typedef unsigned int _SizeT; 47 48 public: 49 virtual 50 ~_Automaton() { } 51 52 virtual _SizeT 53 _M_sub_count() const = 0; 54 55 #ifdef _GLIBCXX_DEBUG 56 virtual std::ostream& 57 _M_dot(std::ostream& __ostr) const = 0; 58 #endif 59 }; 60 61 /// Generic shared pointer to an automaton. 62 typedef std::shared_ptr<_Automaton> _AutomatonPtr; 63 64 /// Operation codes that define the type of transitions within the base NFA 65 /// that represents the regular expression. 66 enum _Opcode 67 { 68 _S_opcode_unknown = 0, 69 _S_opcode_alternative = 1, 70 _S_opcode_subexpr_begin = 4, 71 _S_opcode_subexpr_end = 5, 72 _S_opcode_match = 100, 73 _S_opcode_accept = 255 74 }; 75 76 /// Provides a generic facade for a templated match_results. 77 struct _Results 78 { 79 virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; 80 virtual void _M_set_matched(int __i, bool __is_matched) = 0; 81 }; 82 83 /// Tags current state (for subexpr begin/end). 84 typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger; 85 86 /// Start state tag. 87 template<typename _FwdIterT, typename _TraitsT> 88 struct _StartTagger 89 { 90 explicit 91 _StartTagger(int __i) 92 : _M_index(__i) 93 { } 94 95 void 96 operator()(const _PatternCursor& __pc, _Results& __r) 97 { __r._M_set_pos(_M_index, 0, __pc); } 98 99 int _M_index; 100 }; 101 102 /// End state tag. 103 template<typename _FwdIterT, typename _TraitsT> 104 struct _EndTagger 105 { 106 explicit 107 _EndTagger(int __i) 108 : _M_index(__i) 109 { } 110 111 void 112 operator()(const _PatternCursor& __pc, _Results& __r) 113 { __r._M_set_pos(_M_index, 1, __pc); } 114 115 int _M_index; 116 _FwdIterT _M_pos; 117 }; 118 119 /// Indicates if current state matches cursor current. 120 typedef std::function<bool (const _PatternCursor&)> _Matcher; 121 122 /// Matches any character 123 inline bool 124 _AnyMatcher(const _PatternCursor&) 125 { return true; } 126 127 /// Matches a single character 128 template<typename _InIterT, typename _TraitsT> 129 struct _CharMatcher 130 { 131 typedef typename _TraitsT::char_type char_type; 132 133 explicit 134 _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT()) 135 : _M_traits(__t), _M_c(_M_traits.translate(__c)) 136 { } 137 138 bool 139 operator()(const _PatternCursor& __pc) const 140 { 141 typedef const _SpecializedCursor<_InIterT>& _CursorT; 142 _CursorT __c = static_cast<_CursorT>(__pc); 143 return _M_traits.translate(__c._M_current()) == _M_c; 144 } 145 146 const _TraitsT& _M_traits; 147 char_type _M_c; 148 }; 149 150 /// Matches a character range (bracket expression) 151 template<typename _InIterT, typename _TraitsT> 152 struct _RangeMatcher 153 { 154 typedef typename _TraitsT::char_type _CharT; 155 typedef std::basic_string<_CharT> _StringT; 156 157 explicit 158 _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT()) 159 : _M_traits(__t), _M_is_non_matching(__is_non_matching) 160 { } 161 162 bool 163 operator()(const _PatternCursor& __pc) const 164 { 165 typedef const _SpecializedCursor<_InIterT>& _CursorT; 166 _CursorT __c = static_cast<_CursorT>(__pc); 167 return true; 168 } 169 170 void 171 _M_add_char(_CharT __c) 172 { } 173 174 void 175 _M_add_collating_element(const _StringT& __s) 176 { } 177 178 void 179 _M_add_equivalence_class(const _StringT& __s) 180 { } 181 182 void 183 _M_add_character_class(const _StringT& __s) 184 { } 185 186 void 187 _M_make_range() 188 { } 189 190 const _TraitsT& _M_traits; 191 bool _M_is_non_matching; 192 }; 193 194 /// Identifies a state in the NFA. 195 typedef int _StateIdT; 196 197 /// The special case in which a state identifier is not an index. 198 static const _StateIdT _S_invalid_state_id = -1; 199 200 201 /** 202 * @brief struct _State 203 * 204 * An individual state in an NFA 205 * 206 * In this case a "state" is an entry in the NFA definition coupled 207 * with its outgoing transition(s). All states have a single outgoing 208 * transition, except for accepting states (which have no outgoing 209 * transitions) and alt states, which have two outgoing transitions. 210 */ 211 struct _State 212 { 213 typedef int _OpcodeT; 214 215 _OpcodeT _M_opcode; // type of outgoing transition 216 _StateIdT _M_next; // outgoing transition 217 _StateIdT _M_alt; // for _S_opcode_alternative 218 unsigned int _M_subexpr; // for _S_opcode_subexpr_* 219 _Tagger _M_tagger; // for _S_opcode_subexpr_* 220 _Matcher _M_matches; // for _S_opcode_match 221 222 explicit _State(_OpcodeT __opcode) 223 : _M_opcode(__opcode), _M_next(_S_invalid_state_id) 224 { } 225 226 _State(const _Matcher& __m) 227 : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m) 228 { } 229 230 _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t) 231 : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s), 232 _M_tagger(__t) 233 { } 234 235 _State(_StateIdT __next, _StateIdT __alt) 236 : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt) 237 { } 238 239 #ifdef _GLIBCXX_DEBUG 240 std::ostream& 241 _M_print(std::ostream& ostr) const; 242 243 // Prints graphviz dot commands for state. 244 std::ostream& 245 _M_dot(std::ostream& __ostr, _StateIdT __id) const; 246 #endif 247 }; 248 249 250 /// The Grep Matcher works on sets of states. Here are sets of states. 251 typedef std::set<_StateIdT> _StateSet; 252 253 /** 254 * @brief struct _Nfa 255 * 256 * A collection of all states making up an NFA. 257 * 258 * An NFA is a 4-tuple M = (K, S, s, F), where 259 * K is a finite set of states, 260 * S is the alphabet of the NFA, 261 * s is the initial state, 262 * F is a set of final (accepting) states. 263 * 264 * This NFA class is templated on S, a type that will hold values of the 265 * underlying alphabet (without regard to semantics of that alphabet). The 266 * other elements of the tuple are generated during construction of the NFA 267 * and are available through accessor member functions. 268 */ 269 class _Nfa 270 : public _Automaton, public std::vector<_State> 271 { 272 public: 273 typedef _State _StateT; 274 typedef unsigned int _SizeT; 275 typedef regex_constants::syntax_option_type _FlagT; 276 277 _Nfa(_FlagT __f) 278 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0) 279 { } 280 281 ~_Nfa() 282 { } 283 284 _FlagT 285 _M_options() const 286 { return _M_flags; } 287 288 _StateIdT 289 _M_start() const 290 { return _M_start_state; } 291 292 const _StateSet& 293 _M_final_states() const 294 { return _M_accepting_states; } 295 296 _SizeT 297 _M_sub_count() const 298 { return _M_subexpr_count; } 299 300 _StateIdT 301 _M_insert_accept() 302 { 303 this->push_back(_StateT(_S_opcode_accept)); 304 _M_accepting_states.insert(this->size()-1); 305 return this->size()-1; 306 } 307 308 _StateIdT 309 _M_insert_alt(_StateIdT __next, _StateIdT __alt) 310 { 311 this->push_back(_StateT(__next, __alt)); 312 return this->size()-1; 313 } 314 315 _StateIdT 316 _M_insert_matcher(_Matcher __m) 317 { 318 this->push_back(_StateT(__m)); 319 return this->size()-1; 320 } 321 322 _StateIdT 323 _M_insert_subexpr_begin(const _Tagger& __t) 324 { 325 this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, 326 __t)); 327 return this->size()-1; 328 } 329 330 _StateIdT 331 _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t) 332 { 333 this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t)); 334 return this->size()-1; 335 } 336 337 #ifdef _GLIBCXX_DEBUG 338 std::ostream& 339 _M_dot(std::ostream& __ostr) const; 340 #endif 341 342 private: 343 _FlagT _M_flags; 344 _StateIdT _M_start_state; 345 _StateSet _M_accepting_states; 346 _SizeT _M_subexpr_count; 347 }; 348 349 /// Describes a sequence of one or more %_State, its current start 350 /// and end(s). This structure contains fragments of an NFA during 351 /// construction. 352 class _StateSeq 353 { 354 public: 355 // Constructs a single-node sequence 356 _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id) 357 : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e) 358 { } 359 // Constructs a split sequence from two other sequencces 360 _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2) 361 : _M_nfa(__e1._M_nfa), 362 _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)), 363 _M_end1(__e1._M_end1), _M_end2(__e2._M_end1) 364 { } 365 366 // Constructs a split sequence from a single sequence 367 _StateSeq(const _StateSeq& __e, _StateIdT __id) 368 : _M_nfa(__e._M_nfa), 369 _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)), 370 _M_end1(__id), _M_end2(__e._M_end1) 371 { } 372 373 // Constructs a copy of a %_StateSeq 374 _StateSeq(const _StateSeq& __rhs) 375 : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start), 376 _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2) 377 { } 378 379 380 _StateSeq& operator=(const _StateSeq& __rhs); 381 382 _StateIdT 383 _M_front() const 384 { return _M_start; } 385 386 // Extends a sequence by one. 387 void 388 _M_push_back(_StateIdT __id); 389 390 // Extends and maybe joins a sequence. 391 void 392 _M_append(_StateIdT __id); 393 394 void 395 _M_append(_StateSeq& __rhs); 396 397 // Clones an entire sequence. 398 _StateIdT 399 _M_clone(); 400 401 private: 402 _Nfa& _M_nfa; 403 _StateIdT _M_start; 404 _StateIdT _M_end1; 405 _StateIdT _M_end2; 406 407 }; 408 409 //@} regex-detail 410 _GLIBCXX_END_NAMESPACE_VERSION 411 } // namespace __detail 412 } // namespace std 413 414 #include <bits/regex_nfa.tcc> 415 416