1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2014 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex_scanner.h 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 namespace __detail 34 { 35 _GLIBCXX_BEGIN_NAMESPACE_VERSION 36 37 /** 38 * @addtogroup regex-detail 39 * @{ 40 */ 41 42 struct _ScannerBase 43 { 44 public: 45 /// Token types returned from the scanner. 46 enum _TokenT 47 { 48 _S_token_anychar, 49 _S_token_ord_char, 50 _S_token_oct_num, 51 _S_token_hex_num, 52 _S_token_backref, 53 _S_token_subexpr_begin, 54 _S_token_subexpr_no_group_begin, 55 _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n' 56 _S_token_subexpr_end, 57 _S_token_bracket_begin, 58 _S_token_bracket_neg_begin, 59 _S_token_bracket_end, 60 _S_token_interval_begin, 61 _S_token_interval_end, 62 _S_token_quoted_class, 63 _S_token_char_class_name, 64 _S_token_collsymbol, 65 _S_token_equiv_class_name, 66 _S_token_opt, 67 _S_token_or, 68 _S_token_closure0, 69 _S_token_closure1, 70 _S_token_ungreedy, 71 _S_token_line_begin, 72 _S_token_line_end, 73 _S_token_word_bound, // neg if _M_value[0] == 'n' 74 _S_token_comma, 75 _S_token_dup_count, 76 _S_token_eof, 77 _S_token_unknown 78 }; 79 80 protected: 81 typedef regex_constants::syntax_option_type _FlagT; 82 83 enum _StateT 84 { 85 _S_state_normal, 86 _S_state_in_brace, 87 _S_state_in_bracket, 88 }; 89 90 protected: 91 _ScannerBase(_FlagT __flags) 92 : _M_state(_S_state_normal), 93 _M_flags(__flags), 94 _M_escape_tbl(_M_is_ecma() 95 ? _M_ecma_escape_tbl 96 : _M_awk_escape_tbl), 97 _M_spec_char(_M_is_ecma() 98 ? _M_ecma_spec_char 99 : _M_is_basic() 100 ? _M_basic_spec_char 101 : _M_extended_spec_char), 102 _M_at_bracket_start(false) 103 { } 104 105 protected: 106 const char* 107 _M_find_escape(char __c) 108 { 109 auto __it = _M_escape_tbl; 110 for (; __it->first != '\0'; ++__it) 111 if (__it->first == __c) 112 return &__it->second; 113 return nullptr; 114 } 115 116 bool 117 _M_is_ecma() const 118 { return _M_flags & regex_constants::ECMAScript; } 119 120 bool 121 _M_is_basic() const 122 { return _M_flags & (regex_constants::basic | regex_constants::grep); } 123 124 bool 125 _M_is_extended() const 126 { 127 return _M_flags & (regex_constants::extended 128 | regex_constants::egrep 129 | regex_constants::awk); 130 } 131 132 bool 133 _M_is_grep() const 134 { return _M_flags & (regex_constants::grep | regex_constants::egrep); } 135 136 bool 137 _M_is_awk() const 138 { return _M_flags & regex_constants::awk; } 139 140 protected: 141 const std::pair<char, _TokenT> _M_token_tbl[9] = 142 { 143 {'^', _S_token_line_begin}, 144 {'$', _S_token_line_end}, 145 {'.', _S_token_anychar}, 146 {'*', _S_token_closure0}, 147 {'+', _S_token_closure1}, 148 {'?', _S_token_opt}, 149 {'|', _S_token_or}, 150 {'\n', _S_token_or}, // grep and egrep 151 {'\0', _S_token_or}, 152 }; 153 const std::pair<char, char> _M_ecma_escape_tbl[8] = 154 { 155 {'0', '\0'}, 156 {'b', '\b'}, 157 {'f', '\f'}, 158 {'n', '\n'}, 159 {'r', '\r'}, 160 {'t', '\t'}, 161 {'v', '\v'}, 162 {'\0', '\0'}, 163 }; 164 const std::pair<char, char> _M_awk_escape_tbl[11] = 165 { 166 {'"', '"'}, 167 {'/', '/'}, 168 {'\\', '\\'}, 169 {'a', '\a'}, 170 {'b', '\b'}, 171 {'f', '\f'}, 172 {'n', '\n'}, 173 {'r', '\r'}, 174 {'t', '\t'}, 175 {'v', '\v'}, 176 {'\0', '\0'}, 177 }; 178 const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|"; 179 const char* _M_basic_spec_char = ".[\\*^$"; 180 const char* _M_extended_spec_char = ".[\\()*+?{|^$"; 181 182 _StateT _M_state; 183 _FlagT _M_flags; 184 _TokenT _M_token; 185 const std::pair<char, char>* _M_escape_tbl; 186 const char* _M_spec_char; 187 bool _M_at_bracket_start; 188 }; 189 190 /** 191 * @brief Scans an input range for regex tokens. 192 * 193 * The %_Scanner class interprets the regular expression pattern in 194 * the input range passed to its constructor as a sequence of parse 195 * tokens passed to the regular expression compiler. The sequence 196 * of tokens provided depends on the flag settings passed to the 197 * constructor: different regular expression grammars will interpret 198 * the same input pattern in syntactically different ways. 199 */ 200 template<typename _CharT> 201 class _Scanner 202 : public _ScannerBase 203 { 204 public: 205 typedef const _CharT* _IterT; 206 typedef std::basic_string<_CharT> _StringT; 207 typedef regex_constants::syntax_option_type _FlagT; 208 typedef const std::ctype<_CharT> _CtypeT; 209 210 _Scanner(_IterT __begin, _IterT __end, 211 _FlagT __flags, std::locale __loc); 212 213 void 214 _M_advance(); 215 216 _TokenT 217 _M_get_token() const 218 { return _M_token; } 219 220 const _StringT& 221 _M_get_value() const 222 { return _M_value; } 223 224 #ifdef _GLIBCXX_DEBUG 225 std::ostream& 226 _M_print(std::ostream&); 227 #endif 228 229 private: 230 void 231 _M_scan_normal(); 232 233 void 234 _M_scan_in_bracket(); 235 236 void 237 _M_scan_in_brace(); 238 239 void 240 _M_eat_escape_ecma(); 241 242 void 243 _M_eat_escape_posix(); 244 245 void 246 _M_eat_escape_awk(); 247 248 void 249 _M_eat_class(char); 250 251 _IterT _M_current; 252 _IterT _M_end; 253 _CtypeT& _M_ctype; 254 _StringT _M_value; 255 void (_Scanner::* _M_eat_escape)(); 256 }; 257 258 //@} regex-detail 259 _GLIBCXX_END_NAMESPACE_VERSION 260 } // namespace __detail 261 } // namespace std 262 263 #include <bits/regex_scanner.tcc> 264