Home | History | Annotate | Download | only in bits
      1 // class template regex -*- C++ -*-
      2 
      3 // Copyright (C) 2010-2014 Free Software Foundation, Inc.
      4 //
      5 // This file is part of the GNU ISO C++ Library.  This library is free
      6 // software; you can redistribute it and/or modify it under the
      7 // terms of the GNU General Public License as published by the
      8 // Free Software Foundation; either version 3, or (at your option)
      9 // any later version.
     10 
     11 // This library is distributed in the hope that it will be useful,
     12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
     13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14 // GNU General Public License for more details.
     15 
     16 // Under Section 7 of GPL version 3, you are granted additional
     17 // permissions described in the GCC Runtime Library Exception, version
     18 // 3.1, as published by the Free Software Foundation.
     19 
     20 // You should have received a copy of the GNU General Public License and
     21 // a copy of the GCC Runtime Library Exception along with this program;
     22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
     23 // <http://www.gnu.org/licenses/>.
     24 
     25 /**
     26  *  @file bits/regex_compiler.h
     27  *  This is an internal header file, included by other library headers.
     28  *  Do not attempt to use it directly. @headername{regex}
     29  */
     30 
     31 namespace std _GLIBCXX_VISIBILITY(default)
     32 {
     33 namespace __detail
     34 {
     35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
     36 
     37   /**
     38    * @addtogroup regex-detail
     39    * @{
     40    */
     41 
     42   template<typename, bool, bool>
     43     struct _BracketMatcher;
     44 
     45   /**
     46    * @brief Builds an NFA from an input iterator interval.
     47    *
     48    * The %_TraitsT type should fulfill requirements [28.3].
     49    */
     50   template<typename _TraitsT>
     51     class _Compiler
     52     {
     53     public:
     54       typedef typename _TraitsT::char_type        _CharT;
     55       typedef const _CharT*                       _IterT;
     56       typedef _NFA<_TraitsT>              	  _RegexT;
     57       typedef regex_constants::syntax_option_type _FlagT;
     58 
     59       _Compiler(_IterT __b, _IterT __e,
     60 		const _TraitsT& __traits, _FlagT __flags);
     61 
     62       std::shared_ptr<_RegexT>
     63       _M_get_nfa()
     64       { return make_shared<_RegexT>(std::move(_M_nfa)); }
     65 
     66     private:
     67       typedef _Scanner<_CharT>               _ScannerT;
     68       typedef typename _TraitsT::string_type _StringT;
     69       typedef typename _ScannerT::_TokenT    _TokenT;
     70       typedef _StateSeq<_TraitsT>            _StateSeqT;
     71       typedef std::stack<_StateSeqT>         _StackT;
     72       typedef std::ctype<_CharT>             _CtypeT;
     73 
     74       // accepts a specific token or returns false.
     75       bool
     76       _M_match_token(_TokenT __token);
     77 
     78       void
     79       _M_disjunction();
     80 
     81       void
     82       _M_alternative();
     83 
     84       bool
     85       _M_term();
     86 
     87       bool
     88       _M_assertion();
     89 
     90       bool
     91       _M_quantifier();
     92 
     93       bool
     94       _M_atom();
     95 
     96       bool
     97       _M_bracket_expression();
     98 
     99       template<bool __icase, bool __collate>
    100 	void
    101 	_M_insert_any_matcher_ecma();
    102 
    103       template<bool __icase, bool __collate>
    104 	void
    105 	_M_insert_any_matcher_posix();
    106 
    107       template<bool __icase, bool __collate>
    108 	void
    109 	_M_insert_char_matcher();
    110 
    111       template<bool __icase, bool __collate>
    112 	void
    113 	_M_insert_character_class_matcher();
    114 
    115       template<bool __icase, bool __collate>
    116 	void
    117 	_M_insert_bracket_matcher(bool __neg);
    118 
    119       template<bool __icase, bool __collate>
    120 	void
    121 	_M_expression_term(_BracketMatcher<_TraitsT, __icase, __collate>&
    122 			   __matcher);
    123 
    124       int
    125       _M_cur_int_value(int __radix);
    126 
    127       bool
    128       _M_try_char();
    129 
    130       _StateSeqT
    131       _M_pop()
    132       {
    133 	auto ret = _M_stack.top();
    134 	_M_stack.pop();
    135 	return ret;
    136       }
    137 
    138       _FlagT          _M_flags;
    139       const _TraitsT& _M_traits;
    140       const _CtypeT&  _M_ctype;
    141       _ScannerT       _M_scanner;
    142       _RegexT         _M_nfa;
    143       _StringT        _M_value;
    144       _StackT         _M_stack;
    145     };
    146 
    147   template<typename _TraitsT>
    148     inline std::shared_ptr<_NFA<_TraitsT>>
    149     __compile_nfa(const typename _TraitsT::char_type* __first,
    150 		  const typename _TraitsT::char_type* __last,
    151 		  const _TraitsT& __traits,
    152 		  regex_constants::syntax_option_type __flags)
    153     {
    154       using _Cmplr = _Compiler<_TraitsT>;
    155       return _Cmplr(__first, __last, __traits, __flags)._M_get_nfa();
    156     }
    157 
    158   // [28.13.14]
    159   template<typename _TraitsT, bool __icase, bool __collate>
    160     class _RegexTranslator
    161     {
    162     public:
    163       typedef typename _TraitsT::char_type	      _CharT;
    164       typedef typename _TraitsT::string_type	      _StringT;
    165       typedef typename std::conditional<__collate,
    166 					_StringT,
    167 					_CharT>::type _StrTransT;
    168 
    169       explicit
    170       _RegexTranslator(const _TraitsT& __traits)
    171       : _M_traits(__traits)
    172       { }
    173 
    174       _CharT
    175       _M_translate(_CharT __ch) const
    176       {
    177 	if (__icase)
    178 	  return _M_traits.translate_nocase(__ch);
    179 	else if (__collate)
    180 	  return _M_traits.translate(__ch);
    181 	else
    182 	  return __ch;
    183       }
    184 
    185       _StrTransT
    186       _M_transform(_CharT __ch) const
    187       {
    188 	return _M_transform_impl(__ch, typename integral_constant<bool,
    189 				 __collate>::type());
    190       }
    191 
    192     private:
    193       _StrTransT
    194       _M_transform_impl(_CharT __ch, false_type) const
    195       { return __ch; }
    196 
    197       _StrTransT
    198       _M_transform_impl(_CharT __ch, true_type) const
    199       {
    200 	_StrTransT __str = _StrTransT(1, _M_translate(__ch));
    201 	return _M_traits.transform(__str.begin(), __str.end());
    202       }
    203 
    204       const _TraitsT& _M_traits;
    205     };
    206 
    207   template<typename _TraitsT>
    208     class _RegexTranslator<_TraitsT, false, false>
    209     {
    210     public:
    211       typedef typename _TraitsT::char_type _CharT;
    212       typedef _CharT                       _StrTransT;
    213 
    214       explicit
    215       _RegexTranslator(const _TraitsT& __traits)
    216       { }
    217 
    218       _CharT
    219       _M_translate(_CharT __ch) const
    220       { return __ch; }
    221 
    222       _StrTransT
    223       _M_transform(_CharT __ch) const
    224       { return __ch; }
    225     };
    226 
    227   template<typename _TraitsT, bool __is_ecma, bool __icase, bool __collate>
    228     struct _AnyMatcher;
    229 
    230   template<typename _TraitsT, bool __icase, bool __collate>
    231     struct _AnyMatcher<_TraitsT, false, __icase, __collate>
    232     {
    233       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
    234       typedef typename _TransT::_CharT                       _CharT;
    235 
    236       explicit
    237       _AnyMatcher(const _TraitsT& __traits)
    238       : _M_translator(__traits)
    239       { }
    240 
    241       bool
    242       operator()(_CharT __ch) const
    243       {
    244 	static auto __nul = _M_translator._M_translate('\0');
    245 	return _M_translator._M_translate(__ch) != __nul;
    246       }
    247 
    248       _TransT _M_translator;
    249     };
    250 
    251   template<typename _TraitsT, bool __icase, bool __collate>
    252     struct _AnyMatcher<_TraitsT, true, __icase, __collate>
    253     {
    254       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
    255       typedef typename _TransT::_CharT                       _CharT;
    256 
    257       explicit
    258       _AnyMatcher(const _TraitsT& __traits)
    259       : _M_translator(__traits)
    260       { }
    261 
    262       bool
    263       operator()(_CharT __ch) const
    264       { return _M_apply(__ch, typename is_same<_CharT, char>::type()); }
    265 
    266       bool
    267       _M_apply(_CharT __ch, true_type) const
    268       {
    269 	auto __c = _M_translator._M_translate(__ch);
    270 	auto __n = _M_translator._M_translate('\n');
    271 	auto __r = _M_translator._M_translate('\r');
    272 	return __c != __n && __c != __r;
    273       }
    274 
    275       bool
    276       _M_apply(_CharT __ch, false_type) const
    277       {
    278 	auto __c = _M_translator._M_translate(__ch);
    279 	auto __n = _M_translator._M_translate('\n');
    280 	auto __r = _M_translator._M_translate('\r');
    281 	auto __u2028 = _M_translator._M_translate(u'\u2028');
    282 	auto __u2029 = _M_translator._M_translate(u'\u2029');
    283 	return __c != __n && __c != __r && __c != __u2028 && __c != __u2029;
    284       }
    285 
    286       _TransT _M_translator;
    287     };
    288 
    289   template<typename _TraitsT, bool __icase, bool __collate>
    290     struct _CharMatcher
    291     {
    292       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
    293       typedef typename _TransT::_CharT                       _CharT;
    294 
    295       _CharMatcher(_CharT __ch, const _TraitsT& __traits)
    296       : _M_translator(__traits), _M_ch(_M_translator._M_translate(__ch))
    297       { }
    298 
    299       bool
    300       operator()(_CharT __ch) const
    301       { return _M_ch == _M_translator._M_translate(__ch); }
    302 
    303       _TransT _M_translator;
    304       _CharT  _M_ch;
    305     };
    306 
    307   /// Matches a character range (bracket expression)
    308   template<typename _TraitsT, bool __icase, bool __collate>
    309     struct _BracketMatcher
    310     {
    311     public:
    312       typedef _RegexTranslator<_TraitsT, __icase, __collate> _TransT;
    313       typedef typename _TransT::_CharT                       _CharT;
    314       typedef typename _TransT::_StrTransT                   _StrTransT;
    315       typedef typename _TraitsT::string_type                 _StringT;
    316       typedef typename _TraitsT::char_class_type             _CharClassT;
    317 
    318     public:
    319       _BracketMatcher(bool __is_non_matching,
    320 		      const _TraitsT& __traits)
    321       : _M_class_set(0), _M_translator(__traits), _M_traits(__traits),
    322       _M_is_non_matching(__is_non_matching)
    323 #ifdef _GLIBCXX_DEBUG
    324       , _M_is_ready(false)
    325 #endif
    326       { }
    327 
    328       bool
    329       operator()(_CharT __ch) const
    330       {
    331 	_GLIBCXX_DEBUG_ASSERT(_M_is_ready);
    332 	return _M_apply(__ch, _IsChar());
    333       }
    334 
    335       void
    336       _M_add_char(_CharT __c)
    337       {
    338 	_M_char_set.push_back(_M_translator._M_translate(__c));
    339 #ifdef _GLIBCXX_DEBUG
    340 	_M_is_ready = false;
    341 #endif
    342       }
    343 
    344       void
    345       _M_add_collating_element(const _StringT& __s)
    346       {
    347 	auto __st = _M_traits.lookup_collatename(__s.data(),
    348 						 __s.data() + __s.size());
    349 	if (__st.empty())
    350 	  __throw_regex_error(regex_constants::error_collate);
    351 	_M_char_set.push_back(_M_translator._M_translate(__st[0]));
    352 #ifdef _GLIBCXX_DEBUG
    353 	_M_is_ready = false;
    354 #endif
    355       }
    356 
    357       void
    358       _M_add_equivalence_class(const _StringT& __s)
    359       {
    360 	auto __st = _M_traits.lookup_collatename(__s.data(),
    361 						 __s.data() + __s.size());
    362 	if (__st.empty())
    363 	  __throw_regex_error(regex_constants::error_collate);
    364 	__st = _M_traits.transform_primary(__st.data(),
    365 					   __st.data() + __st.size());
    366 	_M_equiv_set.push_back(__st);
    367 #ifdef _GLIBCXX_DEBUG
    368 	_M_is_ready = false;
    369 #endif
    370       }
    371 
    372       // __neg should be true for \D, \S and \W only.
    373       void
    374       _M_add_character_class(const _StringT& __s, bool __neg)
    375       {
    376 	auto __mask = _M_traits.lookup_classname(__s.data(),
    377 						 __s.data() + __s.size(),
    378 						 __icase);
    379 	if (__mask == 0)
    380 	  __throw_regex_error(regex_constants::error_ctype);
    381 	if (!__neg)
    382 	  _M_class_set |= __mask;
    383 	else
    384 	  _M_neg_class_set.push_back(__mask);
    385 #ifdef _GLIBCXX_DEBUG
    386 	_M_is_ready = false;
    387 #endif
    388       }
    389 
    390       void
    391       _M_make_range(_CharT __l, _CharT __r)
    392       {
    393 	_M_range_set.push_back(make_pair(_M_translator._M_transform(__l),
    394 					 _M_translator._M_transform(__r)));
    395 #ifdef _GLIBCXX_DEBUG
    396 	_M_is_ready = false;
    397 #endif
    398       }
    399 
    400       void
    401       _M_ready()
    402       {
    403 	_M_make_cache(_IsChar());
    404 #ifdef _GLIBCXX_DEBUG
    405 	_M_is_ready = true;
    406 #endif
    407       }
    408 
    409     private:
    410       typedef typename is_same<_CharT, char>::type _IsChar;
    411       struct _Dummy { };
    412       typedef typename conditional<_IsChar::value,
    413 				   std::bitset<1 << (8 * sizeof(_CharT))>,
    414 				   _Dummy>::type _CacheT;
    415       typedef typename make_unsigned<_CharT>::type _UnsignedCharT;
    416 
    417     private:
    418       bool
    419       _M_apply(_CharT __ch, false_type) const;
    420 
    421       bool
    422       _M_apply(_CharT __ch, true_type) const
    423       { return _M_cache[static_cast<_UnsignedCharT>(__ch)]; }
    424 
    425       void
    426       _M_make_cache(true_type)
    427       {
    428 	for (int __i = 0; __i < _M_cache.size(); __i++)
    429 	  _M_cache[static_cast<_UnsignedCharT>(__i)] =
    430 	    _M_apply(__i, false_type());
    431       }
    432 
    433       void
    434       _M_make_cache(false_type)
    435       { }
    436 
    437     private:
    438       _CacheT                                   _M_cache;
    439       std::vector<_CharT>                       _M_char_set;
    440       std::vector<_StringT>                     _M_equiv_set;
    441       std::vector<pair<_StrTransT, _StrTransT>> _M_range_set;
    442       std::vector<_CharClassT>                  _M_neg_class_set;
    443       _CharClassT                               _M_class_set;
    444       _TransT                                   _M_translator;
    445       const _TraitsT&                           _M_traits;
    446       bool                                      _M_is_non_matching;
    447 #ifdef _GLIBCXX_DEBUG
    448       bool                                      _M_is_ready;
    449 #endif
    450     };
    451 
    452  //@} regex-detail
    453 _GLIBCXX_END_NAMESPACE_VERSION
    454 } // namespace __detail
    455 } // namespace std
    456 
    457 #include <bits/regex_compiler.tcc>
    458