1 //===-------------------------- regex.cpp ---------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is dual licensed under the MIT and the University of Illinois Open 6 // Source Licenses. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 10 #include "regex" 11 #include "algorithm" 12 #include "iterator" 13 14 _LIBCPP_BEGIN_NAMESPACE_STD 15 16 static 17 const char* 18 make_error_type_string(regex_constants::error_type ecode) 19 { 20 switch (ecode) 21 { 22 case regex_constants::error_collate: 23 return "The expression contained an invalid collating element name."; 24 case regex_constants::error_ctype: 25 return "The expression contained an invalid character class name."; 26 case regex_constants::error_escape: 27 return "The expression contained an invalid escaped character, or a " 28 "trailing escape."; 29 case regex_constants::error_backref: 30 return "The expression contained an invalid back reference."; 31 case regex_constants::error_brack: 32 return "The expression contained mismatched [ and ]."; 33 case regex_constants::error_paren: 34 return "The expression contained mismatched ( and )."; 35 case regex_constants::error_brace: 36 return "The expression contained mismatched { and }."; 37 case regex_constants::error_badbrace: 38 return "The expression contained an invalid range in a {} expression."; 39 case regex_constants::error_range: 40 return "The expression contained an invalid character range, " 41 "such as [b-a] in most encodings."; 42 case regex_constants::error_space: 43 return "There was insufficient memory to convert the expression into " 44 "a finite state machine."; 45 case regex_constants::error_badrepeat: 46 return "One of *?+{ was not preceded by a valid regular expression."; 47 case regex_constants::error_complexity: 48 return "The complexity of an attempted match against a regular " 49 "expression exceeded a pre-set level."; 50 case regex_constants::error_stack: 51 return "There was insufficient memory to determine whether the regular " 52 "expression could match the specified character sequence."; 53 case regex_constants::__re_err_grammar: 54 return "An invalid regex grammar has been requested."; 55 case regex_constants::__re_err_empty: 56 return "An empty regex is not allowed in the POSIX grammar."; 57 default: 58 break; 59 } 60 return "Unknown error type"; 61 } 62 63 regex_error::regex_error(regex_constants::error_type ecode) 64 : runtime_error(make_error_type_string(ecode)), 65 __code_(ecode) 66 {} 67 68 regex_error::~regex_error() throw() {} 69 70 namespace { 71 72 struct collationnames 73 { 74 const char* elem_; 75 char char_; 76 }; 77 78 const collationnames collatenames[] = 79 { 80 {"A", 0x41}, 81 {"B", 0x42}, 82 {"C", 0x43}, 83 {"D", 0x44}, 84 {"E", 0x45}, 85 {"F", 0x46}, 86 {"G", 0x47}, 87 {"H", 0x48}, 88 {"I", 0x49}, 89 {"J", 0x4a}, 90 {"K", 0x4b}, 91 {"L", 0x4c}, 92 {"M", 0x4d}, 93 {"N", 0x4e}, 94 {"NUL", 0x00}, 95 {"O", 0x4f}, 96 {"P", 0x50}, 97 {"Q", 0x51}, 98 {"R", 0x52}, 99 {"S", 0x53}, 100 {"T", 0x54}, 101 {"U", 0x55}, 102 {"V", 0x56}, 103 {"W", 0x57}, 104 {"X", 0x58}, 105 {"Y", 0x59}, 106 {"Z", 0x5a}, 107 {"a", 0x61}, 108 {"alert", 0x07}, 109 {"ampersand", 0x26}, 110 {"apostrophe", 0x27}, 111 {"asterisk", 0x2a}, 112 {"b", 0x62}, 113 {"backslash", 0x5c}, 114 {"backspace", 0x08}, 115 {"c", 0x63}, 116 {"carriage-return", 0x0d}, 117 {"circumflex", 0x5e}, 118 {"circumflex-accent", 0x5e}, 119 {"colon", 0x3a}, 120 {"comma", 0x2c}, 121 {"commercial-at", 0x40}, 122 {"d", 0x64}, 123 {"dollar-sign", 0x24}, 124 {"e", 0x65}, 125 {"eight", 0x38}, 126 {"equals-sign", 0x3d}, 127 {"exclamation-mark", 0x21}, 128 {"f", 0x66}, 129 {"five", 0x35}, 130 {"form-feed", 0x0c}, 131 {"four", 0x34}, 132 {"full-stop", 0x2e}, 133 {"g", 0x67}, 134 {"grave-accent", 0x60}, 135 {"greater-than-sign", 0x3e}, 136 {"h", 0x68}, 137 {"hyphen", 0x2d}, 138 {"hyphen-minus", 0x2d}, 139 {"i", 0x69}, 140 {"j", 0x6a}, 141 {"k", 0x6b}, 142 {"l", 0x6c}, 143 {"left-brace", 0x7b}, 144 {"left-curly-bracket", 0x7b}, 145 {"left-parenthesis", 0x28}, 146 {"left-square-bracket", 0x5b}, 147 {"less-than-sign", 0x3c}, 148 {"low-line", 0x5f}, 149 {"m", 0x6d}, 150 {"n", 0x6e}, 151 {"newline", 0x0a}, 152 {"nine", 0x39}, 153 {"number-sign", 0x23}, 154 {"o", 0x6f}, 155 {"one", 0x31}, 156 {"p", 0x70}, 157 {"percent-sign", 0x25}, 158 {"period", 0x2e}, 159 {"plus-sign", 0x2b}, 160 {"q", 0x71}, 161 {"question-mark", 0x3f}, 162 {"quotation-mark", 0x22}, 163 {"r", 0x72}, 164 {"reverse-solidus", 0x5c}, 165 {"right-brace", 0x7d}, 166 {"right-curly-bracket", 0x7d}, 167 {"right-parenthesis", 0x29}, 168 {"right-square-bracket", 0x5d}, 169 {"s", 0x73}, 170 {"semicolon", 0x3b}, 171 {"seven", 0x37}, 172 {"six", 0x36}, 173 {"slash", 0x2f}, 174 {"solidus", 0x2f}, 175 {"space", 0x20}, 176 {"t", 0x74}, 177 {"tab", 0x09}, 178 {"three", 0x33}, 179 {"tilde", 0x7e}, 180 {"two", 0x32}, 181 {"u", 0x75}, 182 {"underscore", 0x5f}, 183 {"v", 0x76}, 184 {"vertical-line", 0x7c}, 185 {"vertical-tab", 0x0b}, 186 {"w", 0x77}, 187 {"x", 0x78}, 188 {"y", 0x79}, 189 {"z", 0x7a}, 190 {"zero", 0x30} 191 }; 192 193 struct classnames 194 { 195 const char* elem_; 196 regex_traits<char>::char_class_type mask_; 197 }; 198 199 const classnames ClassNames[] = 200 { 201 {"alnum", ctype_base::alnum}, 202 {"alpha", ctype_base::alpha}, 203 {"blank", ctype_base::blank}, 204 {"cntrl", ctype_base::cntrl}, 205 {"d", ctype_base::digit}, 206 {"digit", ctype_base::digit}, 207 {"graph", ctype_base::graph}, 208 {"lower", ctype_base::lower}, 209 {"print", ctype_base::print}, 210 {"punct", ctype_base::punct}, 211 {"s", ctype_base::space}, 212 {"space", ctype_base::space}, 213 {"upper", ctype_base::upper}, 214 {"w", regex_traits<char>::__regex_word}, 215 {"xdigit", ctype_base::xdigit} 216 }; 217 218 struct use_strcmp 219 { 220 bool operator()(const collationnames& x, const char* y) 221 {return strcmp(x.elem_, y) < 0;} 222 bool operator()(const classnames& x, const char* y) 223 {return strcmp(x.elem_, y) < 0;} 224 }; 225 226 } 227 228 string 229 __get_collation_name(const char* s) 230 { 231 const collationnames* i = 232 _VSTD::lower_bound(begin(collatenames), end(collatenames), s, use_strcmp()); 233 string r; 234 if (i != end(collatenames) && strcmp(s, i->elem_) == 0) 235 r = char(i->char_); 236 return r; 237 } 238 239 regex_traits<char>::char_class_type 240 __get_classname(const char* s, bool __icase) 241 { 242 const classnames* i = 243 _VSTD::lower_bound(begin(ClassNames), end(ClassNames), s, use_strcmp()); 244 regex_traits<char>::char_class_type r = 0; 245 if (i != end(ClassNames) && strcmp(s, i->elem_) == 0) 246 { 247 r = i->mask_; 248 if (r == regex_traits<char>::__regex_word) 249 r |= ctype_base::alnum | ctype_base::upper | ctype_base::lower; 250 else if (__icase) 251 { 252 if (r & (ctype_base::lower | ctype_base::upper)) 253 r |= ctype_base::alpha; 254 } 255 } 256 return r; 257 } 258 259 template <> 260 void 261 __match_any_but_newline<char>::__exec(__state& __s) const 262 { 263 if (__s.__current_ != __s.__last_) 264 { 265 switch (*__s.__current_) 266 { 267 case '\r': 268 case '\n': 269 __s.__do_ = __state::__reject; 270 __s.__node_ = nullptr; 271 break; 272 default: 273 __s.__do_ = __state::__accept_and_consume; 274 ++__s.__current_; 275 __s.__node_ = this->first(); 276 break; 277 } 278 } 279 else 280 { 281 __s.__do_ = __state::__reject; 282 __s.__node_ = nullptr; 283 } 284 } 285 286 template <> 287 void 288 __match_any_but_newline<wchar_t>::__exec(__state& __s) const 289 { 290 if (__s.__current_ != __s.__last_) 291 { 292 switch (*__s.__current_) 293 { 294 case '\r': 295 case '\n': 296 case 0x2028: 297 case 0x2029: 298 __s.__do_ = __state::__reject; 299 __s.__node_ = nullptr; 300 break; 301 default: 302 __s.__do_ = __state::__accept_and_consume; 303 ++__s.__current_; 304 __s.__node_ = this->first(); 305 break; 306 } 307 } 308 else 309 { 310 __s.__do_ = __state::__reject; 311 __s.__node_ = nullptr; 312 } 313 } 314 315 _LIBCPP_END_NAMESPACE_STD 316