1 // Copyright 2016 the V8 project authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "src/ostreams.h" 6 #include "src/regexp/regexp-ast.h" 7 8 namespace v8 { 9 namespace internal { 10 11 #define MAKE_ACCEPT(Name) \ 12 void* RegExp##Name::Accept(RegExpVisitor* visitor, void* data) { \ 13 return visitor->Visit##Name(this, data); \ 14 } 15 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_ACCEPT) 16 #undef MAKE_ACCEPT 17 18 #define MAKE_TYPE_CASE(Name) \ 19 RegExp##Name* RegExpTree::As##Name() { return NULL; } \ 20 bool RegExpTree::Is##Name() { return false; } 21 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 22 #undef MAKE_TYPE_CASE 23 24 #define MAKE_TYPE_CASE(Name) \ 25 RegExp##Name* RegExp##Name::As##Name() { return this; } \ 26 bool RegExp##Name::Is##Name() { return true; } 27 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_TYPE_CASE) 28 #undef MAKE_TYPE_CASE 29 30 31 static Interval ListCaptureRegisters(ZoneList<RegExpTree*>* children) { 32 Interval result = Interval::Empty(); 33 for (int i = 0; i < children->length(); i++) 34 result = result.Union(children->at(i)->CaptureRegisters()); 35 return result; 36 } 37 38 39 Interval RegExpAlternative::CaptureRegisters() { 40 return ListCaptureRegisters(nodes()); 41 } 42 43 44 Interval RegExpDisjunction::CaptureRegisters() { 45 return ListCaptureRegisters(alternatives()); 46 } 47 48 49 Interval RegExpLookaround::CaptureRegisters() { 50 return body()->CaptureRegisters(); 51 } 52 53 54 Interval RegExpCapture::CaptureRegisters() { 55 Interval self(StartRegister(index()), EndRegister(index())); 56 return self.Union(body()->CaptureRegisters()); 57 } 58 59 60 Interval RegExpQuantifier::CaptureRegisters() { 61 return body()->CaptureRegisters(); 62 } 63 64 65 bool RegExpAssertion::IsAnchoredAtStart() { 66 return assertion_type() == RegExpAssertion::START_OF_INPUT; 67 } 68 69 70 bool RegExpAssertion::IsAnchoredAtEnd() { 71 return assertion_type() == RegExpAssertion::END_OF_INPUT; 72 } 73 74 75 bool RegExpAlternative::IsAnchoredAtStart() { 76 ZoneList<RegExpTree*>* nodes = this->nodes(); 77 for (int i = 0; i < nodes->length(); i++) { 78 RegExpTree* node = nodes->at(i); 79 if (node->IsAnchoredAtStart()) { 80 return true; 81 } 82 if (node->max_match() > 0) { 83 return false; 84 } 85 } 86 return false; 87 } 88 89 90 bool RegExpAlternative::IsAnchoredAtEnd() { 91 ZoneList<RegExpTree*>* nodes = this->nodes(); 92 for (int i = nodes->length() - 1; i >= 0; i--) { 93 RegExpTree* node = nodes->at(i); 94 if (node->IsAnchoredAtEnd()) { 95 return true; 96 } 97 if (node->max_match() > 0) { 98 return false; 99 } 100 } 101 return false; 102 } 103 104 105 bool RegExpDisjunction::IsAnchoredAtStart() { 106 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 107 for (int i = 0; i < alternatives->length(); i++) { 108 if (!alternatives->at(i)->IsAnchoredAtStart()) return false; 109 } 110 return true; 111 } 112 113 114 bool RegExpDisjunction::IsAnchoredAtEnd() { 115 ZoneList<RegExpTree*>* alternatives = this->alternatives(); 116 for (int i = 0; i < alternatives->length(); i++) { 117 if (!alternatives->at(i)->IsAnchoredAtEnd()) return false; 118 } 119 return true; 120 } 121 122 123 bool RegExpLookaround::IsAnchoredAtStart() { 124 return is_positive() && type() == LOOKAHEAD && body()->IsAnchoredAtStart(); 125 } 126 127 128 bool RegExpCapture::IsAnchoredAtStart() { return body()->IsAnchoredAtStart(); } 129 130 131 bool RegExpCapture::IsAnchoredAtEnd() { return body()->IsAnchoredAtEnd(); } 132 133 134 // Convert regular expression trees to a simple sexp representation. 135 // This representation should be different from the input grammar 136 // in as many cases as possible, to make it more difficult for incorrect 137 // parses to look as correct ones which is likely if the input and 138 // output formats are alike. 139 class RegExpUnparser final : public RegExpVisitor { 140 public: 141 RegExpUnparser(std::ostream& os, Zone* zone) : os_(os), zone_(zone) {} 142 void VisitCharacterRange(CharacterRange that); 143 #define MAKE_CASE(Name) void* Visit##Name(RegExp##Name*, void* data) override; 144 FOR_EACH_REG_EXP_TREE_TYPE(MAKE_CASE) 145 #undef MAKE_CASE 146 private: 147 std::ostream& os_; 148 Zone* zone_; 149 }; 150 151 152 void* RegExpUnparser::VisitDisjunction(RegExpDisjunction* that, void* data) { 153 os_ << "(|"; 154 for (int i = 0; i < that->alternatives()->length(); i++) { 155 os_ << " "; 156 that->alternatives()->at(i)->Accept(this, data); 157 } 158 os_ << ")"; 159 return NULL; 160 } 161 162 163 void* RegExpUnparser::VisitAlternative(RegExpAlternative* that, void* data) { 164 os_ << "(:"; 165 for (int i = 0; i < that->nodes()->length(); i++) { 166 os_ << " "; 167 that->nodes()->at(i)->Accept(this, data); 168 } 169 os_ << ")"; 170 return NULL; 171 } 172 173 174 void RegExpUnparser::VisitCharacterRange(CharacterRange that) { 175 os_ << AsUC16(that.from()); 176 if (!that.IsSingleton()) { 177 os_ << "-" << AsUC16(that.to()); 178 } 179 } 180 181 182 void* RegExpUnparser::VisitCharacterClass(RegExpCharacterClass* that, 183 void* data) { 184 if (that->is_negated()) os_ << "^"; 185 os_ << "["; 186 for (int i = 0; i < that->ranges(zone_)->length(); i++) { 187 if (i > 0) os_ << " "; 188 VisitCharacterRange(that->ranges(zone_)->at(i)); 189 } 190 os_ << "]"; 191 return NULL; 192 } 193 194 195 void* RegExpUnparser::VisitAssertion(RegExpAssertion* that, void* data) { 196 switch (that->assertion_type()) { 197 case RegExpAssertion::START_OF_INPUT: 198 os_ << "@^i"; 199 break; 200 case RegExpAssertion::END_OF_INPUT: 201 os_ << "@$i"; 202 break; 203 case RegExpAssertion::START_OF_LINE: 204 os_ << "@^l"; 205 break; 206 case RegExpAssertion::END_OF_LINE: 207 os_ << "@$l"; 208 break; 209 case RegExpAssertion::BOUNDARY: 210 os_ << "@b"; 211 break; 212 case RegExpAssertion::NON_BOUNDARY: 213 os_ << "@B"; 214 break; 215 } 216 return NULL; 217 } 218 219 220 void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) { 221 os_ << "'"; 222 Vector<const uc16> chardata = that->data(); 223 for (int i = 0; i < chardata.length(); i++) { 224 os_ << AsUC16(chardata[i]); 225 } 226 os_ << "'"; 227 return NULL; 228 } 229 230 231 void* RegExpUnparser::VisitText(RegExpText* that, void* data) { 232 if (that->elements()->length() == 1) { 233 that->elements()->at(0).tree()->Accept(this, data); 234 } else { 235 os_ << "(!"; 236 for (int i = 0; i < that->elements()->length(); i++) { 237 os_ << " "; 238 that->elements()->at(i).tree()->Accept(this, data); 239 } 240 os_ << ")"; 241 } 242 return NULL; 243 } 244 245 246 void* RegExpUnparser::VisitQuantifier(RegExpQuantifier* that, void* data) { 247 os_ << "(# " << that->min() << " "; 248 if (that->max() == RegExpTree::kInfinity) { 249 os_ << "- "; 250 } else { 251 os_ << that->max() << " "; 252 } 253 os_ << (that->is_greedy() ? "g " : that->is_possessive() ? "p " : "n "); 254 that->body()->Accept(this, data); 255 os_ << ")"; 256 return NULL; 257 } 258 259 260 void* RegExpUnparser::VisitCapture(RegExpCapture* that, void* data) { 261 os_ << "(^ "; 262 that->body()->Accept(this, data); 263 os_ << ")"; 264 return NULL; 265 } 266 267 268 void* RegExpUnparser::VisitLookaround(RegExpLookaround* that, void* data) { 269 os_ << "("; 270 os_ << (that->type() == RegExpLookaround::LOOKAHEAD ? "->" : "<-"); 271 os_ << (that->is_positive() ? " + " : " - "); 272 that->body()->Accept(this, data); 273 os_ << ")"; 274 return NULL; 275 } 276 277 278 void* RegExpUnparser::VisitBackReference(RegExpBackReference* that, 279 void* data) { 280 os_ << "(<- " << that->index() << ")"; 281 return NULL; 282 } 283 284 285 void* RegExpUnparser::VisitEmpty(RegExpEmpty* that, void* data) { 286 os_ << '%'; 287 return NULL; 288 } 289 290 291 std::ostream& RegExpTree::Print(std::ostream& os, Zone* zone) { // NOLINT 292 RegExpUnparser unparser(os, zone); 293 Accept(&unparser, NULL); 294 return os; 295 } 296 297 298 RegExpDisjunction::RegExpDisjunction(ZoneList<RegExpTree*>* alternatives) 299 : alternatives_(alternatives) { 300 DCHECK(alternatives->length() > 1); 301 RegExpTree* first_alternative = alternatives->at(0); 302 min_match_ = first_alternative->min_match(); 303 max_match_ = first_alternative->max_match(); 304 for (int i = 1; i < alternatives->length(); i++) { 305 RegExpTree* alternative = alternatives->at(i); 306 min_match_ = Min(min_match_, alternative->min_match()); 307 max_match_ = Max(max_match_, alternative->max_match()); 308 } 309 } 310 311 312 static int IncreaseBy(int previous, int increase) { 313 if (RegExpTree::kInfinity - previous < increase) { 314 return RegExpTree::kInfinity; 315 } else { 316 return previous + increase; 317 } 318 } 319 320 321 RegExpAlternative::RegExpAlternative(ZoneList<RegExpTree*>* nodes) 322 : nodes_(nodes) { 323 DCHECK(nodes->length() > 1); 324 min_match_ = 0; 325 max_match_ = 0; 326 for (int i = 0; i < nodes->length(); i++) { 327 RegExpTree* node = nodes->at(i); 328 int node_min_match = node->min_match(); 329 min_match_ = IncreaseBy(min_match_, node_min_match); 330 int node_max_match = node->max_match(); 331 max_match_ = IncreaseBy(max_match_, node_max_match); 332 } 333 } 334 335 336 } // namespace internal 337 } // namespace v8 338