Home | History | Annotate | Download | only in re2

Lines Matching refs:Regexp

9 // of the ParseState class.  The Regexp::Parse function is
17 // See regexp.h for rationale.
20 #include "re2/regexp.h"
29 // Regexp pointers called the stack. Left parenthesis and vertical
40 // form LeftParen regexp VerticalBar regexp VerticalBar ... regexp VerticalBar.
44 class Regexp::ParseState {
60 bool PushRegexp(Regexp* re);
65 // Pushes a regexp with the given op (and no args) onto the stack.
80 // Pushes a repeat operator regexp onto the stack.
85 // Pushes a repetition regexp onto the stack.
89 // Checks whether a particular regexp op is a marker.
103 // Processes the end of input, returning the final regexp.
104 Regexp* DoFinish();
106 // Finishes the regexp if necessary, preparing it for use
109 Regexp* FinishRegexp(Regexp*);
113 // ParseCharClass also manipulates the internals of Regexp
118 bool ParseCharClass(StringPiece* s, Regexp** out_re,
138 // collapsing it into a single regexp on the stack.
142 // collapsing it to a single regexp on the stack.
155 Regexp* stacktop_;
166 Regexp::ParseState::ParseState(ParseFlags flags,
178 Regexp::ParseState::~ParseState() {
179 Regexp* next;
180 for (Regexp* re = stacktop_; re != NULL; re = next) {
189 // Finishes the regexp if necessary, preparing it for use in
192 Regexp* Regexp::ParseState::FinishRegexp(Regexp* re) {
209 bool Regexp::ParseState::PushRegexp(Regexp* re) {
221 re = new Regexp(kRegexpLiteral, flags_);
227 re = new Regexp(kRegexpLiteral, flags_ | FoldCase);
368 bool Regexp::ParseState::PushLiteral(Rune r) {
371 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
386 return PushRegexp(new Regexp(kRegexpNoMatch, flags_));
392 Regexp* re = new Regexp(kRegexpLiteral, flags_);
398 bool Regexp::ParseState::PushCarat() {
406 bool Regexp::ParseState::PushWordBoundary(bool word) {
413 bool Regexp::ParseState::PushDollar() {
417 Regexp::ParseFlags oflags = flags_;
427 bool Regexp::ParseState::PushDot() {
431 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
438 // Pushes a regexp with the given op (and no args) onto the stack.
439 bool Regexp::ParseState::PushSimpleOp(RegexpOp op) {
440 Regexp* re = new Regexp(op, flags_);
444 // Pushes a repeat operator regexp onto the stack.
447 bool Regexp::ParseState::PushRepeatOp(RegexpOp op, const StringPiece& s,
454 Regexp::ParseFlags fl = flags_;
457 Regexp* re = new Regexp(op, fl);
466 // Pushes a repetition regexp onto the stack.
468 bool Regexp::ParseState::PushRepetition(int min, int max,
481 Regexp::ParseFlags fl = flags_;
484 Regexp* re = new Regexp(kRegexpRepeat, fl);
496 // Checks whether a particular regexp op is a marker.
497 bool Regexp::ParseState::IsMarker(RegexpOp op) {
503 bool Regexp::ParseState::DoLeftParen(const StringPiece& name) {
504 Regexp* re = new Regexp(kLeftParen, flags_);
512 bool Regexp::ParseState::DoLeftParenNoCapture() {
513 Regexp* re = new Regexp(kLeftParen, flags_);
526 bool Regexp::ParseState::DoVerticalBar() {
535 Regexp* r1;
536 Regexp* r2;
542 Regexp* r3;
554 AddLiteral(r3->ccb_, rune, r3->parse_flags_ & Regexp::FoldCase);
559 r1->parse_flags_ & Regexp::FoldCase);
588 bool Regexp::ParseState::DoRightParen() {
592 // The stack should be: LeftParen regexp
593 // Remove the LeftParen, leaving the regexp,
595 Regexp* r1;
596 Regexp* r2;
609 Regexp* re = r2;
626 // Processes the end of input, returning the final regexp.
627 Regexp* Regexp::ParseState::DoFinish() {
629 Regexp* re = stacktop_;
639 // Returns the leading regexp that re starts with.
640 // The returned Regexp* points into a piece of re,
642 Regexp* Regexp::LeadingRegexp(Regexp* re) {
646 Regexp** sub = re->sub();
658 Regexp* Regexp::RemoveLeadingRegexp(Regexp* re) {
662 Regexp** sub = re->sub();
668 // Collapse concatenation to single regexp.
669 Regexp* nre = sub[1];
679 Regexp::ParseFlags pf = re->parse_flags();
681 return new Regexp(kRegexpEmptyMatch, pf);
687 Rune* Regexp::LeadingString(Regexp* re, int *nrune,
688 Regexp::ParseFlags *flags) {
692 *flags = static_cast<Regexp::ParseFlags>(re->parse_flags_ & Regexp::FoldCase);
710 void Regexp::RemoveLeadingString(Regexp* re, int n) {
716 Regexp* stk[4];
750 Regexp** sub = re->sub();
766 Regexp* old = sub[1];
803 int Regexp::FactorAlternation(
804 Regexp** sub, int n,
805 Regexp::ParseFlags altflags) {
810 int Regexp::FactorAlternationRecursive(
811 Regexp** sub, int n,
812 Regexp::ParseFlags altflags,
821 Regexp::ParseFlags runeflags = Regexp::NoParseFlags;
833 Regexp::ParseFlags runeflags_i = Regexp::NoParseFlags;
860 Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
885 Regexp* first = NULL;
892 Regexp* first_i = NULL;
895 if (first != NULL && Regexp::Equal(first, first_i)) {
900 // Found end of a run with common leading regexp:
903 // Factor out common regexp and append factored expression to sub[0:out].
911 Regexp* x[2]; // x[0] = prefix, x[1] = suffix1|suffix2|...
954 Regexp* re = sub[j];
997 void Regexp::ParseState::DoCollapse(RegexpOp op) {
1000 Regexp* next = NULL;
1001 Regexp* sub;
1016 Regexp** subs = new Regexp*[n];
1022 Regexp** sub_subs = sub->sub();
1031 Regexp* re = ConcatOrAlternate(op, subs, n, flags_, true);
1039 // collapsing it into a single regexp on the stack.
1040 void Regexp::ParseState::DoConcatenation() {
1041 Regexp* r1 = stacktop_;
1044 Regexp* re = new Regexp(kRegexpEmptyMatch, flags_);
1051 // collapsing it to a single regexp on the stack.
1052 void Regexp::ParseState::DoAlternation() {
1055 Regexp* r1 = stacktop_;
1066 // Only called when another regexp is about to be pushed
1071 bool Regexp::ParseState::MaybeConcatString(int r, ParseFlags flags) {
1072 Regexp* re1;
1073 Regexp* re2;
1121 // Sets *out_re to the regexp for the class.
1147 // The Maybe in the name signifies that the regexp parse
1362 // when in POSIX regexp mode. Surprisingly,
1385 Rune lo, Rune hi, Regexp::ParseFlags parse_flags) {
1388 bool cutnl = !(parse_flags & Regexp::ClassNL) ||
1389 (parse_flags & Regexp::NeverNL);
1399 if (parse_flags & Regexp::FoldCase)
1439 Regexp::ParseFlags parse_flags) {
1448 if (parse_flags & Regexp::FoldCase) {
1458 bool cutnl = !(parse_flags & Regexp::ClassNL) ||
1459 (parse_flags & Regexp::NeverNL);
1489 UGroup* MaybeParsePerlCCEscape(StringPiece* s, Regexp::ParseFlags parse_flags) {
1490 if (!(parse_flags & Regexp::PerlClasses))
1512 ParseStatus ParseUnicodeGroup(StringPiece* s, Regexp::ParseFlags parse_flags,
1516 if (!(parse_flags & Regexp::UnicodeGroups))
1576 static ParseStatus ParseCCName(StringPiece* s, Regexp::ParseFlags parse_flags,
1611 // There are fewer special characters here than in the rest of the regexp.
1614 bool Regexp::ParseState::ParseCCCharacter(StringPiece* s, Rune *rp,
1637 bool Regexp::ParseState::ParseCCRange(StringPiece* s, RuneRange* rr,
1661 // Sets *out_re to the regexp for the class.
1662 bool Regexp::ParseState::ParseCharClass(StringPiece* s,
1663 Regexp** out_re,
1673 Regexp* re = new Regexp(kRegexpCharClass, flags_ & ~FoldCase);
1749 // Regexp::ClassNL is set. In an explicit range or singleton
1752 re->ccb_->AddRangeFlags(rr.lo, rr.hi, flags_ | Regexp::ClassNL);
1794 bool Regexp::ParseState::ParsePerlFlags(StringPiece* s) {
1806 // Check for named captures, first introduced in Python's regexp library.
1925 flags_ = static_cast<Regexp::ParseFlags>(nflags);
1951 // returning the corresponding Regexp tree.
1954 Regexp* Regexp::Parse(const StringPiece& s, ParseFlags global_flags,
1964 // Convert regexp to UTF-8 (easier on the rest of the parser).
2046 Regexp* re;
2123 if ((ps.flags() & Regexp::PerlB) &&
2131 if ((ps.flags() & Regexp::PerlX) && t.size() >= 2) {
2174 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);
2192 Regexp* re = new Regexp(kRegexpCharClass, ps.flags() & ~FoldCase);