Home | History | Annotate | Download | only in re2
      1 // Copyright 2006 The RE2 Authors.  All Rights Reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // --- SPONSORED LINK --------------------------------------------------
      6 // If you want to use this library for regular expression matching,
      7 // you should use re2/re2.h, which provides a class RE2 that
      8 // mimics the PCRE interface provided by PCRE's C++ wrappers.
      9 // This header describes the low-level interface used to implement RE2
     10 // and may change in backwards-incompatible ways from time to time.
     11 // In contrast, RE2's interface will not.
     12 // ---------------------------------------------------------------------
     13 
     14 // Regular expression library: parsing, execution, and manipulation
     15 // of regular expressions.
     16 //
     17 // Any operation that traverses the Regexp structures should be written
     18 // using Regexp::Walker (see walker-inl.h), not recursively, because deeply nested
     19 // regular expressions such as x++++++++++++++++++++... might cause recursive
     20 // traversals to overflow the stack.
     21 //
     22 // It is the caller's responsibility to provide appropriate mutual exclusion
     23 // around manipulation of the regexps.  RE2 does this.
     24 //
     25 // PARSING
     26 //
     27 // Regexp::Parse parses regular expressions encoded in UTF-8.
     28 // The default syntax is POSIX extended regular expressions,
     29 // with the following changes:
     30 //
     31 //   1.  Backreferences (optional in POSIX EREs) are not supported.
     32 //         (Supporting them precludes the use of DFA-based
     33 //          matching engines.)
     34 //
     35 //   2.  Collating elements and collation classes are not supported.
     36 //         (No one has needed or wanted them.)
     37 //
     38 // The exact syntax accepted can be modified by passing flags to
     39 // Regexp::Parse.  In particular, many of the basic Perl additions
     40 // are available.  The flags are documented below (search for LikePerl).
     41 //
     42 // If parsed with the flag Regexp::Latin1, both the regular expression
     43 // and the input to the matching routines are assumed to be encoded in
     44 // Latin-1, not UTF-8.
     45 //
     46 // EXECUTION
     47 //
     48 // Once Regexp has parsed a regular expression, it provides methods
     49 // to search text using that regular expression.  These methods are
     50 // implemented via calling out to other regular expression libraries.
     51 // (Let's call them the sublibraries.)
     52 //
     53 // To call a sublibrary, Regexp does not simply prepare a
     54 // string version of the regular expression and hand it to the
     55 // sublibrary.  Instead, Regexp prepares, from its own parsed form, the
     56 // corresponding internal representation used by the sublibrary.
     57 // This has the drawback of needing to know the internal representation
     58 // used by the sublibrary, but it has two important benefits:
     59 //
//   1. The syntax and meaning of regular expressions are guaranteed
//      to be those used by Regexp's parser, not the syntax expected
     62 //      by the sublibrary.  Regexp might accept a restricted or
     63 //      expanded syntax for regular expressions as compared with
     64 //      the sublibrary.  As long as Regexp can translate from its
     65 //      internal form into the sublibrary's, clients need not know
     66 //      exactly which sublibrary they are using.
     67 //
     68 //   2. The sublibrary parsers are bypassed.  For whatever reason,
     69 //      sublibrary regular expression parsers often have security
     70 //      problems.  For example, plan9grep's regular expression parser
     71 //      has a buffer overflow in its handling of large character
     72 //      classes, and PCRE's parser has had buffer overflow problems
     73 //      in the past.  Security-team requires sandboxing of sublibrary
     74 //      regular expression parsers.  Avoiding the sublibrary parsers
     75 //      avoids the sandbox.
     76 //
     77 // The execution methods we use now are provided by the compiled form,
     78 // Prog, described in prog.h
     79 //
     80 // MANIPULATION
     81 //
     82 // Unlike other regular expression libraries, Regexp makes its parsed
     83 // form accessible to clients, so that client code can analyze the
     84 // parsed regular expressions.
     85 
     86 #ifndef RE2_REGEXP_H__
     87 #define RE2_REGEXP_H__
     88 
     89 #include "util/util.h"
     90 #include "re2/stringpiece.h"
     91 
     92 namespace re2 {
     93 
     94 // Keep in sync with string list kOpcodeNames[] in testing/dump.cc
     95 enum RegexpOp {
     96   // Matches no strings.
     97   kRegexpNoMatch = 1,
     98 
     99   // Matches empty string.
    100   kRegexpEmptyMatch,
    101 
    102   // Matches rune_.
    103   kRegexpLiteral,
    104 
    105   // Matches runes_.
    106   kRegexpLiteralString,
    107 
    108   // Matches concatenation of sub_[0..nsub-1].
    109   kRegexpConcat,
    110   // Matches union of sub_[0..nsub-1].
    111   kRegexpAlternate,
    112 
    113   // Matches sub_[0] zero or more times.
    114   kRegexpStar,
    115   // Matches sub_[0] one or more times.
    116   kRegexpPlus,
    117   // Matches sub_[0] zero or one times.
    118   kRegexpQuest,
    119 
    120   // Matches sub_[0] at least min_ times, at most max_ times.
    121   // max_ == -1 means no upper limit.
    122   kRegexpRepeat,
    123 
    124   // Parenthesized (capturing) subexpression.  Index is cap_.
    125   // Optionally, capturing name is name_.
    126   kRegexpCapture,
    127 
    128   // Matches any character.
    129   kRegexpAnyChar,
    130 
    131   // Matches any byte [sic].
    132   kRegexpAnyByte,
    133 
    134   // Matches empty string at beginning of line.
    135   kRegexpBeginLine,
    136   // Matches empty string at end of line.
    137   kRegexpEndLine,
    138 
    139   // Matches word boundary "\b".
    140   kRegexpWordBoundary,
    141   // Matches not-a-word boundary "\B".
    142   kRegexpNoWordBoundary,
    143 
    144   // Matches empty string at beginning of text.
    145   kRegexpBeginText,
    146   // Matches empty string at end of text.
    147   kRegexpEndText,
    148 
    149   // Matches character class given by cc_.
    150   kRegexpCharClass,
    151 
    152   // Forces match of entire expression right now,
    153   // with match ID match_id_ (used by RE2::Set).
    154   kRegexpHaveMatch,
    155 
    156   kMaxRegexpOp = kRegexpHaveMatch,
    157 };
    158 
    159 // Keep in sync with string list in regexp.cc
    160 enum RegexpStatusCode {
    161   // No error
    162   kRegexpSuccess = 0,
    163 
    164   // Unexpected error
    165   kRegexpInternalError,
    166 
    167   // Parse errors
    168   kRegexpBadEscape,          // bad escape sequence
    169   kRegexpBadCharClass,       // bad character class
    170   kRegexpBadCharRange,       // bad character class range
    171   kRegexpMissingBracket,     // missing closing ]
    172   kRegexpMissingParen,       // missing closing )
    173   kRegexpTrailingBackslash,  // at end of regexp
    174   kRegexpRepeatArgument,     // repeat argument missing, e.g. "*"
    175   kRegexpRepeatSize,         // bad repetition argument
    176   kRegexpRepeatOp,           // bad repetition operator
    177   kRegexpBadPerlOp,          // bad perl operator
    178   kRegexpBadUTF8,            // invalid UTF-8 in regexp
    179   kRegexpBadNamedCapture,    // bad named capture
    180 };
    181 
    182 // Error status for certain operations.
    183 class RegexpStatus {
    184  public:
    185   RegexpStatus() : code_(kRegexpSuccess), tmp_(NULL) {}
    186   ~RegexpStatus() { delete tmp_; }
    187 
    188   void set_code(enum RegexpStatusCode code) { code_ = code; }
    189   void set_error_arg(const StringPiece& error_arg) { error_arg_ = error_arg; }
    190   void set_tmp(string* tmp) { delete tmp_; tmp_ = tmp; }
    191   enum RegexpStatusCode code() const { return code_; }
    192   const StringPiece& error_arg() const { return error_arg_; }
    193   bool ok() const { return code() == kRegexpSuccess; }
    194 
    195   // Copies state from status.
    196   void Copy(const RegexpStatus& status);
    197 
    198   // Returns text equivalent of code, e.g.:
    199   //   "Bad character class"
    200   static const string& CodeText(enum RegexpStatusCode code);
    201 
    202   // Returns text describing error, e.g.:
    203   //   "Bad character class: [z-a]"
    204   string Text() const;
    205 
    206  private:
    207   enum RegexpStatusCode code_;  // Kind of error
    208   StringPiece error_arg_;       // Piece of regexp containing syntax error.
    209   string* tmp_;                 // Temporary storage, possibly where error_arg_ is.
    210 
    211   DISALLOW_EVIL_CONSTRUCTORS(RegexpStatus);
    212 };
    213 
    214 // Walker to implement Simplify.
    215 class SimplifyWalker;
    216 
    217 // Compiled form; see prog.h
    218 class Prog;
    219 
    220 struct RuneRange {
    221   RuneRange() : lo(0), hi(0) { }
    222   RuneRange(int l, int h) : lo(l), hi(h) { }
    223   Rune lo;
    224   Rune hi;
    225 };
    226 
    227 // Less-than on RuneRanges treats a == b if they overlap at all.
    228 // This lets us look in a set to find the range covering a particular Rune.
    229 struct RuneRangeLess {
    230   bool operator()(const RuneRange& a, const RuneRange& b) const {
    231     return a.hi < b.lo;
    232   }
    233 };
    234 
    235 class CharClassBuilder;
    236 
    237 class CharClass {
    238  public:
    239   void Delete();
    240 
    241   typedef RuneRange* iterator;
    242   iterator begin() { return ranges_; }
    243   iterator end() { return ranges_ + nranges_; }
    244 
    245   int size() { return nrunes_; }
    246   bool empty() { return nrunes_ == 0; }
    247   bool full() { return nrunes_ == Runemax+1; }
    248   bool FoldsASCII() { return folds_ascii_; }
    249 
    250   bool Contains(Rune r);
    251   CharClass* Negate();
    252 
    253  private:
    254   CharClass();  // not implemented
    255   ~CharClass();  // not implemented
    256   static CharClass* New(int maxranges);
    257 
    258   friend class CharClassBuilder;
    259 
    260   bool folds_ascii_;
    261   int nrunes_;
    262   RuneRange *ranges_;
    263   int nranges_;
    264   DISALLOW_EVIL_CONSTRUCTORS(CharClass);
    265 };
    266 
    267 class Regexp {
    268  public:
    269 
    270   // Flags for parsing.  Can be ORed together.
    271   enum ParseFlags {
    272     NoParseFlags = 0,
    273     FoldCase     = 1<<0,   // Fold case during matching (case-insensitive).
    274     Literal      = 1<<1,   // Treat s as literal string instead of a regexp.
    275     ClassNL      = 1<<2,   // Allow char classes like [^a-z] and \D and \s
    276                            // and [[:space:]] to match newline.
    277     DotNL        = 1<<3,   // Allow . to match newline.
    278     MatchNL      = ClassNL | DotNL,
    279     OneLine      = 1<<4,   // Treat ^ and $ as only matching at beginning and
    280                            // end of text, not around embedded newlines.
    281                            // (Perl's default)
    282     Latin1       = 1<<5,   // Regexp and text are in Latin1, not UTF-8.
    283     NonGreedy    = 1<<6,   // Repetition operators are non-greedy by default.
    284     PerlClasses  = 1<<7,   // Allow Perl character classes like \d.
    285     PerlB        = 1<<8,   // Allow Perl's \b and \B.
    286     PerlX        = 1<<9,   // Perl extensions:
    287                            //   non-capturing parens - (?: )
    288                            //   non-greedy operators - *? +? ?? {}?
    289                            //   flag edits - (?i) (?-i) (?i: )
    290                            //     i - FoldCase
    291                            //     m - !OneLine
    292                            //     s - DotNL
    293                            //     U - NonGreedy
    294                            //   line ends: \A \z
    295                            //   \Q and \E to disable/enable metacharacters
    296                            //   (?P<name>expr) for named captures
    297                            //   \C to match any single byte
    298     UnicodeGroups = 1<<10, // Allow \p{Han} for Unicode Han group
    299                            //   and \P{Han} for its negation.
    300     NeverNL      = 1<<11,  // Never match NL, even if the regexp mentions
    301                            //   it explicitly.
    302 
    303     // As close to Perl as we can get.
    304     LikePerl     = ClassNL | OneLine | PerlClasses | PerlB | PerlX |
    305                    UnicodeGroups,
    306 
    307     // Internal use only.
    308     WasDollar    = 1<<15,  // on kRegexpEndText: was $ in regexp text
    309   };
    310 
    311   // Get.  No set, Regexps are logically immutable once created.
    312   RegexpOp op() { return static_cast<RegexpOp>(op_); }
    313   int nsub() { return nsub_; }
    314   bool simple() { return simple_; }
    315   enum ParseFlags parse_flags() { return static_cast<ParseFlags>(parse_flags_); }
    316   int Ref();  // For testing.
    317 
    318   Regexp** sub() {
    319     if(nsub_ <= 1)
    320       return &subone_;
    321     else
    322       return submany_;
    323   }
    324 
    325   int min() { DCHECK_EQ(op_, kRegexpRepeat); return min_; }
    326   int max() { DCHECK_EQ(op_, kRegexpRepeat); return max_; }
    327   Rune rune() { DCHECK_EQ(op_, kRegexpLiteral); return rune_; }
    328   CharClass* cc() { DCHECK_EQ(op_, kRegexpCharClass); return cc_; }
    329   int cap() { DCHECK_EQ(op_, kRegexpCapture); return cap_; }
    330   const string* name() { DCHECK_EQ(op_, kRegexpCapture); return name_; }
    331   Rune* runes() { DCHECK_EQ(op_, kRegexpLiteralString); return runes_; }
    332   int nrunes() { DCHECK_EQ(op_, kRegexpLiteralString); return nrunes_; }
    333   int match_id() { DCHECK_EQ(op_, kRegexpHaveMatch); return match_id_; }
    334 
    335   // Increments reference count, returns object as convenience.
    336   Regexp* Incref();
    337 
    338   // Decrements reference count and deletes this object if count reaches 0.
    339   void Decref();
    340 
    341   // Parses string s to produce regular expression, returned.
    342   // Caller must release return value with re->Decref().
    343   // On failure, sets *status (if status != NULL) and returns NULL.
    344   static Regexp* Parse(const StringPiece& s, ParseFlags flags,
    345                        RegexpStatus* status);
    346 
    347   // Returns a _new_ simplified version of the current regexp.
    348   // Does not edit the current regexp.
    349   // Caller must release return value with re->Decref().
    350   // Simplified means that counted repetition has been rewritten
    351   // into simpler terms and all Perl/POSIX features have been
    352   // removed.  The result will capture exactly the same
    353   // subexpressions the original did, unless formatted with ToString.
    354   Regexp* Simplify();
    355   friend class SimplifyWalker;
    356 
    357   // Parses the regexp src and then simplifies it and sets *dst to the
    358   // string representation of the simplified form.  Returns true on success.
    359   // Returns false and sets *status (if status != NULL) on parse error.
    360   static bool SimplifyRegexp(const StringPiece& src, ParseFlags flags,
    361                              string* dst,
    362                              RegexpStatus* status);
    363 
    364   // Returns the number of capturing groups in the regexp.
    365   int NumCaptures();
    366   friend class NumCapturesWalker;
    367 
    368   // Returns a map from names to capturing group indices,
    369   // or NULL if the regexp contains no named capture groups.
    370   // The caller is responsible for deleting the map.
    371   map<string, int>* NamedCaptures();
    372 
    373   // Returns a map from capturing group indices to capturing group
    374   // names or NULL if the regexp contains no named capture groups. The
    375   // caller is responsible for deleting the map.
    376   map<int, string>* CaptureNames();
    377 
    378   // Returns a string representation of the current regexp,
    379   // using as few parentheses as possible.
    380   string ToString();
    381 
    382   // Convenience functions.  They consume the passed reference,
    383   // so in many cases you should use, e.g., Plus(re->Incref(), flags).
    384   // They do not consume allocated arrays like subs or runes.
    385   static Regexp* Plus(Regexp* sub, ParseFlags flags);
    386   static Regexp* Star(Regexp* sub, ParseFlags flags);
    387   static Regexp* Quest(Regexp* sub, ParseFlags flags);
    388   static Regexp* Concat(Regexp** subs, int nsubs, ParseFlags flags);
    389   static Regexp* Alternate(Regexp** subs, int nsubs, ParseFlags flags);
    390   static Regexp* Capture(Regexp* sub, ParseFlags flags, int cap);
    391   static Regexp* Repeat(Regexp* sub, ParseFlags flags, int min, int max);
    392   static Regexp* NewLiteral(Rune rune, ParseFlags flags);
    393   static Regexp* NewCharClass(CharClass* cc, ParseFlags flags);
    394   static Regexp* LiteralString(Rune* runes, int nrunes, ParseFlags flags);
    395   static Regexp* HaveMatch(int match_id, ParseFlags flags);
    396 
    397   // Like Alternate but does not factor out common prefixes.
    398   static Regexp* AlternateNoFactor(Regexp** subs, int nsubs, ParseFlags flags);
    399 
    400   // Debugging function.  Returns string format for regexp
    401   // that makes structure clear.  Does NOT use regexp syntax.
    402   string Dump();
    403 
    404   // Helper traversal class, defined fully in walker-inl.h.
    405   template<typename T> class Walker;
    406 
    407   // Compile to Prog.  See prog.h
    408   // Reverse prog expects to be run over text backward.
    409   // Construction and execution of prog will
    410   // stay within approximately max_mem bytes of memory.
    411   // If max_mem <= 0, a reasonable default is used.
    412   Prog* CompileToProg(int64 max_mem);
    413   Prog* CompileToReverseProg(int64 max_mem);
    414 
    415   // Whether to expect this library to find exactly the same answer as PCRE
    416   // when running this regexp.  Most regexps do mimic PCRE exactly, but a few
    417   // obscure cases behave differently.  Technically this is more a property
    418   // of the Prog than the Regexp, but the computation is much easier to do
    419   // on the Regexp.  See mimics_pcre.cc for the exact conditions.
    420   bool MimicsPCRE();
    421 
    422   // Benchmarking function.
    423   void NullWalk();
    424 
    425   // Whether every match of this regexp must be anchored and
    426   // begin with a non-empty fixed string (perhaps after ASCII
    427   // case-folding).  If so, returns the prefix and the sub-regexp that
    428   // follows it.
    429   bool RequiredPrefix(string* prefix, bool *foldcase, Regexp** suffix);
    430 
    431  private:
    432   // Constructor allocates vectors as appropriate for operator.
    433   explicit Regexp(RegexpOp op, ParseFlags parse_flags);
    434 
    435   // Use Decref() instead of delete to release Regexps.
    436   // This is private to catch deletes at compile time.
    437   ~Regexp();
    438   void Destroy();
    439   bool QuickDestroy();
    440 
    441   // Helpers for Parse.  Listed here so they can edit Regexps.
    442   class ParseState;
    443   friend class ParseState;
    444   friend bool ParseCharClass(StringPiece* s, Regexp** out_re,
    445                              RegexpStatus* status);
    446 
    447   // Helper for testing [sic].
    448   friend bool RegexpEqualTestingOnly(Regexp*, Regexp*);
    449 
    450   // Computes whether Regexp is already simple.
    451   bool ComputeSimple();
    452 
    453   // Constructor that generates a concatenation or alternation,
    454   // enforcing the limit on the number of subexpressions for
    455   // a particular Regexp.
    456   static Regexp* ConcatOrAlternate(RegexpOp op, Regexp** subs, int nsubs,
    457                                    ParseFlags flags, bool can_factor);
    458 
    459   // Returns the leading string that re starts with.
    460   // The returned Rune* points into a piece of re,
    461   // so it must not be used after the caller calls re->Decref().
    462   static Rune* LeadingString(Regexp* re, int* nrune, ParseFlags* flags);
    463 
    464   // Removes the first n leading runes from the beginning of re.
    465   // Edits re in place.
    466   static void RemoveLeadingString(Regexp* re, int n);
    467 
    468   // Returns the leading regexp in re's top-level concatenation.
    469   // The returned Regexp* points at re or a sub-expression of re,
    470   // so it must not be used after the caller calls re->Decref().
    471   static Regexp* LeadingRegexp(Regexp* re);
    472 
    473   // Removes LeadingRegexp(re) from re and returns the remainder.
    474   // Might edit re in place.
    475   static Regexp* RemoveLeadingRegexp(Regexp* re);
    476 
    477   // Simplifies an alternation of literal strings by factoring out
    478   // common prefixes.
    479   static int FactorAlternation(Regexp** sub, int nsub, ParseFlags flags);
    480   static int FactorAlternationRecursive(Regexp** sub, int nsub,
    481                                         ParseFlags flags, int maxdepth);
    482 
    483   // Is a == b?  Only efficient on regexps that have not been through
    484   // Simplify yet - the expansion of a kRegexpRepeat will make this
    485   // take a long time.  Do not call on such regexps, hence private.
    486   static bool Equal(Regexp* a, Regexp* b);
    487 
    488   // Allocate space for n sub-regexps.
    489   void AllocSub(int n) {
    490     if (n < 0 || static_cast<uint16>(n) != n)
    491       LOG(FATAL) << "Cannot AllocSub " << n;
    492     if (n > 1)
    493       submany_ = new Regexp*[n];
    494     nsub_ = n;
    495   }
    496 
    497   // Add Rune to LiteralString
    498   void AddRuneToString(Rune r);
    499 
    500   // Swaps this with that, in place.
    501   void Swap(Regexp *that);
    502 
    503   // Operator.  See description of operators above.
    504   // uint8 instead of RegexpOp to control space usage.
    505   uint8 op_;
    506 
    507   // Is this regexp structure already simple
    508   // (has it been returned by Simplify)?
    509   // uint8 instead of bool to control space usage.
    510   uint8 simple_;
    511 
    512   // Flags saved from parsing and used during execution.
    513   // (Only FoldCase is used.)
    514   // uint16 instead of ParseFlags to control space usage.
    515   uint16 parse_flags_;
    516 
    517   // Reference count.  Exists so that SimplifyRegexp can build
    518   // regexp structures that are dags rather than trees to avoid
    519   // exponential blowup in space requirements.
    520   // uint16 to control space usage.
    521   // The standard regexp routines will never generate a
    522   // ref greater than the maximum repeat count (100),
    523   // but even so, Incref and Decref consult an overflow map
    524   // when ref_ reaches kMaxRef.
    525   uint16 ref_;
    526   static const uint16 kMaxRef = 0xffff;
    527 
    528   // Subexpressions.
    529   // uint16 to control space usage.
    530   // Concat and Alternate handle larger numbers of subexpressions
    531   // by building concatenation or alternation trees.
    532   // Other routines should call Concat or Alternate instead of
    533   // filling in sub() by hand.
    534   uint16 nsub_;
    535   static const uint16 kMaxNsub = 0xffff;
    536   union {
    537     Regexp** submany_;  // if nsub_ > 1
    538     Regexp* subone_;  // if nsub_ == 1
    539   };
    540 
    541   // Extra space for parse and teardown stacks.
    542   Regexp* down_;
    543 
    544   // Arguments to operator.  See description of operators above.
    545   union {
    546     struct {  // Repeat
    547       int max_;
    548       int min_;
    549     };
    550     struct {  // Capture
    551       int cap_;
    552       string* name_;
    553     };
    554     struct {  // LiteralString
    555       int nrunes_;
    556       Rune* runes_;
    557     };
    558     struct {  // CharClass
    559       // These two could be in separate union members,
    560       // but it wouldn't save any space (there are other two-word structs)
    561       // and keeping them separate avoids confusion during parsing.
    562       CharClass* cc_;
    563       CharClassBuilder* ccb_;
    564     };
    565     Rune rune_;  // Literal
    566     int match_id_;  // HaveMatch
    567     void *the_union_[2];  // as big as any other element, for memset
    568   };
    569 
    570   DISALLOW_EVIL_CONSTRUCTORS(Regexp);
    571 };
    572 
    573 // Character class set: contains non-overlapping, non-abutting RuneRanges.
    574 typedef set<RuneRange, RuneRangeLess> RuneRangeSet;
    575 
    576 class CharClassBuilder {
    577  public:
    578   CharClassBuilder();
    579 
    580   typedef RuneRangeSet::iterator iterator;
    581   iterator begin() { return ranges_.begin(); }
    582   iterator end() { return ranges_.end(); }
    583 
    584   int size() { return nrunes_; }
    585   bool empty() { return nrunes_ == 0; }
    586   bool full() { return nrunes_ == Runemax+1; }
    587 
    588   bool Contains(Rune r);
    589   bool FoldsASCII();
    590   bool AddRange(Rune lo, Rune hi);  // returns whether class changed
    591   CharClassBuilder* Copy();
    592   void AddCharClass(CharClassBuilder* cc);
    593   void Negate();
    594   void RemoveAbove(Rune r);
    595   CharClass* GetCharClass();
    596   void AddRangeFlags(Rune lo, Rune hi, Regexp::ParseFlags parse_flags);
    597 
    598  private:
    599   static const uint32 AlphaMask = (1<<26) - 1;
    600   uint32 upper_;  // bitmap of A-Z
    601   uint32 lower_;  // bitmap of a-z
    602   int nrunes_;
    603   RuneRangeSet ranges_;
    604   DISALLOW_EVIL_CONSTRUCTORS(CharClassBuilder);
    605 };
    606 
    607 // Tell g++ that bitwise ops on ParseFlags produce ParseFlags.
    608 inline Regexp::ParseFlags operator|(Regexp::ParseFlags a, Regexp::ParseFlags b)
    609 {
    610   return static_cast<Regexp::ParseFlags>(static_cast<int>(a) | static_cast<int>(b));
    611 }
    612 
    613 inline Regexp::ParseFlags operator^(Regexp::ParseFlags a, Regexp::ParseFlags b)
    614 {
    615   return static_cast<Regexp::ParseFlags>(static_cast<int>(a) ^ static_cast<int>(b));
    616 }
    617 
    618 inline Regexp::ParseFlags operator&(Regexp::ParseFlags a, Regexp::ParseFlags b)
    619 {
    620   return static_cast<Regexp::ParseFlags>(static_cast<int>(a) & static_cast<int>(b));
    621 }
    622 
    623 inline Regexp::ParseFlags operator~(Regexp::ParseFlags a)
    624 {
    625   return static_cast<Regexp::ParseFlags>(~static_cast<int>(a));
    626 }
    627 
    628 
    629 
    630 }  // namespace re2
    631 
    632 #endif  // RE2_REGEXP_H__
    633