Home | History | Annotate | Download | only in lex
      1 /*
      2  * Copyright 2017 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SKSL_REGEXPARSER
      9 #define SKSL_REGEXPARSER
     10 
     11 #include "RegexNode.h"
     12 
     13 #include <stack>
     14 #include <string>
     15 
     16 /**
     17  * Turns a simple regular expression into a parse tree. The regular expression syntax supports only
     18  * the basic quantifiers ('*', '+', and '?'), alternation ('|'), character sets ('[a-z]'), and
     19  * groups ('()').
     20  */
     21 class RegexParser {
     22 public:
     23     RegexNode parse(std::string source);
     24 
     25 private:
     26     static constexpr char END = '\0';
     27 
     28     char peek();
     29 
     30     void expect(char c);
     31 
     32     RegexNode pop();
     33 
     34     /**
     35      * Matches a char literal, parenthesized group, character set, or dot ('.').
     36      */
     37     void term();
     38 
     39     /**
     40      * Matches a term followed by an optional quantifier ('*', '+', or '?').
     41      */
     42     void quantifiedTerm();
     43 
     44     /**
     45      * Matches a sequence of quantifiedTerms.
     46      */
     47     void sequence();
     48 
     49     /**
     50      * Returns a node representing the given escape character (e.g. escapeSequence('n') returns a
     51      * node which matches a newline character).
     52      */
     53     RegexNode escapeSequence(char c);
     54 
     55     /**
     56      * Matches a literal character or escape sequence.
     57      */
     58     void literal();
     59 
     60     /**
     61      * Matches a dot ('.').
     62      */
     63     void dot();
     64 
     65     /**
     66      * Matches a parenthesized group.
     67      */
     68     void group();
     69 
     70     /**
     71      * Matches a literal character, escape sequence, or character range from a character set.
     72      */
     73     void setItem();
     74 
     75     /**
     76      * Matches a character set.
     77      */
     78     void set();
     79 
     80     void regex();
     81 
     82     std::string fSource;
     83 
     84     size_t fIndex;
     85 
     86     std::stack<RegexNode> fStack;
     87 };
     88 
     89 #endif
     90