Home | History | Annotate | Download | only in gn
      1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef TOOLS_GN_TOKENIZER_H_
      6 #define TOOLS_GN_TOKENIZER_H_
      7 
      8 #include <vector>
      9 
     10 #include "base/basictypes.h"
     11 #include "base/strings/string_piece.h"
     12 #include "base/strings/string_util.h"
     13 #include "tools/gn/err.h"
     14 #include "tools/gn/token.h"
     15 
     16 class InputFile;
     17 
     18 class Tokenizer {
     19  public:
     20   static std::vector<Token> Tokenize(const InputFile* input_file, Err* err);
     21 
     22   // Counts lines in the given buffer (the first line is "1") and returns
     23   // the byte offset of the beginning of that line, or (size_t)-1 if there
     24   // aren't that many lines in the file. Note that this will return the byte
     25   // one past the end of the input if the last character is a newline.
     26   //
     27   // This is a helper function for error output so that the tokenizer's
     28   // notion of lines can be used elsewhere.
     29   static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n);
     30 
     31   // Returns true if the given offset of the string piece counts as a newline.
     32   // The offset must be in the buffer.
     33   static bool IsNewline(const base::StringPiece& buffer, size_t offset);
     34 
     35   static bool IsIdentifierFirstChar(char c) {
     36     return IsAsciiAlpha(c) || c == '_';
     37   }
     38 
     39   static bool IsIdentifierContinuingChar(char c) {
     40     // Also allow digits after the first char.
     41     return IsIdentifierFirstChar(c) || IsAsciiDigit(c);
     42   }
     43 
     44  private:
     45   // InputFile must outlive the tokenizer and all generated tokens.
     46   explicit Tokenizer(const InputFile* input_file, Err* err);
     47   ~Tokenizer();
     48 
     49   std::vector<Token> Run();
     50 
     51   void AdvanceToNextToken();
     52   Token::Type ClassifyCurrent() const;
     53   void AdvanceToEndOfToken(const Location& location, Token::Type type);
     54 
     55   // Whether from this location back to the beginning of the line is only
     56   // whitespace. |location| should be the first character of the token to be
     57   // checked.
     58   bool AtStartOfLine(size_t location) const;
     59 
     60   bool IsCurrentWhitespace() const;
     61   bool IsCurrentNewline() const;
     62   bool IsCurrentStringTerminator(char quote_char) const;
     63 
     64   bool CanIncrement() const { return cur_ < input_.size(); }
     65 
     66   // Increments the current location by one.
     67   void Advance();
     68 
     69   // Returns the current character in the file as a location.
     70   Location GetCurrentLocation() const;
     71 
     72   Err GetErrorForInvalidToken(const Location& location) const;
     73 
     74   bool done() const { return at_end() || has_error(); }
     75 
     76   bool at_end() const { return cur_ == input_.size(); }
     77   char cur_char() const { return input_[cur_]; }
     78 
     79   bool has_error() const { return err_->has_error(); }
     80 
     81   std::vector<Token> tokens_;
     82 
     83   const InputFile* input_file_;
     84   const base::StringPiece input_;
     85   Err* err_;
     86   size_t cur_;  // Byte offset into input buffer.
     87 
     88   int line_number_;
     89   int char_in_line_;
     90 
     91   DISALLOW_COPY_AND_ASSIGN(Tokenizer);
     92 };
     93 
     94 #endif  // TOOLS_GN_TOKENIZER_H_
     95