1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef TOOLS_GN_TOKENIZER_H_ 6 #define TOOLS_GN_TOKENIZER_H_ 7 8 #include <vector> 9 10 #include "base/basictypes.h" 11 #include "base/strings/string_piece.h" 12 #include "base/strings/string_util.h" 13 #include "tools/gn/err.h" 14 #include "tools/gn/token.h" 15 16 class InputFile; 17 18 class Tokenizer { 19 public: 20 static std::vector<Token> Tokenize(const InputFile* input_file, Err* err); 21 22 // Counts lines in the given buffer (the first line is "1") and returns 23 // the byte offset of the beginning of that line, or (size_t)-1 if there 24 // aren't that many lines in the file. Note that this will return the byte 25 // one past the end of the input if the last character is a newline. 26 // 27 // This is a helper function for error output so that the tokenizer's 28 // notion of lines can be used elsewhere. 29 static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); 30 31 // Returns true if the given offset of the string piece counts as a newline. 32 // The offset must be in the buffer. 33 static bool IsNewline(const base::StringPiece& buffer, size_t offset); 34 35 static bool IsIdentifierFirstChar(char c) { 36 return IsAsciiAlpha(c) || c == '_'; 37 } 38 39 static bool IsIdentifierContinuingChar(char c) { 40 // Also allow digits after the first char. 41 return IsIdentifierFirstChar(c) || IsAsciiDigit(c); 42 } 43 44 private: 45 // InputFile must outlive the tokenizer and all generated tokens. 46 explicit Tokenizer(const InputFile* input_file, Err* err); 47 ~Tokenizer(); 48 49 std::vector<Token> Run(); 50 51 void AdvanceToNextToken(); 52 Token::Type ClassifyCurrent() const; 53 void AdvanceToEndOfToken(const Location& location, Token::Type type); 54 55 // Whether from this location back to the beginning of the line is only 56 // whitespace. |location| should be the first character of the token to be 57 // checked. 58 bool AtStartOfLine(size_t location) const; 59 60 bool IsCurrentWhitespace() const; 61 bool IsCurrentNewline() const; 62 bool IsCurrentStringTerminator(char quote_char) const; 63 64 bool CanIncrement() const { return cur_ < input_.size(); } 65 66 // Increments the current location by one. 67 void Advance(); 68 69 // Returns the current character in the file as a location. 70 Location GetCurrentLocation() const; 71 72 Err GetErrorForInvalidToken(const Location& location) const; 73 74 bool done() const { return at_end() || has_error(); } 75 76 bool at_end() const { return cur_ == input_.size(); } 77 char cur_char() const { return input_[cur_]; } 78 79 bool has_error() const { return err_->has_error(); } 80 81 std::vector<Token> tokens_; 82 83 const InputFile* input_file_; 84 const base::StringPiece input_; 85 Err* err_; 86 size_t cur_; // Byte offset into input buffer. 87 88 int line_number_; 89 int char_in_line_; 90 91 DISALLOW_COPY_AND_ASSIGN(Tokenizer); 92 }; 93 94 #endif // TOOLS_GN_TOKENIZER_H_ 95