1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef TOOLS_GN_TOKENIZER_H_ 6 #define TOOLS_GN_TOKENIZER_H_ 7 8 #include <vector> 9 10 #include "base/basictypes.h" 11 #include "base/strings/string_piece.h" 12 #include "tools/gn/err.h" 13 #include "tools/gn/token.h" 14 15 class InputFile; 16 17 class Tokenizer { 18 public: 19 static std::vector<Token> Tokenize(const InputFile* input_file, Err* err); 20 21 // Counts lines in the given buffer (the first line is "1") and returns 22 // the byte offset of the beginning of that line, or (size_t)-1 if there 23 // aren't that many lines in the file. Note that this will return the byte 24 // one past the end of the input if the last character is a newline. 25 // 26 // This is a helper function for error output so that the tokenizer's 27 // notion of lines can be used elsewhere. 28 static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); 29 30 // Returns true if the given offset of the string piece counts as a newline. 31 // The offset must be in the buffer. 32 static bool IsNewline(const base::StringPiece& buffer, size_t offset); 33 34 static bool IsIdentifierFirstChar(char c) { 35 return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; 36 } 37 38 static bool IsIdentifierContinuingChar(char c) { 39 // Also allow digits after the first char. 40 return IsIdentifierFirstChar(c) || (c >= '0' && c <= '9'); 41 } 42 43 private: 44 // InputFile must outlive the tokenizer and all generated tokens. 45 explicit Tokenizer(const InputFile* input_file, Err* err); 46 ~Tokenizer(); 47 48 std::vector<Token> Run(); 49 50 void AdvanceToNextToken(); 51 Token::Type ClassifyCurrent() const; 52 void AdvanceToEndOfToken(const Location& location, Token::Type type); 53 54 bool IsCurrentWhitespace() const; 55 bool IsCurrentNewline() const; 56 bool IsCurrentStringTerminator(char quote_char) const; 57 58 bool CanIncrement() const { return cur_ < input_.size(); } 59 60 // Increments the current location by one. 61 void Advance(); 62 63 // Returns the current character in the file as a location. 64 Location GetCurrentLocation() const; 65 66 Err GetErrorForInvalidToken(const Location& location) const; 67 68 bool done() const { return at_end() || has_error(); } 69 70 bool at_end() const { return cur_ == input_.size(); } 71 char cur_char() const { return input_[cur_]; } 72 73 bool has_error() const { return err_->has_error(); } 74 75 std::vector<Token> tokens_; 76 77 const InputFile* input_file_; 78 const base::StringPiece input_; 79 Err* err_; 80 size_t cur_; // Byte offset into input buffer. 81 82 int line_number_; 83 int char_in_line_; 84 85 DISALLOW_COPY_AND_ASSIGN(Tokenizer); 86 }; 87 88 #endif // TOOLS_GN_TOKENIZER_H_ 89