Home | History | Annotate | Download | only in parsing
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Features shared by parsing and pre-parsing scanners.
      6 
      7 #ifndef V8_PARSING_SCANNER_H_
      8 #define V8_PARSING_SCANNER_H_
      9 
     10 #include "src/allocation.h"
     11 #include "src/base/logging.h"
     12 #include "src/char-predicates.h"
     13 #include "src/globals.h"
     14 #include "src/messages.h"
     15 #include "src/parsing/token.h"
     16 #include "src/unicode-decoder.h"
     17 #include "src/unicode.h"
     18 
     19 namespace v8 {
     20 namespace internal {
     21 
     22 
     23 class AstRawString;
     24 class AstValueFactory;
     25 class DuplicateFinder;
     26 class ExternalOneByteString;
     27 class ExternalTwoByteString;
     28 class ParserRecorder;
     29 class UnicodeCache;
     30 
     31 // ---------------------------------------------------------------------
     32 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
     33 // A code unit is a 16 bit value representing either a 16 bit code point
     34 // or one part of a surrogate pair that make a single 21 bit code point.
     35 class Utf16CharacterStream {
     36  public:
     37   static const uc32 kEndOfInput = -1;
     38 
     39   virtual ~Utf16CharacterStream() { }
     40 
     41   // Returns and advances past the next UTF-16 code unit in the input
     42   // stream. If there are no more code units it returns kEndOfInput.
     43   inline uc32 Advance() {
     44     if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
     45       return static_cast<uc32>(*(buffer_cursor_++));
     46     } else if (ReadBlock()) {
     47       return static_cast<uc32>(*(buffer_cursor_++));
     48     } else {
     49       // Note: currently the following increment is necessary to avoid a
     50       // parser problem! The scanner treats the final kEndOfInput as
     51       // a code unit with a position, and does math relative to that
     52       // position.
     53       buffer_cursor_++;
     54       return kEndOfInput;
     55     }
     56   }
     57 
     58   // Go back one by one character in the input stream.
     59   // This undoes the most recent Advance().
     60   inline void Back() {
     61     // The common case - if the previous character is within
     62     // buffer_start_ .. buffer_end_ will be handles locally.
     63     // Otherwise, a new block is requested.
     64     if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
     65       buffer_cursor_--;
     66     } else {
     67       ReadBlockAt(pos() - 1);
     68     }
     69   }
     70 
     71   // Go back one by two characters in the input stream. (This is the same as
     72   // calling Back() twice. But Back() may - in some instances - do substantial
     73   // work. Back2() guarantees this work will be done only once.)
     74   inline void Back2() {
     75     if (V8_LIKELY(buffer_cursor_ - 2 >= buffer_start_)) {
     76       buffer_cursor_ -= 2;
     77     } else {
     78       ReadBlockAt(pos() - 2);
     79     }
     80   }
     81 
     82   inline size_t pos() const {
     83     return buffer_pos_ + (buffer_cursor_ - buffer_start_);
     84   }
     85 
     86   inline void Seek(size_t pos) {
     87     if (V8_LIKELY(pos >= buffer_pos_ &&
     88                   pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
     89       buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
     90     } else {
     91       ReadBlockAt(pos);
     92     }
     93   }
     94 
     95  protected:
     96   Utf16CharacterStream(const uint16_t* buffer_start,
     97                        const uint16_t* buffer_cursor,
     98                        const uint16_t* buffer_end, size_t buffer_pos)
     99       : buffer_start_(buffer_start),
    100         buffer_cursor_(buffer_cursor),
    101         buffer_end_(buffer_end),
    102         buffer_pos_(buffer_pos) {}
    103   Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
    104 
    105   void ReadBlockAt(size_t new_pos) {
    106     // The callers of this method (Back/Back2/Seek) should handle the easy
    107     // case (seeking within the current buffer), and we should only get here
    108     // if we actually require new data.
    109     // (This is really an efficiency check, not a correctness invariant.)
    110     DCHECK(new_pos < buffer_pos_ ||
    111            new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
    112 
    113     // Change pos() to point to new_pos.
    114     buffer_pos_ = new_pos;
    115     buffer_cursor_ = buffer_start_;
    116     bool success = ReadBlock();
    117     USE(success);
    118 
    119     // Post-conditions: 1, on success, we should be at the right position.
    120     //                  2, success == we should have more characters available.
    121     DCHECK_IMPLIES(success, pos() == new_pos);
    122     DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    123     DCHECK_EQ(success, buffer_start_ < buffer_end_);
    124   }
    125 
    126   // Read more data, and update buffer_*_ to point to it.
    127   // Returns true if more data was available.
    128   //
    129   // ReadBlock() may modify any of the buffer_*_ members, but must sure that
    130   // the result of pos() remains unaffected.
    131   //
    132   // Examples:
    133   // - a stream could either fill a separate buffer. Then buffer_start_ and
    134   //   buffer_cursor_ would point to the beginning of the buffer, and
    135   //   buffer_pos would be the old pos().
    136   // - a stream with existing buffer chunks would set buffer_start_ and
    137   //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
    138   //   point into the middle of the buffer, while buffer_pos_ would describe
    139   //   the start of the buffer.
    140   virtual bool ReadBlock() = 0;
    141 
    142   const uint16_t* buffer_start_;
    143   const uint16_t* buffer_cursor_;
    144   const uint16_t* buffer_end_;
    145   size_t buffer_pos_;
    146 };
    147 
    148 
    149 // ----------------------------------------------------------------------------
    150 // JavaScript Scanner.
    151 
    152 class Scanner {
    153  public:
  // Scoped helper for a re-settable bookmark.
  // Keeps a pointer to the Scanner (not deleted by the destructor) and one
  // bookmark_ value, which starts out as kNoBookmark. Set(), Apply(),
  // HasBeenSet() and HasBeenApplied() are defined out of line.
  class BookmarkScope {
   public:
    // |scanner| must not be null (DCHECKed).
    explicit BookmarkScope(Scanner* scanner)
        : scanner_(scanner), bookmark_(kNoBookmark) {
      DCHECK_NOT_NULL(scanner_);
    }
    ~BookmarkScope() {}

    void Set();
    void Apply();
    bool HasBeenSet();
    bool HasBeenApplied();

   private:
    // Sentinel values for bookmark_; the values are defined out of line.
    // NOTE(review): presumably bookmark_ otherwise holds a stream position -
    // confirm against the definitions of Set() and Apply().
    static const size_t kNoBookmark;
    static const size_t kBookmarkWasApplied;
    static const size_t kBookmarkAtFirstPos;

    Scanner* scanner_;
    size_t bookmark_;

    DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
  };
    178 
    179   // Representation of an interval of source positions.
    180   struct Location {
    181     Location(int b, int e) : beg_pos(b), end_pos(e) { }
    182     Location() : beg_pos(0), end_pos(0) { }
    183 
    184     bool IsValid() const {
    185       return beg_pos >= 0 && end_pos >= beg_pos;
    186     }
    187 
    188     static Location invalid() { return Location(-1, -1); }
    189 
    190     int beg_pos;
    191     int end_pos;
    192   };
    193 
  // -1 is outside of the range of any real source code.
  static const int kNoOctalLocation = -1;
  // Same value as the character stream's end-of-input sentinel.
  static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;

  // Defined out of line.
  explicit Scanner(UnicodeCache* scanner_contants);

  // Defined out of line.
  // NOTE(review): presumably installs |source| as the input stream and
  // primes the look-ahead - confirm against the definition.
  void Initialize(Utf16CharacterStream* source);

  // Returns the next token and advances input.
  Token::Value Next();
  // Returns the token following peek()
  Token::Value PeekAhead();
  // Returns the current token again.
  Token::Value current_token() { return current_.token; }
  // Returns the location information for the current token
  // (the token last returned by Next()).
  Location location() const { return current_.location; }

  // This error is specifically an invalid hex or unicode escape sequence.
  // has_error() is true once a message other than MessageTemplate::kNone has
  // been recorded; error() and error_location() return what was recorded.
  bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
  MessageTemplate::Template error() const { return scanner_error_; }
  Location error_location() const { return scanner_error_location_; }
    216 
  // True while an invalid-template-escape message (anything other than
  // MessageTemplate::kNone) is recorded.
  bool has_invalid_template_escape() const {
    return invalid_template_escape_message_ != MessageTemplate::kNone;
  }
  // The recorded message; MessageTemplate::kNone if there is none.
  MessageTemplate::Template invalid_template_escape_message() const {
    return invalid_template_escape_message_;
  }
  // The location stored alongside the recorded message.
  Location invalid_template_escape_location() const {
    return invalid_template_escape_location_;
  }

  // Forgets the recorded invalid template escape. Must only be called while
  // one is recorded (DCHECKed).
  void clear_invalid_template_escape() {
    DCHECK(has_invalid_template_escape());
    invalid_template_escape_message_ = MessageTemplate::kNone;
    invalid_template_escape_location_ = Location::invalid();
  }
    232 
  // Similar functions for the upcoming token.

  // One token look-ahead (past the token returned by Next()).
  Token::Value peek() const { return next_.token; }

  // Location of the look-ahead token.
  Location peek_location() const { return next_.location; }

  // Whether the current token's literal differs in length from its source
  // text; see LiteralContainsEscapes().
  bool literal_contains_escapes() const {
    return LiteralContainsEscapes(current_);
  }
  // Whether the current token's literal is exactly |keyword|. The current
  // token must be an IDENTIFIER or ESCAPED_STRICT_RESERVED_WORD with a
  // literal buffer (both DCHECKed).
  bool is_literal_contextual_keyword(Vector<const char> keyword) {
    DCHECK(current_.token == Token::IDENTIFIER ||
           current_.token == Token::ESCAPED_STRICT_RESERVED_WORD);
    DCHECK_NOT_NULL(current_.literal_chars);
    return current_.literal_chars->is_contextual_keyword(keyword);
  }
  // Same comparison for the look-ahead token, which must have a literal
  // buffer (DCHECKed).
  bool is_next_contextual_keyword(Vector<const char> keyword) {
    DCHECK_NOT_NULL(next_.literal_chars);
    return next_.literal_chars->is_contextual_keyword(keyword);
  }
    253 
    254   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
    255   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
    256   const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
    257 
    258   double DoubleValue();
    259   bool ContainsDot();
    260   bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
    261     if (!current_.literal_chars) {
    262       return !strncmp(Token::Name(current_.token), data, length);
    263     } else if (is_literal_one_byte() && literal_length() == length &&
    264                (allow_escapes || !literal_contains_escapes())) {
    265       const char* token =
    266           reinterpret_cast<const char*>(literal_one_byte_string().start());
    267       return !strncmp(token, data, length);
    268     }
    269     return false;
    270   }
    271   inline bool UnescapedLiteralMatches(const char* data, int length) {
    272     return LiteralMatches(data, length, false);
    273   }
    274 
    275   bool IsGetOrSet(bool* is_get, bool* is_set) {
    276     if (is_literal_one_byte() &&
    277         literal_length() == 3 &&
    278         !literal_contains_escapes()) {
    279       const char* token =
    280           reinterpret_cast<const char*>(literal_one_byte_string().start());
    281       *is_get = strncmp(token, "get", 3) == 0;
    282       *is_set = !*is_get && strncmp(token, "set", 3) == 0;
    283       return *is_get || *is_set;
    284     }
    285     return false;
    286   }
    287 
  // Defined out of line.
  bool FindSymbol(DuplicateFinder* finder);

  // The UnicodeCache this scanner holds a pointer to.
  UnicodeCache* unicode_cache() { return unicode_cache_; }

  // Returns the location of the last seen octal literal.
  Location octal_position() const { return octal_pos_; }
  // Resets the octal bookkeeping: the position becomes Location::invalid()
  // and the message MessageTemplate::kNone.
  void clear_octal_position() {
    octal_pos_ = Location::invalid();
    octal_message_ = MessageTemplate::kNone;
  }
  // The message stored together with octal_position().
  MessageTemplate::Template octal_message() const { return octal_message_; }

  // Returns the value of the last smi that was scanned.
  uint32_t smi_value() const { return current_.smi_value_; }
    302 
  // Seek forward to the given position.  This operation does not
  // work in general, for instance when there are pushed back
  // characters, but works for seeking forward until simple delimiter
  // tokens, which is what it is used for.
  void SeekForward(int pos);

  // Returns true if there was a line terminator before the peek'ed token,
  // possibly inside a multi-line comment.
  bool HasAnyLineTerminatorBeforeNext() const {
    return has_line_terminator_before_next_ ||
           has_multiline_comment_before_next_;
  }

  // Calls PeekAhead() first (its result is deliberately ignored) and then
  // reports has_line_terminator_after_next_.
  // NOTE(review): presumably PeekAhead() is what brings that flag up to
  // date - confirm against its definition.
  bool HasAnyLineTerminatorAfterNext() {
    Token::Value ensure_next_next = PeekAhead();
    USE(ensure_next_next);
    return has_line_terminator_after_next_;
  }

  // Scans the input as a regular expression pattern, next token must be /(=).
  // Returns true if a pattern is scanned.
  bool ScanRegExpPattern();
  // Scans the input as regular expression flags. Returns the flags on success.
  Maybe<RegExp::Flags> ScanRegExpFlags();

  // Scans the input as a template literal
  Token::Value ScanTemplateStart();
  Token::Value ScanTemplateContinuation();
    331 
    332   Handle<String> SourceUrl(Isolate* isolate) const {
    333     Handle<String> tmp;
    334     if (source_url_.length() > 0) tmp = source_url_.Internalize(isolate);
    335     return tmp;
    336   }
    337 
    338   Handle<String> SourceMappingUrl(Isolate* isolate) const {
    339     Handle<String> tmp;
    340     if (source_mapping_url_.length() > 0)
    341       tmp = source_mapping_url_.Internalize(isolate);
    342     return tmp;
    343   }
    344 
  // Whether this scanner encountered an HTML comment (reset by Init()).
  bool FoundHtmlComment() const { return found_html_comment_; }
    346 
    347  private:
    348   // Scoped helper for literal recording. Automatically drops the literal
    349   // if aborting the scanning before it's complete.
    350   class LiteralScope {
    351    public:
    352     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
    353       scanner_->StartLiteral();
    354     }
    355     ~LiteralScope() {
    356       if (!complete_) scanner_->DropLiteral();
    357     }
    358     void Complete() { complete_ = true; }
    359 
    360    private:
    361     Scanner* scanner_;
    362     bool complete_;
    363   };
    364 
    365   // LiteralBuffer -  Collector of chars of literals.
    366   class LiteralBuffer {
    367    public:
    368     LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() {}
    369 
    370     ~LiteralBuffer() { backing_store_.Dispose(); }
    371 
    372     INLINE(void AddChar(char code_unit)) {
    373       DCHECK(IsValidAscii(code_unit));
    374       AddOneByteChar(static_cast<byte>(code_unit));
    375     }
    376 
    377     INLINE(void AddChar(uc32 code_unit)) {
    378       if (is_one_byte_ &&
    379           code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
    380         AddOneByteChar(static_cast<byte>(code_unit));
    381       } else {
    382         AddCharSlow(code_unit);
    383       }
    384     }
    385 
    386     bool is_one_byte() const { return is_one_byte_; }
    387 
    388     bool is_contextual_keyword(Vector<const char> keyword) const {
    389       return is_one_byte() && keyword.length() == position_ &&
    390              (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
    391     }
    392 
    393     Vector<const uint16_t> two_byte_literal() const {
    394       DCHECK(!is_one_byte_);
    395       DCHECK((position_ & 0x1) == 0);
    396       return Vector<const uint16_t>(
    397           reinterpret_cast<const uint16_t*>(backing_store_.start()),
    398           position_ >> 1);
    399     }
    400 
    401     Vector<const uint8_t> one_byte_literal() const {
    402       DCHECK(is_one_byte_);
    403       return Vector<const uint8_t>(
    404           reinterpret_cast<const uint8_t*>(backing_store_.start()), position_);
    405     }
    406 
    407     int length() const { return is_one_byte_ ? position_ : (position_ >> 1); }
    408 
    409     void ReduceLength(int delta) {
    410       position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
    411     }
    412 
    413     void Reset() {
    414       position_ = 0;
    415       is_one_byte_ = true;
    416     }
    417 
    418     Handle<String> Internalize(Isolate* isolate) const;
    419 
    420    private:
    421     static const int kInitialCapacity = 16;
    422     static const int kGrowthFactory = 4;
    423     static const int kMinConversionSlack = 256;
    424     static const int kMaxGrowth = 1 * MB;
    425 
    426     inline bool IsValidAscii(char code_unit) {
    427       // Control characters and printable characters span the range of
    428       // valid ASCII characters (0-127). Chars are unsigned on some
    429       // platforms which causes compiler warnings if the validity check
    430       // tests the lower bound >= 0 as it's always true.
    431       return iscntrl(code_unit) || isprint(code_unit);
    432     }
    433 
    434     INLINE(void AddOneByteChar(byte one_byte_char)) {
    435       DCHECK(is_one_byte_);
    436       if (position_ >= backing_store_.length()) ExpandBuffer();
    437       backing_store_[position_] = one_byte_char;
    438       position_ += kOneByteSize;
    439     }
    440 
    441     void AddCharSlow(uc32 code_unit);
    442     int NewCapacity(int min_capacity);
    443     void ExpandBuffer();
    444     void ConvertToTwoByte();
    445 
    446     bool is_one_byte_;
    447     int position_;
    448     Vector<byte> backing_store_;
    449 
    450     DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
    451   };
    452 
  // The current and look-ahead token.
  // The two buffer pointers are null for tokens without a recorded literal
  // (Init() and DropLiteral() set them to null); when set they point at the
  // scanner's literal buffers (see StartLiteral()/StartRawLiteral()).
  struct TokenDesc {
    Location location;                 // Source range of the token.
    LiteralBuffer* literal_chars;      // Literal text, or null.
    LiteralBuffer* raw_literal_chars;  // Raw literal text, or null.
    uint32_t smi_value_;               // Exposed through smi_value().
    Token::Value token;
  };
    461 
  // Number of characters the scanner reads ahead of the current source
  // position (c0_); source_pos() subtracts it from the stream position.
  static const int kCharacterLookaheadBufferSize = 1;
  // Largest ASCII code point.
  // NOTE(review): unlike the constant above this is a non-static member, so
  // every Scanner instance stores its own copy. It looks like it was meant
  // to be `static const`; check how it is used before changing it.
  const int kMaxAscii = 127;

  // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
  template <bool capture_raw>
  uc32 ScanOctalEscape(uc32 c, int length);
    468 
    469   // Call this after setting source_ to the input.
    470   void Init() {
    471     // Set c0_ (one character ahead)
    472     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    473     Advance();
    474     // Initialize current_ to not refer to a literal.
    475     current_.token = Token::UNINITIALIZED;
    476     current_.literal_chars = NULL;
    477     current_.raw_literal_chars = NULL;
    478     next_.token = Token::UNINITIALIZED;
    479     next_.literal_chars = NULL;
    480     next_.raw_literal_chars = NULL;
    481     next_next_.token = Token::UNINITIALIZED;
    482     next_next_.literal_chars = NULL;
    483     next_next_.raw_literal_chars = NULL;
    484     found_html_comment_ = false;
    485     scanner_error_ = MessageTemplate::kNone;
    486     invalid_template_escape_message_ = MessageTemplate::kNone;
    487   }
    488 
    489   void ReportScannerError(const Location& location,
    490                           MessageTemplate::Template error) {
    491     if (has_error()) return;
    492     scanner_error_ = error;
    493     scanner_error_location_ = location;
    494   }
    495 
    496   void ReportScannerError(int pos, MessageTemplate::Template error) {
    497     if (has_error()) return;
    498     scanner_error_ = error;
    499     scanner_error_location_ = Location(pos, pos + 1);
    500   }
    501 
    502   // Seek to the next_ token at the given position.
    503   void SeekNext(size_t position);
    504 
    505   // Literal buffer support
    506   inline void StartLiteral() {
    507     LiteralBuffer* free_buffer =
    508         (current_.literal_chars == &literal_buffer0_)
    509             ? &literal_buffer1_
    510             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
    511                                                             : &literal_buffer0_;
    512     free_buffer->Reset();
    513     next_.literal_chars = free_buffer;
    514   }
    515 
    516   inline void StartRawLiteral() {
    517     LiteralBuffer* free_buffer =
    518         (current_.raw_literal_chars == &raw_literal_buffer0_)
    519             ? &raw_literal_buffer1_
    520             : (current_.raw_literal_chars == &raw_literal_buffer1_)
    521                   ? &raw_literal_buffer2_
    522                   : &raw_literal_buffer0_;
    523     free_buffer->Reset();
    524     next_.raw_literal_chars = free_buffer;
    525   }
    526 
  // The helpers below append to (or trim) the literal being recorded for
  // next_. Each requires that the corresponding buffer has been started
  // (DCHECKed).

  // Appends a code point to the literal of next_.
  INLINE(void AddLiteralChar(uc32 c)) {
    DCHECK_NOT_NULL(next_.literal_chars);
    next_.literal_chars->AddChar(c);
  }

  // Appends an ASCII character to the literal of next_.
  INLINE(void AddLiteralChar(char c)) {
    DCHECK_NOT_NULL(next_.literal_chars);
    next_.literal_chars->AddChar(c);
  }

  // Appends a code point to the raw literal of next_.
  INLINE(void AddRawLiteralChar(uc32 c)) {
    DCHECK_NOT_NULL(next_.raw_literal_chars);
    next_.raw_literal_chars->AddChar(c);
  }

  // Removes the last |delta| characters from the raw literal of next_.
  INLINE(void ReduceRawLiteralLength(int delta)) {
    DCHECK_NOT_NULL(next_.raw_literal_chars);
    next_.raw_literal_chars->ReduceLength(delta);
  }
    546 
    547   // Stops scanning of a literal and drop the collected characters,
    548   // e.g., due to an encountered error.
    549   inline void DropLiteral() {
    550     next_.literal_chars = NULL;
    551     next_.raw_literal_chars = NULL;
    552   }
    553 
  // Appends the look-ahead character c0_ to the literal of next_ and then
  // moves on to the following character.
  inline void AddLiteralCharAdvance() {
    AddLiteralChar(c0_);
    Advance();
  }
    558 
  // Low-level scanning support.
  // Replaces c0_ with the next character from the stream.
  // - capture_raw: first append the outgoing c0_ to the raw literal of next_.
  // - check_surrogate: afterwards let HandleLeadSurrogate() merge a
  //   surrogate pair into a single code point.
  template <bool capture_raw = false, bool check_surrogate = true>
  void Advance() {
    if (capture_raw) {
      // Must happen before c0_ is overwritten below.
      AddRawLiteralChar(c0_);
    }
    c0_ = source_->Advance();
    if (check_surrogate) HandleLeadSurrogate();
  }
    568 
    569   void HandleLeadSurrogate() {
    570     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
    571       uc32 c1 = source_->Advance();
    572       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
    573         source_->Back();
    574       } else {
    575         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
    576       }
    577     }
    578   }
    579 
    580   void PushBack(uc32 ch) {
    581     if (c0_ > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
    582       source_->Back2();
    583     } else {
    584       source_->Back();
    585     }
    586     c0_ = ch;
    587   }
    588 
  // Same as PushBack(ch1); PushBack(ch2).
  // - Potentially more efficient as it uses Back2() on the stream.
  // - Uses char as parameters, since we're only calling it with ASCII chars in
  //   practice. This way, we can avoid a few edge cases.
  // Note that |ch1| is not used: after both push-backs only |ch2| remains
  // visible as c0_.
  void PushBack2(char ch1, char ch2) {
    source_->Back2();
    c0_ = ch2;
  }
    597 
    598   inline Token::Value Select(Token::Value tok) {
    599     Advance();
    600     return tok;
    601   }
    602 
    603   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
    604     Advance();
    605     if (c0_ == next) {
    606       Advance();
    607       return then;
    608     } else {
    609       return else_;
    610     }
    611   }
    612 
  // Returns the literal string, if any, for the current token (the
  // token last returned by Next()). The string is 0-terminated.
  // Literal strings are collected for identifiers, strings, numbers as well
  // as for template literals. For template literals we also collect the raw
  // form.
  // These functions only give the correct result if the literal was scanned
  // when a LiteralScope object is alive.
  //
  // Current usage of these functions is unfortunately a little undisciplined,
  // and is_literal_one_byte() + literal_one_byte_string() is also
  // requested for tokens that do not have a literal. Hence, we treat any
  // token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
  // literal "function".
  Vector<const uint8_t> literal_one_byte_string() {
    if (current_.literal_chars)
      return current_.literal_chars->one_byte_literal();
    // No literal recorded: fall back to the token's own string.
    const char* str = Token::String(current_.token);
    const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
    return Vector<const uint8_t>(str_as_uint8,
                                 Token::StringLength(current_.token));
  }
  // Unlike the one-byte variant this requires a recorded literal (DCHECKed).
  Vector<const uint16_t> literal_two_byte_string() {
    DCHECK_NOT_NULL(current_.literal_chars);
    return current_.literal_chars->two_byte_literal();
  }
  // Tokens without a recorded literal count as one-byte.
  bool is_literal_one_byte() {
    return !current_.literal_chars || current_.literal_chars->is_one_byte();
  }
  // Length of the recorded literal, or of the token's own string if there is
  // none.
  int literal_length() const {
    if (current_.literal_chars) return current_.literal_chars->length();
    return Token::StringLength(current_.token);
  }
  // Returns the literal string for the next token (the token that
  // would be returned if Next() were called).
  // All accessors below require the respective buffer to be present
  // (DCHECKed); there is no fallback to the token's own string.
  Vector<const uint8_t> next_literal_one_byte_string() {
    DCHECK_NOT_NULL(next_.literal_chars);
    return next_.literal_chars->one_byte_literal();
  }
  Vector<const uint16_t> next_literal_two_byte_string() {
    DCHECK_NOT_NULL(next_.literal_chars);
    return next_.literal_chars->two_byte_literal();
  }
  bool is_next_literal_one_byte() {
    DCHECK_NOT_NULL(next_.literal_chars);
    return next_.literal_chars->is_one_byte();
  }
  // Raw literal of the current token.
  Vector<const uint8_t> raw_literal_one_byte_string() {
    DCHECK_NOT_NULL(current_.raw_literal_chars);
    return current_.raw_literal_chars->one_byte_literal();
  }
  Vector<const uint16_t> raw_literal_two_byte_string() {
    DCHECK_NOT_NULL(current_.raw_literal_chars);
    return current_.raw_literal_chars->two_byte_literal();
  }
  bool is_raw_literal_one_byte() {
    DCHECK_NOT_NULL(current_.raw_literal_chars);
    return current_.raw_literal_chars->is_one_byte();
  }
    671 
  // The scanning routines below are all defined out of line.

  template <bool capture_raw, bool unicode = false>
  uc32 ScanHexNumber(int expected_length);
  // Scan a number of any length but not bigger than max_value. For example, the
  // number can be 000000001, so it's very long in characters but its value is
  // small.
  template <bool capture_raw>
  uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);

  // Scans a single JavaScript token.
  void Scan();

  bool SkipWhiteSpace();
  Token::Value SkipSingleLineComment();
  Token::Value SkipSourceURLComment();
  void TryToParseSourceURLComment();
  Token::Value SkipMultiLineComment();
  // Scans a possible HTML comment -- begins with '<!'.
  Token::Value ScanHtmlComment();

  void ScanDecimalDigits();
  Token::Value ScanNumber(bool seen_period);
  Token::Value ScanIdentifierOrKeyword();
  Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);

  Token::Value ScanString();

  // Scans an escape-sequence which is part of a string and adds the
  // decoded character to the current literal. Returns true if a pattern
  // is scanned.
  // NOTE(review): "pattern" looks copied from ScanRegExpPattern(); presumably
  // this returns true when the escape sequence was scanned successfully -
  // confirm against the definition.
  template <bool capture_raw, bool in_template_literal>
  bool ScanEscape();

  // Decodes a Unicode escape-sequence which is part of an identifier.
  // If the escape sequence cannot be decoded the result is kBadChar.
  uc32 ScanIdentifierUnicodeEscape();
  // Helper for the above functions.
  template <bool capture_raw>
  uc32 ScanUnicodeEscape();

  Token::Value ScanTemplateSpan();
    712 
    713   // Return the current source position.
    714   int source_pos() {
    715     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
    716   }
    717 
    718   static bool LiteralContainsEscapes(const TokenDesc& token) {
    719     Location location = token.location;
    720     int source_length = (location.end_pos - location.beg_pos);
    721     if (token.token == Token::STRING) {
    722       // Subtract delimiters.
    723       source_length -= 2;
    724     }
    725     return token.literal_chars &&
    726            (token.literal_chars->length() != source_length);
    727   }
    728 
#ifdef DEBUG
  // Debug-only consistency check for a token descriptor (defined out of
  // line).
  void SanityCheckTokenDesc(const TokenDesc&) const;
#endif

  UnicodeCache* unicode_cache_;

  // Buffers collecting literal strings, numbers, etc.
  // StartLiteral() rotates through these three buffers.
  LiteralBuffer literal_buffer0_;
  LiteralBuffer literal_buffer1_;
  LiteralBuffer literal_buffer2_;

  // Values parsed from magic comments.
  LiteralBuffer source_url_;
  LiteralBuffer source_mapping_url_;

  // Buffer to store raw string values
  // StartRawLiteral() rotates through these three buffers.
  LiteralBuffer raw_literal_buffer0_;
  LiteralBuffer raw_literal_buffer1_;
  LiteralBuffer raw_literal_buffer2_;

  TokenDesc current_;    // desc for current token (as returned by Next())
  TokenDesc next_;       // desc for next token (one token look-ahead)
  TokenDesc next_next_;  // desc for the token after next (after PeekAhead())

  // Input stream. Must be initialized to an Utf16CharacterStream.
  Utf16CharacterStream* source_;

  // Last-seen positions of potentially problematic tokens.
  Location octal_pos_;
  MessageTemplate::Template octal_message_;

  // One Unicode character look-ahead; c0_ < 0 at the end of the input.
  uc32 c0_;

  // Whether there is a line terminator whitespace character after
  // the current token, and  before the next. Does not count newlines
  // inside multiline comments.
  bool has_line_terminator_before_next_;
  // Whether there is a multi-line comment that contains a
  // line-terminator after the current token, and before the next.
  bool has_multiline_comment_before_next_;
  // Reported by HasAnyLineTerminatorAfterNext() after it calls PeekAhead().
  bool has_line_terminator_after_next_;

  // Whether this scanner encountered an HTML comment.
  bool found_html_comment_;

  // First error reported through ReportScannerError(), with its location;
  // MessageTemplate::kNone while no error has been reported.
  MessageTemplate::Template scanner_error_;
  Location scanner_error_location_;

  // Invalid template escape, if any; MessageTemplate::kNone otherwise.
  MessageTemplate::Template invalid_template_escape_message_;
  Location invalid_template_escape_location_;
    780 };
    781 
    782 }  // namespace internal
    783 }  // namespace v8
    784 
    785 #endif  // V8_PARSING_SCANNER_H_
    786