Home | History | Annotate | Download | only in parsing
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Features shared by parsing and pre-parsing scanners.
      6 
      7 #ifndef V8_PARSING_SCANNER_H_
      8 #define V8_PARSING_SCANNER_H_
      9 
     10 #include <algorithm>
     11 
     12 #include "src/allocation.h"
     13 #include "src/base/logging.h"
     14 #include "src/char-predicates.h"
     15 #include "src/globals.h"
     16 #include "src/messages.h"
     17 #include "src/parsing/token.h"
     18 #include "src/unicode-decoder.h"
     19 #include "src/unicode.h"
     20 
     21 namespace v8 {
     22 namespace internal {
     23 
     24 
     25 class AstRawString;
     26 class AstValueFactory;
     27 class DuplicateFinder;
     28 class ExternalOneByteString;
     29 class ExternalTwoByteString;
     30 class ParserRecorder;
     31 class UnicodeCache;
     32 
     33 // ---------------------------------------------------------------------
     34 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
     35 // A code unit is a 16 bit value representing either a 16 bit code point
     36 // or one part of a surrogate pair that make a single 21 bit code point.
     37 class Utf16CharacterStream {
     38  public:
     39   static const uc32 kEndOfInput = -1;
     40 
     41   virtual ~Utf16CharacterStream() {}
     42 
     43   inline uc32 Peek() {
     44     if (V8_LIKELY(buffer_cursor_ < buffer_end_)) {
     45       return static_cast<uc32>(*buffer_cursor_);
     46     } else if (ReadBlockChecked()) {
     47       return static_cast<uc32>(*buffer_cursor_);
     48     } else {
     49       return kEndOfInput;
     50     }
     51   }
     52 
     53   // Returns and advances past the next UTF-16 code unit in the input
     54   // stream. If there are no more code units it returns kEndOfInput.
     55   inline uc32 Advance() {
     56     uc32 result = Peek();
     57     buffer_cursor_++;
     58     return result;
     59   }
     60 
     61   // Returns and advances past the next UTF-16 code unit in the input stream
     62   // that meets the checks requirement. If there are no more code units it
     63   // returns kEndOfInput.
     64   template <typename FunctionType>
     65   V8_INLINE uc32 AdvanceUntil(FunctionType check) {
     66     while (true) {
     67       auto next_cursor_pos =
     68           std::find_if(buffer_cursor_, buffer_end_, [&check](uint16_t raw_c0_) {
     69             uc32 c0_ = static_cast<uc32>(raw_c0_);
     70             return check(c0_);
     71           });
     72 
     73       if (next_cursor_pos == buffer_end_) {
     74         buffer_cursor_ = buffer_end_;
     75         if (!ReadBlockChecked()) {
     76           buffer_cursor_++;
     77           return kEndOfInput;
     78         }
     79       } else {
     80         buffer_cursor_ = next_cursor_pos + 1;
     81         return static_cast<uc32>(*next_cursor_pos);
     82       }
     83     }
     84   }
     85 
     86   // Go back one by one character in the input stream.
     87   // This undoes the most recent Advance().
     88   inline void Back() {
     89     // The common case - if the previous character is within
     90     // buffer_start_ .. buffer_end_ will be handles locally.
     91     // Otherwise, a new block is requested.
     92     if (V8_LIKELY(buffer_cursor_ > buffer_start_)) {
     93       buffer_cursor_--;
     94     } else {
     95       ReadBlockAt(pos() - 1);
     96     }
     97   }
     98 
     99   inline size_t pos() const {
    100     return buffer_pos_ + (buffer_cursor_ - buffer_start_);
    101   }
    102 
    103   inline void Seek(size_t pos) {
    104     if (V8_LIKELY(pos >= buffer_pos_ &&
    105                   pos < (buffer_pos_ + (buffer_end_ - buffer_start_)))) {
    106       buffer_cursor_ = buffer_start_ + (pos - buffer_pos_);
    107     } else {
    108       ReadBlockAt(pos);
    109     }
    110   }
    111 
    112   // Returns true if the stream could access the V8 heap after construction.
    113   virtual bool can_access_heap() = 0;
    114 
    115  protected:
    116   Utf16CharacterStream(const uint16_t* buffer_start,
    117                        const uint16_t* buffer_cursor,
    118                        const uint16_t* buffer_end, size_t buffer_pos)
    119       : buffer_start_(buffer_start),
    120         buffer_cursor_(buffer_cursor),
    121         buffer_end_(buffer_end),
    122         buffer_pos_(buffer_pos) {}
    123   Utf16CharacterStream() : Utf16CharacterStream(nullptr, nullptr, nullptr, 0) {}
    124 
    125   bool ReadBlockChecked() {
    126     size_t position = pos();
    127     USE(position);
    128     bool success = ReadBlock();
    129 
    130     // Post-conditions: 1, We should always be at the right position.
    131     //                  2, Cursor should be inside the buffer.
    132     //                  3, We should have more characters available iff success.
    133     DCHECK_EQ(pos(), position);
    134     DCHECK_LE(buffer_cursor_, buffer_end_);
    135     DCHECK_LE(buffer_start_, buffer_cursor_);
    136     DCHECK_EQ(success, buffer_cursor_ < buffer_end_);
    137     return success;
    138   }
    139 
    140   void ReadBlockAt(size_t new_pos) {
    141     // The callers of this method (Back/Back2/Seek) should handle the easy
    142     // case (seeking within the current buffer), and we should only get here
    143     // if we actually require new data.
    144     // (This is really an efficiency check, not a correctness invariant.)
    145     DCHECK(new_pos < buffer_pos_ ||
    146            new_pos >= buffer_pos_ + (buffer_end_ - buffer_start_));
    147 
    148     // Change pos() to point to new_pos.
    149     buffer_pos_ = new_pos;
    150     buffer_cursor_ = buffer_start_;
    151     DCHECK_EQ(pos(), new_pos);
    152     ReadBlockChecked();
    153   }
    154 
    155   // Read more data, and update buffer_*_ to point to it.
    156   // Returns true if more data was available.
    157   //
    158   // ReadBlock() may modify any of the buffer_*_ members, but must sure that
    159   // the result of pos() remains unaffected.
    160   //
    161   // Examples:
    162   // - a stream could either fill a separate buffer. Then buffer_start_ and
    163   //   buffer_cursor_ would point to the beginning of the buffer, and
    164   //   buffer_pos would be the old pos().
    165   // - a stream with existing buffer chunks would set buffer_start_ and
    166   //   buffer_end_ to cover the full chunk, and then buffer_cursor_ would
    167   //   point into the middle of the buffer, while buffer_pos_ would describe
    168   //   the start of the buffer.
    169   virtual bool ReadBlock() = 0;
    170 
    171   const uint16_t* buffer_start_;
    172   const uint16_t* buffer_cursor_;
    173   const uint16_t* buffer_end_;
    174   size_t buffer_pos_;
    175 };
    176 
    177 // ----------------------------------------------------------------------------
    178 // JavaScript Scanner.
    179 
    180 class Scanner {
    181  public:
    182   // Scoped helper for a re-settable bookmark.
    183   class BookmarkScope {
    184    public:
    185     explicit BookmarkScope(Scanner* scanner)
    186         : scanner_(scanner), bookmark_(kNoBookmark) {
    187       DCHECK_NOT_NULL(scanner_);
    188     }
    189     ~BookmarkScope() {}
    190 
    191     void Set();
    192     void Apply();
    193     bool HasBeenSet();
    194     bool HasBeenApplied();
    195 
    196    private:
    197     static const size_t kNoBookmark;
    198     static const size_t kBookmarkWasApplied;
    199     static const size_t kBookmarkAtFirstPos;
    200 
    201     Scanner* scanner_;
    202     size_t bookmark_;
    203 
    204     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
    205   };
    206 
    207   // Representation of an interval of source positions.
    208   struct Location {
    209     Location(int b, int e) : beg_pos(b), end_pos(e) { }
    210     Location() : beg_pos(0), end_pos(0) { }
    211 
    212     bool IsValid() const {
    213       return beg_pos >= 0 && end_pos >= beg_pos;
    214     }
    215 
    216     static Location invalid() { return Location(-1, -1); }
    217 
    218     int beg_pos;
    219     int end_pos;
    220   };
    221 
    222   // -1 is outside of the range of any real source code.
    223   static const int kNoOctalLocation = -1;
    224   static const uc32 kEndOfInput = Utf16CharacterStream::kEndOfInput;
    225 
    226   explicit Scanner(UnicodeCache* scanner_contants, Utf16CharacterStream* source,
    227                    bool is_module);
    228 
    229   void Initialize();
    230 
    231   // Returns the next token and advances input.
    232   Token::Value Next();
    233   // Returns the token following peek()
    234   Token::Value PeekAhead();
    235   // Returns the current token again.
    236   Token::Value current_token() { return current().token; }
    237 
    238   Token::Value current_contextual_token() { return current().contextual_token; }
    239   Token::Value next_contextual_token() { return next().contextual_token; }
    240 
    241   // Returns the location information for the current token
    242   // (the token last returned by Next()).
    243   Location location() const { return current().location; }
    244 
    245   // This error is specifically an invalid hex or unicode escape sequence.
    246   bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
    247   MessageTemplate::Template error() const { return scanner_error_; }
    248   Location error_location() const { return scanner_error_location_; }
    249 
    250   bool has_invalid_template_escape() const {
    251     return current().invalid_template_escape_message != MessageTemplate::kNone;
    252   }
    253   MessageTemplate::Template invalid_template_escape_message() const {
    254     DCHECK(has_invalid_template_escape());
    255     return current().invalid_template_escape_message;
    256   }
    257   Location invalid_template_escape_location() const {
    258     DCHECK(has_invalid_template_escape());
    259     return current().invalid_template_escape_location;
    260   }
    261 
    262   // Similar functions for the upcoming token.
    263 
    264   // One token look-ahead (past the token returned by Next()).
    265   Token::Value peek() const { return next().token; }
    266 
    267   Location peek_location() const { return next().location; }
    268 
    269   bool literal_contains_escapes() const {
    270     return LiteralContainsEscapes(current());
    271   }
    272 
    273   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory) const;
    274   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory) const;
    275   const AstRawString* CurrentRawSymbol(
    276       AstValueFactory* ast_value_factory) const;
    277 
    278   double DoubleValue();
    279 
    280   const char* CurrentLiteralAsCString(Zone* zone) const;
    281 
    282   inline bool CurrentMatches(Token::Value token) const {
    283     DCHECK(Token::IsKeyword(token));
    284     return current().token == token;
    285   }
    286 
    287   inline bool CurrentMatchesContextual(Token::Value token) const {
    288     DCHECK(Token::IsContextualKeyword(token));
    289     return current().contextual_token == token;
    290   }
    291 
    292   // Match the token against the contextual keyword or literal buffer.
    293   inline bool CurrentMatchesContextualEscaped(Token::Value token) const {
    294     DCHECK(Token::IsContextualKeyword(token) || token == Token::LET);
    295     // Escaped keywords are not matched as tokens. So if we require escape
    296     // and/or string processing we need to look at the literal content
    297     // (which was escape-processed already).
    298     // Conveniently, !current().literal_chars.is_used() for all proper
    299     // keywords, so this second condition should exit early in common cases.
    300     return (current().contextual_token == token) ||
    301            (current().literal_chars.is_used() &&
    302             current().literal_chars.Equals(Vector<const char>(
    303                 Token::String(token), Token::StringLength(token))));
    304   }
    305 
    306   bool IsUseStrict() const {
    307     return current().token == Token::STRING &&
    308            current().literal_chars.Equals(
    309                Vector<const char>("use strict", strlen("use strict")));
    310   }
    311   bool IsGetOrSet(bool* is_get, bool* is_set) const {
    312     *is_get = CurrentMatchesContextual(Token::GET);
    313     *is_set = CurrentMatchesContextual(Token::SET);
    314     return *is_get || *is_set;
    315   }
    316   bool IsLet() const {
    317     return CurrentMatches(Token::LET) ||
    318            CurrentMatchesContextualEscaped(Token::LET);
    319   }
    320 
    321   // Check whether the CurrentSymbol() has already been seen.
    322   // The DuplicateFinder holds the data, so different instances can be used
    323   // for different sets of duplicates to check for.
    324   bool IsDuplicateSymbol(DuplicateFinder* duplicate_finder,
    325                          AstValueFactory* ast_value_factory) const;
    326 
    327   UnicodeCache* unicode_cache() { return unicode_cache_; }
    328 
    329   // Returns the location of the last seen octal literal.
    330   Location octal_position() const { return octal_pos_; }
    331   void clear_octal_position() {
    332     octal_pos_ = Location::invalid();
    333     octal_message_ = MessageTemplate::kNone;
    334   }
    335   MessageTemplate::Template octal_message() const { return octal_message_; }
    336 
    337   // Returns the value of the last smi that was scanned.
    338   uint32_t smi_value() const { return current().smi_value_; }
    339 
    340   // Seek forward to the given position.  This operation does not
    341   // work in general, for instance when there are pushed back
    342   // characters, but works for seeking forward until simple delimiter
    343   // tokens, which is what it is used for.
    344   void SeekForward(int pos);
    345 
    346   // Returns true if there was a line terminator before the peek'ed token,
    347   // possibly inside a multi-line comment.
    348   bool HasLineTerminatorBeforeNext() const {
    349     return next().after_line_terminator;
    350   }
    351 
    352   bool HasLineTerminatorAfterNext() {
    353     Token::Value ensure_next_next = PeekAhead();
    354     USE(ensure_next_next);
    355     return next_next().after_line_terminator;
    356   }
    357 
    358   // Scans the input as a regular expression pattern, next token must be /(=).
    359   // Returns true if a pattern is scanned.
    360   bool ScanRegExpPattern();
    361   // Scans the input as regular expression flags. Returns the flags on success.
    362   Maybe<RegExp::Flags> ScanRegExpFlags();
    363 
    364   // Scans the input as a template literal
    365   Token::Value ScanTemplateStart();
    366   Token::Value ScanTemplateContinuation() {
    367     DCHECK_EQ(next().token, Token::RBRACE);
    368     next().location.beg_pos = source_pos() - 1;  // We already consumed }
    369     return ScanTemplateSpan();
    370   }
    371 
    372   Handle<String> SourceUrl(Isolate* isolate) const;
    373   Handle<String> SourceMappingUrl(Isolate* isolate) const;
    374 
    375   bool FoundHtmlComment() const { return found_html_comment_; }
    376 
    377   bool allow_harmony_bigint() const { return allow_harmony_bigint_; }
    378   void set_allow_harmony_bigint(bool allow) { allow_harmony_bigint_ = allow; }
    379   bool allow_harmony_private_fields() const {
    380     return allow_harmony_private_fields_;
    381   }
    382   void set_allow_harmony_private_fields(bool allow) {
    383     allow_harmony_private_fields_ = allow;
    384   }
    385   bool allow_harmony_numeric_separator() const {
    386     return allow_harmony_numeric_separator_;
    387   }
    388   void set_allow_harmony_numeric_separator(bool allow) {
    389     allow_harmony_numeric_separator_ = allow;
    390   }
    391 
    392  private:
    393   // Scoped helper for saving & restoring scanner error state.
    394   // This is used for tagged template literals, in which normally forbidden
    395   // escape sequences are allowed.
    396   class ErrorState;
    397 
    398   // Scoped helper for literal recording. Automatically drops the literal
    399   // if aborting the scanning before it's complete.
    400   class LiteralScope {
    401    public:
    402     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
    403       scanner_->StartLiteral();
    404     }
    405     ~LiteralScope() {
    406       if (!complete_) scanner_->DropLiteral();
    407     }
    408     void Complete() { complete_ = true; }
    409 
    410    private:
    411     Scanner* scanner_;
    412     bool complete_;
    413   };
    414 
    415   // LiteralBuffer -  Collector of chars of literals.
    416   class LiteralBuffer {
    417    public:
    418     LiteralBuffer()
    419         : position_(0), is_one_byte_(true), is_used_(false), backing_store_() {}
    420 
    421     ~LiteralBuffer() { backing_store_.Dispose(); }
    422 
    423     V8_INLINE void AddChar(char code_unit) {
    424       DCHECK(is_used_);
    425       DCHECK(IsValidAscii(code_unit));
    426       AddOneByteChar(static_cast<byte>(code_unit));
    427     }
    428 
    429     V8_INLINE void AddChar(uc32 code_unit) {
    430       DCHECK(is_used_);
    431       if (is_one_byte_) {
    432         if (code_unit <= static_cast<uc32>(unibrow::Latin1::kMaxChar)) {
    433           AddOneByteChar(static_cast<byte>(code_unit));
    434           return;
    435         }
    436         ConvertToTwoByte();
    437       }
    438       AddTwoByteChar(code_unit);
    439     }
    440 
    441     bool is_one_byte() const { return is_one_byte_; }
    442 
    443     bool Equals(Vector<const char> keyword) const {
    444       DCHECK(is_used_);
    445       return is_one_byte() && keyword.length() == position_ &&
    446              (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
    447     }
    448 
    449     Vector<const uint16_t> two_byte_literal() const {
    450       DCHECK(!is_one_byte_);
    451       DCHECK(is_used_);
    452       DCHECK_EQ(position_ & 0x1, 0);
    453       return Vector<const uint16_t>(
    454           reinterpret_cast<const uint16_t*>(backing_store_.start()),
    455           position_ >> 1);
    456     }
    457 
    458     Vector<const uint8_t> one_byte_literal() const {
    459       DCHECK(is_one_byte_);
    460       DCHECK(is_used_);
    461       return Vector<const uint8_t>(
    462           reinterpret_cast<const uint8_t*>(backing_store_.start()), position_);
    463     }
    464 
    465     int length() const { return is_one_byte_ ? position_ : (position_ >> 1); }
    466 
    467     void Start() {
    468       DCHECK(!is_used_);
    469       DCHECK_EQ(0, position_);
    470       is_used_ = true;
    471     }
    472 
    473     bool is_used() const { return is_used_; }
    474 
    475     void Drop() {
    476       is_used_ = false;
    477       position_ = 0;
    478       is_one_byte_ = true;
    479     }
    480 
    481     Handle<String> Internalize(Isolate* isolate) const;
    482 
    483    private:
    484     static const int kInitialCapacity = 16;
    485     static const int kGrowthFactory = 4;
    486     static const int kMinConversionSlack = 256;
    487     static const int kMaxGrowth = 1 * MB;
    488 
    489     inline bool IsValidAscii(char code_unit) {
    490       // Control characters and printable characters span the range of
    491       // valid ASCII characters (0-127). Chars are unsigned on some
    492       // platforms which causes compiler warnings if the validity check
    493       // tests the lower bound >= 0 as it's always true.
    494       return iscntrl(code_unit) || isprint(code_unit);
    495     }
    496 
    497     V8_INLINE void AddOneByteChar(byte one_byte_char) {
    498       DCHECK(is_one_byte_);
    499       if (position_ >= backing_store_.length()) ExpandBuffer();
    500       backing_store_[position_] = one_byte_char;
    501       position_ += kOneByteSize;
    502     }
    503 
    504     void AddTwoByteChar(uc32 code_unit);
    505     int NewCapacity(int min_capacity);
    506     void ExpandBuffer();
    507     void ConvertToTwoByte();
    508 
    509     int position_;
    510     bool is_one_byte_;
    511     bool is_used_;
    512     Vector<byte> backing_store_;
    513 
    514     DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
    515   };
    516 
    517   // The current and look-ahead token.
    518   struct TokenDesc {
    519     Location location = {0, 0};
    520     LiteralBuffer literal_chars;
    521     LiteralBuffer raw_literal_chars;
    522     Token::Value token = Token::UNINITIALIZED;
    523     MessageTemplate::Template invalid_template_escape_message =
    524         MessageTemplate::kNone;
    525     Location invalid_template_escape_location;
    526     Token::Value contextual_token = Token::UNINITIALIZED;
    527     uint32_t smi_value_ = 0;
    528     bool after_line_terminator = false;
    529   };
    530 
    531   enum NumberKind {
    532     BINARY,
    533     OCTAL,
    534     IMPLICIT_OCTAL,
    535     HEX,
    536     DECIMAL,
    537     DECIMAL_WITH_LEADING_ZERO
    538   };
    539 
    540   static const int kCharacterLookaheadBufferSize = 1;
    541   const int kMaxAscii = 127;
    542 
    543   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
    544   template <bool capture_raw>
    545   uc32 ScanOctalEscape(uc32 c, int length);
    546 
    547   // Call this after setting source_ to the input.
    548   void Init() {
    549     // Set c0_ (one character ahead)
    550     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    551     Advance();
    552 
    553     current_ = &token_storage_[0];
    554     next_ = &token_storage_[1];
    555     next_next_ = &token_storage_[2];
    556 
    557     found_html_comment_ = false;
    558     scanner_error_ = MessageTemplate::kNone;
    559   }
    560 
    561   void ReportScannerError(const Location& location,
    562                           MessageTemplate::Template error) {
    563     if (has_error()) return;
    564     scanner_error_ = error;
    565     scanner_error_location_ = location;
    566   }
    567 
    568   void ReportScannerError(int pos, MessageTemplate::Template error) {
    569     if (has_error()) return;
    570     scanner_error_ = error;
    571     scanner_error_location_ = Location(pos, pos + 1);
    572   }
    573 
    574   // Seek to the next_ token at the given position.
    575   void SeekNext(size_t position);
    576 
    577   // Literal buffer support
    578   inline void StartLiteral() { next().literal_chars.Start(); }
    579 
    580   inline void StartRawLiteral() { next().raw_literal_chars.Start(); }
    581 
    582   V8_INLINE void AddLiteralChar(uc32 c) { next().literal_chars.AddChar(c); }
    583 
    584   V8_INLINE void AddLiteralChar(char c) { next().literal_chars.AddChar(c); }
    585 
    586   V8_INLINE void AddRawLiteralChar(uc32 c) {
    587     next().raw_literal_chars.AddChar(c);
    588   }
    589 
    590   // Stops scanning of a literal and drop the collected characters,
    591   // e.g., due to an encountered error.
    592   inline void DropLiteral() {
    593     next().literal_chars.Drop();
    594     next().raw_literal_chars.Drop();
    595   }
    596 
    597   inline void AddLiteralCharAdvance() {
    598     AddLiteralChar(c0_);
    599     Advance();
    600   }
    601 
    602   // Low-level scanning support.
    603   template <bool capture_raw = false>
    604   void Advance() {
    605     if (capture_raw) {
    606       AddRawLiteralChar(c0_);
    607     }
    608     c0_ = source_->Advance();
    609   }
    610 
    611   template <typename FunctionType>
    612   V8_INLINE void AdvanceUntil(FunctionType check) {
    613     c0_ = source_->AdvanceUntil(check);
    614   }
    615 
    616   bool CombineSurrogatePair() {
    617     DCHECK(!unibrow::Utf16::IsLeadSurrogate(kEndOfInput));
    618     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
    619       uc32 c1 = source_->Advance();
    620       DCHECK(!unibrow::Utf16::IsTrailSurrogate(kEndOfInput));
    621       if (unibrow::Utf16::IsTrailSurrogate(c1)) {
    622         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
    623         return true;
    624       }
    625       source_->Back();
    626     }
    627     return false;
    628   }
    629 
    630   void PushBack(uc32 ch) {
    631     DCHECK_LE(c0_, static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode));
    632     source_->Back();
    633     c0_ = ch;
    634   }
    635 
    636   uc32 Peek() const { return source_->Peek(); }
    637 
    638   inline Token::Value Select(Token::Value tok) {
    639     Advance();
    640     return tok;
    641   }
    642 
    643   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
    644     Advance();
    645     if (c0_ == next) {
    646       Advance();
    647       return then;
    648     } else {
    649       return else_;
    650     }
    651   }
    652   // Returns the literal string, if any, for the current token (the
    653   // token last returned by Next()). The string is 0-terminated.
    654   // Literal strings are collected for identifiers, strings, numbers as well
    655   // as for template literals. For template literals we also collect the raw
    656   // form.
    657   // These functions only give the correct result if the literal was scanned
    658   // when a LiteralScope object is alive.
    659   //
    660   // Current usage of these functions is unfortunately a little undisciplined,
    661   // and is_literal_one_byte() + is_literal_one_byte_string() is also
    662   // requested for tokens that do not have a literal. Hence, we treat any
    663   // token as a one-byte literal. E.g. Token::FUNCTION pretends to have a
    664   // literal "function".
    665   Vector<const uint8_t> literal_one_byte_string() const {
    666     if (current().literal_chars.is_used())
    667       return current().literal_chars.one_byte_literal();
    668     const char* str = Token::String(current().token);
    669     const uint8_t* str_as_uint8 = reinterpret_cast<const uint8_t*>(str);
    670     return Vector<const uint8_t>(str_as_uint8,
    671                                  Token::StringLength(current().token));
    672   }
    673   Vector<const uint16_t> literal_two_byte_string() const {
    674     DCHECK(current().literal_chars.is_used());
    675     return current().literal_chars.two_byte_literal();
    676   }
    677   bool is_literal_one_byte() const {
    678     return !current().literal_chars.is_used() ||
    679            current().literal_chars.is_one_byte();
    680   }
    681   // Returns the literal string for the next token (the token that
    682   // would be returned if Next() were called).
    683   Vector<const uint8_t> next_literal_one_byte_string() const {
    684     DCHECK(next().literal_chars.is_used());
    685     return next().literal_chars.one_byte_literal();
    686   }
    687   Vector<const uint16_t> next_literal_two_byte_string() const {
    688     DCHECK(next().literal_chars.is_used());
    689     return next().literal_chars.two_byte_literal();
    690   }
    691   bool is_next_literal_one_byte() const {
    692     DCHECK(next().literal_chars.is_used());
    693     return next().literal_chars.is_one_byte();
    694   }
    695   Vector<const uint8_t> raw_literal_one_byte_string() const {
    696     DCHECK(current().raw_literal_chars.is_used());
    697     return current().raw_literal_chars.one_byte_literal();
    698   }
    699   Vector<const uint16_t> raw_literal_two_byte_string() const {
    700     DCHECK(current().raw_literal_chars.is_used());
    701     return current().raw_literal_chars.two_byte_literal();
    702   }
    703   bool is_raw_literal_one_byte() const {
    704     DCHECK(current().raw_literal_chars.is_used());
    705     return current().raw_literal_chars.is_one_byte();
    706   }
    707 
    708   template <bool capture_raw, bool unicode = false>
    709   uc32 ScanHexNumber(int expected_length);
    710   // Scan a number of any length but not bigger than max_value. For example, the
    711   // number can be 000000001, so it's very long in characters but its value is
    712   // small.
    713   template <bool capture_raw>
    714   uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
    715 
    716   // Scans a single JavaScript token.
    717   void Scan();
    718 
    719   V8_INLINE Token::Value SkipWhiteSpace();
    720   Token::Value SkipSingleHTMLComment();
    721   Token::Value SkipSingleLineComment();
    722   Token::Value SkipSourceURLComment();
    723   void TryToParseSourceURLComment();
    724   Token::Value SkipMultiLineComment();
    725   // Scans a possible HTML comment -- begins with '<!'.
    726   Token::Value ScanHtmlComment();
    727 
    728   bool ScanDigitsWithNumericSeparators(bool (*predicate)(uc32 ch),
    729                                        bool is_check_first_digit);
    730   bool ScanDecimalDigits();
    731   // Optimized function to scan decimal number as Smi.
    732   bool ScanDecimalAsSmi(uint64_t* value);
    733   bool ScanDecimalAsSmiWithNumericSeparators(uint64_t* value);
    734   bool ScanHexDigits();
    735   bool ScanBinaryDigits();
    736   bool ScanSignedInteger();
    737   bool ScanOctalDigits();
    738   bool ScanImplicitOctalDigits(int start_pos, NumberKind* kind);
    739 
    740   Token::Value ScanNumber(bool seen_period);
    741   Token::Value ScanIdentifierOrKeyword();
    742   Token::Value ScanIdentifierOrKeywordInner(LiteralScope* literal);
    743 
    744   Token::Value ScanString();
    745   Token::Value ScanPrivateName();
    746 
    747   // Scans an escape-sequence which is part of a string and adds the
    748   // decoded character to the current literal. Returns true if a pattern
    749   // is scanned.
    750   template <bool capture_raw>
    751   bool ScanEscape();
    752 
    753   // Decodes a Unicode escape-sequence which is part of an identifier.
    754   // If the escape sequence cannot be decoded the result is kBadChar.
    755   uc32 ScanIdentifierUnicodeEscape();
    756   // Helper for the above functions.
    757   template <bool capture_raw>
    758   uc32 ScanUnicodeEscape();
    759 
    760   Token::Value ScanTemplateSpan();
    761 
    762   // Return the current source position.
    763   int source_pos() {
    764     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
    765   }
    766 
    767   static bool LiteralContainsEscapes(const TokenDesc& token) {
    768     Location location = token.location;
    769     int source_length = (location.end_pos - location.beg_pos);
    770     if (token.token == Token::STRING) {
    771       // Subtract delimiters.
    772       source_length -= 2;
    773     }
    774     return token.literal_chars.is_used() &&
    775            (token.literal_chars.length() != source_length);
    776   }
    777 
    778 #ifdef DEBUG
    779   void SanityCheckTokenDesc(const TokenDesc&) const;
    780 #endif
    781 
    782   UnicodeCache* unicode_cache_;
    783 
    784   // Values parsed from magic comments.
    785   LiteralBuffer source_url_;
    786   LiteralBuffer source_mapping_url_;
    787 
    788   TokenDesc token_storage_[3];
    789 
    790   TokenDesc& next() { return *next_; }
    791 
    792   const TokenDesc& current() const { return *current_; }
    793   const TokenDesc& next() const { return *next_; }
    794   const TokenDesc& next_next() const { return *next_next_; }
    795 
    796   TokenDesc* current_;    // desc for current token (as returned by Next())
    797   TokenDesc* next_;       // desc for next token (one token look-ahead)
    798   TokenDesc* next_next_;  // desc for the token after next (after PeakAhead())
    799 
    800   // Input stream. Must be initialized to an Utf16CharacterStream.
    801   Utf16CharacterStream* const source_;
    802 
    803   // Last-seen positions of potentially problematic tokens.
    804   Location octal_pos_;
    805   MessageTemplate::Template octal_message_;
    806 
    807   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
    808   uc32 c0_;
    809 
    810   // Whether this scanner encountered an HTML comment.
    811   bool found_html_comment_;
    812 
    813   // Harmony flags to allow ESNext features.
    814   bool allow_harmony_bigint_;
    815   bool allow_harmony_private_fields_;
    816   bool allow_harmony_numeric_separator_;
    817 
    818   const bool is_module_;
    819 
    820   MessageTemplate::Template scanner_error_;
    821   Location scanner_error_location_;
    822 };
    823 
    824 }  // namespace internal
    825 }  // namespace v8
    826 
    827 #endif  // V8_PARSING_SCANNER_H_
    828