Home | History | Annotate | Download | only in parsing
      1 // Copyright 2011 the V8 project authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Features shared by parsing and pre-parsing scanners.
      6 
      7 #ifndef V8_PARSING_SCANNER_H_
      8 #define V8_PARSING_SCANNER_H_
      9 
     10 #include "src/allocation.h"
     11 #include "src/base/hashmap.h"
     12 #include "src/base/logging.h"
     13 #include "src/char-predicates.h"
     14 #include "src/collector.h"
     15 #include "src/globals.h"
     16 #include "src/list.h"
     17 #include "src/messages.h"
     18 #include "src/parsing/token.h"
     19 #include "src/unicode-decoder.h"
     20 #include "src/unicode.h"
     21 
     22 namespace v8 {
     23 namespace internal {
     24 
     25 
     26 class AstRawString;
     27 class AstValueFactory;
     28 class ParserRecorder;
     29 class UnicodeCache;
     30 
     31 
     32 // ---------------------------------------------------------------------
     33 // Buffered stream of UTF-16 code units, using an internal UTF-16 buffer.
     34 // A code unit is a 16 bit value representing either a 16 bit code point
     35 // or one part of a surrogate pair that make a single 21 bit code point.
     36 
     37 class Utf16CharacterStream {
     38  public:
     39   Utf16CharacterStream() : pos_(0) { }
     40   virtual ~Utf16CharacterStream() { }
     41 
     42   // Returns and advances past the next UTF-16 code unit in the input
     43   // stream. If there are no more code units, it returns a negative
     44   // value.
     45   inline uc32 Advance() {
     46     if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
     47       pos_++;
     48       return static_cast<uc32>(*(buffer_cursor_++));
     49     }
     50     // Note: currently the following increment is necessary to avoid a
     51     // parser problem! The scanner treats the final kEndOfInput as
     52     // a code unit with a position, and does math relative to that
     53     // position.
     54     pos_++;
     55 
     56     return kEndOfInput;
     57   }
     58 
     59   // Return the current position in the code unit stream.
     60   // Starts at zero.
     61   inline size_t pos() const { return pos_; }
     62 
     63   // Skips forward past the next code_unit_count UTF-16 code units
     64   // in the input, or until the end of input if that comes sooner.
     65   // Returns the number of code units actually skipped. If less
     66   // than code_unit_count,
     67   inline size_t SeekForward(size_t code_unit_count) {
     68     size_t buffered_chars = buffer_end_ - buffer_cursor_;
     69     if (code_unit_count <= buffered_chars) {
     70       buffer_cursor_ += code_unit_count;
     71       pos_ += code_unit_count;
     72       return code_unit_count;
     73     }
     74     return SlowSeekForward(code_unit_count);
     75   }
     76 
     77   // Pushes back the most recently read UTF-16 code unit (or negative
     78   // value if at end of input), i.e., the value returned by the most recent
     79   // call to Advance.
     80   // Must not be used right after calling SeekForward.
     81   virtual void PushBack(int32_t code_unit) = 0;
     82 
     83   virtual bool SetBookmark();
     84   virtual void ResetToBookmark();
     85 
     86  protected:
     87   static const uc32 kEndOfInput = -1;
     88 
     89   // Ensures that the buffer_cursor_ points to the code_unit at
     90   // position pos_ of the input, if possible. If the position
     91   // is at or after the end of the input, return false. If there
     92   // are more code_units available, return true.
     93   virtual bool ReadBlock() = 0;
     94   virtual size_t SlowSeekForward(size_t code_unit_count) = 0;
     95 
     96   const uint16_t* buffer_cursor_;
     97   const uint16_t* buffer_end_;
     98   size_t pos_;
     99 };
    100 
    101 
    102 // ---------------------------------------------------------------------
    103 // DuplicateFinder discovers duplicate symbols.
    104 
    105 class DuplicateFinder {
    106  public:
    107   explicit DuplicateFinder(UnicodeCache* constants)
    108       : unicode_constants_(constants),
    109         backing_store_(16),
    110         map_(&Match) { }
    111 
    112   int AddOneByteSymbol(Vector<const uint8_t> key, int value);
    113   int AddTwoByteSymbol(Vector<const uint16_t> key, int value);
    114   // Add a a number literal by converting it (if necessary)
    115   // to the string that ToString(ToNumber(literal)) would generate.
    116   // and then adding that string with AddOneByteSymbol.
    117   // This string is the actual value used as key in an object literal,
    118   // and the one that must be different from the other keys.
    119   int AddNumber(Vector<const uint8_t> key, int value);
    120 
    121  private:
    122   int AddSymbol(Vector<const uint8_t> key, bool is_one_byte, int value);
    123   // Backs up the key and its length in the backing store.
    124   // The backup is stored with a base 127 encoding of the
    125   // length (plus a bit saying whether the string is one byte),
    126   // followed by the bytes of the key.
    127   uint8_t* BackupKey(Vector<const uint8_t> key, bool is_one_byte);
    128 
    129   // Compare two encoded keys (both pointing into the backing store)
    130   // for having the same base-127 encoded lengths and representation.
    131   // and then having the same 'length' bytes following.
    132   static bool Match(void* first, void* second);
    133   // Creates a hash from a sequence of bytes.
    134   static uint32_t Hash(Vector<const uint8_t> key, bool is_one_byte);
    135   // Checks whether a string containing a JS number is its canonical
    136   // form.
    137   static bool IsNumberCanonical(Vector<const uint8_t> key);
    138 
    139   // Size of buffer. Sufficient for using it to call DoubleToCString in
    140   // from conversions.h.
    141   static const int kBufferSize = 100;
    142 
    143   UnicodeCache* unicode_constants_;
    144   // Backing store used to store strings used as hashmap keys.
    145   SequenceCollector<unsigned char> backing_store_;
    146   base::HashMap map_;
    147   // Buffer used for string->number->canonical string conversions.
    148   char number_buffer_[kBufferSize];
    149 };
    150 
    151 // ----------------------------------------------------------------------------
    152 // LiteralBuffer -  Collector of chars of literals.
    153 
    154 const int kMaxAscii = 127;
    155 
    156 class LiteralBuffer {
    157  public:
    158   LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { }
    159 
    160   ~LiteralBuffer() { backing_store_.Dispose(); }
    161 
    162   INLINE(void AddChar(char code_unit)) {
    163     if (position_ >= backing_store_.length()) ExpandBuffer();
    164     DCHECK(is_one_byte_);
    165     DCHECK(0 <= code_unit && code_unit <= kMaxAscii);
    166     backing_store_[position_] = static_cast<byte>(code_unit);
    167     position_ += kOneByteSize;
    168     return;
    169   }
    170 
    171   INLINE(void AddChar(uc32 code_unit)) {
    172     if (position_ >= backing_store_.length()) ExpandBuffer();
    173     if (is_one_byte_) {
    174       if (code_unit <= unibrow::Latin1::kMaxChar) {
    175         backing_store_[position_] = static_cast<byte>(code_unit);
    176         position_ += kOneByteSize;
    177         return;
    178       }
    179       ConvertToTwoByte();
    180     }
    181     if (code_unit <= unibrow::Utf16::kMaxNonSurrogateCharCode) {
    182       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) = code_unit;
    183       position_ += kUC16Size;
    184     } else {
    185       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
    186           unibrow::Utf16::LeadSurrogate(code_unit);
    187       position_ += kUC16Size;
    188       if (position_ >= backing_store_.length()) ExpandBuffer();
    189       *reinterpret_cast<uint16_t*>(&backing_store_[position_]) =
    190           unibrow::Utf16::TrailSurrogate(code_unit);
    191       position_ += kUC16Size;
    192     }
    193   }
    194 
    195   bool is_one_byte() const { return is_one_byte_; }
    196 
    197   bool is_contextual_keyword(Vector<const char> keyword) const {
    198     return is_one_byte() && keyword.length() == position_ &&
    199         (memcmp(keyword.start(), backing_store_.start(), position_) == 0);
    200   }
    201 
    202   Vector<const uint16_t> two_byte_literal() const {
    203     DCHECK(!is_one_byte_);
    204     DCHECK((position_ & 0x1) == 0);
    205     return Vector<const uint16_t>(
    206         reinterpret_cast<const uint16_t*>(backing_store_.start()),
    207         position_ >> 1);
    208   }
    209 
    210   Vector<const uint8_t> one_byte_literal() const {
    211     DCHECK(is_one_byte_);
    212     return Vector<const uint8_t>(
    213         reinterpret_cast<const uint8_t*>(backing_store_.start()),
    214         position_);
    215   }
    216 
    217   int length() const {
    218     return is_one_byte_ ? position_ : (position_ >> 1);
    219   }
    220 
    221   void ReduceLength(int delta) {
    222     position_ -= delta * (is_one_byte_ ? kOneByteSize : kUC16Size);
    223   }
    224 
    225   void Reset() {
    226     position_ = 0;
    227     is_one_byte_ = true;
    228   }
    229 
    230   Handle<String> Internalize(Isolate* isolate) const;
    231 
    232   void CopyFrom(const LiteralBuffer* other) {
    233     if (other == nullptr) {
    234       Reset();
    235     } else {
    236       is_one_byte_ = other->is_one_byte_;
    237       position_ = other->position_;
    238       if (position_ < backing_store_.length()) {
    239         std::copy(other->backing_store_.begin(),
    240                   other->backing_store_.begin() + position_,
    241                   backing_store_.begin());
    242       } else {
    243         backing_store_.Dispose();
    244         backing_store_ = other->backing_store_.Clone();
    245       }
    246     }
    247   }
    248 
    249  private:
    250   static const int kInitialCapacity = 16;
    251   static const int kGrowthFactory = 4;
    252   static const int kMinConversionSlack = 256;
    253   static const int kMaxGrowth = 1 * MB;
    254   inline int NewCapacity(int min_capacity) {
    255     int capacity = Max(min_capacity, backing_store_.length());
    256     int new_capacity = Min(capacity * kGrowthFactory, capacity + kMaxGrowth);
    257     return new_capacity;
    258   }
    259 
    260   void ExpandBuffer() {
    261     Vector<byte> new_store = Vector<byte>::New(NewCapacity(kInitialCapacity));
    262     MemCopy(new_store.start(), backing_store_.start(), position_);
    263     backing_store_.Dispose();
    264     backing_store_ = new_store;
    265   }
    266 
    267   void ConvertToTwoByte() {
    268     DCHECK(is_one_byte_);
    269     Vector<byte> new_store;
    270     int new_content_size = position_ * kUC16Size;
    271     if (new_content_size >= backing_store_.length()) {
    272       // Ensure room for all currently read code units as UC16 as well
    273       // as the code unit about to be stored.
    274       new_store = Vector<byte>::New(NewCapacity(new_content_size));
    275     } else {
    276       new_store = backing_store_;
    277     }
    278     uint8_t* src = backing_store_.start();
    279     uint16_t* dst = reinterpret_cast<uint16_t*>(new_store.start());
    280     for (int i = position_ - 1; i >= 0; i--) {
    281       dst[i] = src[i];
    282     }
    283     if (new_store.start() != backing_store_.start()) {
    284       backing_store_.Dispose();
    285       backing_store_ = new_store;
    286     }
    287     position_ = new_content_size;
    288     is_one_byte_ = false;
    289   }
    290 
    291   bool is_one_byte_;
    292   int position_;
    293   Vector<byte> backing_store_;
    294 
    295   DISALLOW_COPY_AND_ASSIGN(LiteralBuffer);
    296 };
    297 
    298 
    299 // ----------------------------------------------------------------------------
    300 // JavaScript Scanner.
    301 
    302 class Scanner {
    303  public:
    304   // Scoped helper for literal recording. Automatically drops the literal
    305   // if aborting the scanning before it's complete.
    306   class LiteralScope {
    307    public:
    308     explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) {
    309       scanner_->StartLiteral();
    310     }
    311      ~LiteralScope() {
    312        if (!complete_) scanner_->DropLiteral();
    313      }
    314     void Complete() {
    315       complete_ = true;
    316     }
    317 
    318    private:
    319     Scanner* scanner_;
    320     bool complete_;
    321   };
    322 
    323   // Scoped helper for a re-settable bookmark.
    324   class BookmarkScope {
    325    public:
    326     explicit BookmarkScope(Scanner* scanner) : scanner_(scanner) {
    327       DCHECK_NOT_NULL(scanner_);
    328     }
    329     ~BookmarkScope() { scanner_->DropBookmark(); }
    330 
    331     bool Set() { return scanner_->SetBookmark(); }
    332     void Reset() { scanner_->ResetToBookmark(); }
    333     bool HasBeenSet() { return scanner_->BookmarkHasBeenSet(); }
    334     bool HasBeenReset() { return scanner_->BookmarkHasBeenReset(); }
    335 
    336    private:
    337     Scanner* scanner_;
    338 
    339     DISALLOW_COPY_AND_ASSIGN(BookmarkScope);
    340   };
    341 
    342   // Representation of an interval of source positions.
    343   struct Location {
    344     Location(int b, int e) : beg_pos(b), end_pos(e) { }
    345     Location() : beg_pos(0), end_pos(0) { }
    346 
    347     bool IsValid() const {
    348       return beg_pos >= 0 && end_pos >= beg_pos;
    349     }
    350 
    351     static Location invalid() { return Location(-1, -1); }
    352 
    353     int beg_pos;
    354     int end_pos;
    355   };
    356 
    357   // -1 is outside of the range of any real source code.
    358   static const int kNoOctalLocation = -1;
    359 
    360   explicit Scanner(UnicodeCache* scanner_contants);
    361 
    362   void Initialize(Utf16CharacterStream* source);
    363 
    364   // Returns the next token and advances input.
    365   Token::Value Next();
    366   // Returns the token following peek()
    367   Token::Value PeekAhead();
    368   // Returns the current token again.
    369   Token::Value current_token() { return current_.token; }
    370   // Returns the location information for the current token
    371   // (the token last returned by Next()).
    372   Location location() const { return current_.location; }
    373 
    374   bool has_error() const { return scanner_error_ != MessageTemplate::kNone; }
    375   MessageTemplate::Template error() const { return scanner_error_; }
    376   Location error_location() const { return scanner_error_location_; }
    377 
    378   // Similar functions for the upcoming token.
    379 
    380   // One token look-ahead (past the token returned by Next()).
    381   Token::Value peek() const { return next_.token; }
    382 
    383   Location peek_location() const { return next_.location; }
    384 
    385   bool literal_contains_escapes() const {
    386     return LiteralContainsEscapes(current_);
    387   }
    388   bool next_literal_contains_escapes() const {
    389     return LiteralContainsEscapes(next_);
    390   }
    391   bool is_literal_contextual_keyword(Vector<const char> keyword) {
    392     DCHECK_NOT_NULL(current_.literal_chars);
    393     return current_.literal_chars->is_contextual_keyword(keyword);
    394   }
    395   bool is_next_contextual_keyword(Vector<const char> keyword) {
    396     DCHECK_NOT_NULL(next_.literal_chars);
    397     return next_.literal_chars->is_contextual_keyword(keyword);
    398   }
    399 
    400   const AstRawString* CurrentSymbol(AstValueFactory* ast_value_factory);
    401   const AstRawString* NextSymbol(AstValueFactory* ast_value_factory);
    402   const AstRawString* CurrentRawSymbol(AstValueFactory* ast_value_factory);
    403 
    404   double DoubleValue();
    405   bool ContainsDot();
    406   bool LiteralMatches(const char* data, int length, bool allow_escapes = true) {
    407     if (is_literal_one_byte() &&
    408         literal_length() == length &&
    409         (allow_escapes || !literal_contains_escapes())) {
    410       const char* token =
    411           reinterpret_cast<const char*>(literal_one_byte_string().start());
    412       return !strncmp(token, data, length);
    413     }
    414     return false;
    415   }
    416   inline bool UnescapedLiteralMatches(const char* data, int length) {
    417     return LiteralMatches(data, length, false);
    418   }
    419 
    420   void IsGetOrSet(bool* is_get, bool* is_set) {
    421     if (is_literal_one_byte() &&
    422         literal_length() == 3 &&
    423         !literal_contains_escapes()) {
    424       const char* token =
    425           reinterpret_cast<const char*>(literal_one_byte_string().start());
    426       *is_get = strncmp(token, "get", 3) == 0;
    427       *is_set = !*is_get && strncmp(token, "set", 3) == 0;
    428     }
    429   }
    430 
    431   int FindSymbol(DuplicateFinder* finder, int value);
    432 
    433   UnicodeCache* unicode_cache() { return unicode_cache_; }
    434 
    435   // Returns the location of the last seen octal literal.
    436   Location octal_position() const { return octal_pos_; }
    437   void clear_octal_position() { octal_pos_ = Location::invalid(); }
    438   // Returns the location of the last seen decimal literal with a leading zero.
    439   Location decimal_with_leading_zero_position() const {
    440     return decimal_with_leading_zero_pos_;
    441   }
    442   void clear_decimal_with_leading_zero_position() {
    443     decimal_with_leading_zero_pos_ = Location::invalid();
    444   }
    445 
    446   // Returns the value of the last smi that was scanned.
    447   int smi_value() const { return current_.smi_value_; }
    448 
    449   // Seek forward to the given position.  This operation does not
    450   // work in general, for instance when there are pushed back
    451   // characters, but works for seeking forward until simple delimiter
    452   // tokens, which is what it is used for.
    453   void SeekForward(int pos);
    454 
    455   // Returns true if there was a line terminator before the peek'ed token,
    456   // possibly inside a multi-line comment.
    457   bool HasAnyLineTerminatorBeforeNext() const {
    458     return has_line_terminator_before_next_ ||
    459            has_multiline_comment_before_next_;
    460   }
    461 
    462   bool HasAnyLineTerminatorAfterNext() {
    463     Token::Value ensure_next_next = PeekAhead();
    464     USE(ensure_next_next);
    465     return has_line_terminator_after_next_;
    466   }
    467 
    468   // Scans the input as a regular expression pattern, previous
    469   // character(s) must be /(=). Returns true if a pattern is scanned.
    470   bool ScanRegExpPattern(bool seen_equal);
    471   // Scans the input as regular expression flags. Returns the flags on success.
    472   Maybe<RegExp::Flags> ScanRegExpFlags();
    473 
    474   // Scans the input as a template literal
    475   Token::Value ScanTemplateStart();
    476   Token::Value ScanTemplateContinuation();
    477 
    478   const LiteralBuffer* source_url() const { return &source_url_; }
    479   const LiteralBuffer* source_mapping_url() const {
    480     return &source_mapping_url_;
    481   }
    482 
    483   bool IdentifierIsFutureStrictReserved(const AstRawString* string) const;
    484 
    485   bool FoundHtmlComment() const { return found_html_comment_; }
    486 
    487 #define DECLARE_ACCESSORS(name)                                \
    488   inline bool allow_##name() const { return allow_##name##_; } \
    489   inline void set_allow_##name(bool allow) { allow_##name##_ = allow; }
    490   DECLARE_ACCESSORS(harmony_exponentiation_operator)
    491 #undef ACCESSOR
    492 
    493  private:
    494   // The current and look-ahead token.
    495   struct TokenDesc {
    496     Token::Value token;
    497     Location location;
    498     LiteralBuffer* literal_chars;
    499     LiteralBuffer* raw_literal_chars;
    500     int smi_value_;
    501   };
    502 
    503   static const int kCharacterLookaheadBufferSize = 1;
    504 
    505   // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
    506   template <bool capture_raw>
    507   uc32 ScanOctalEscape(uc32 c, int length);
    508 
    509   // Call this after setting source_ to the input.
    510   void Init() {
    511     // Set c0_ (one character ahead)
    512     STATIC_ASSERT(kCharacterLookaheadBufferSize == 1);
    513     Advance();
    514     // Initialize current_ to not refer to a literal.
    515     current_.literal_chars = NULL;
    516     current_.raw_literal_chars = NULL;
    517     next_next_.token = Token::UNINITIALIZED;
    518     found_html_comment_ = false;
    519     scanner_error_ = MessageTemplate::kNone;
    520   }
    521 
    522   // Support BookmarkScope functionality.
    523   bool SetBookmark();
    524   void ResetToBookmark();
    525   bool BookmarkHasBeenSet();
    526   bool BookmarkHasBeenReset();
    527   void DropBookmark();
    528   static void CopyTokenDesc(TokenDesc* to, TokenDesc* from);
    529 
    530   void ReportScannerError(const Location& location,
    531                           MessageTemplate::Template error) {
    532     if (has_error()) return;
    533     scanner_error_ = error;
    534     scanner_error_location_ = location;
    535   }
    536 
    537   void ReportScannerError(int pos, MessageTemplate::Template error) {
    538     if (has_error()) return;
    539     scanner_error_ = error;
    540     scanner_error_location_ = Location(pos, pos + 1);
    541   }
    542 
    543   // Literal buffer support
    544   inline void StartLiteral() {
    545     LiteralBuffer* free_buffer =
    546         (current_.literal_chars == &literal_buffer0_)
    547             ? &literal_buffer1_
    548             : (current_.literal_chars == &literal_buffer1_) ? &literal_buffer2_
    549                                                             : &literal_buffer0_;
    550     free_buffer->Reset();
    551     next_.literal_chars = free_buffer;
    552   }
    553 
    554   inline void StartRawLiteral() {
    555     LiteralBuffer* free_buffer =
    556         (current_.raw_literal_chars == &raw_literal_buffer0_)
    557             ? &raw_literal_buffer1_
    558             : (current_.raw_literal_chars == &raw_literal_buffer1_)
    559                   ? &raw_literal_buffer2_
    560                   : &raw_literal_buffer0_;
    561     free_buffer->Reset();
    562     next_.raw_literal_chars = free_buffer;
    563   }
    564 
    565   INLINE(void AddLiteralChar(uc32 c)) {
    566     DCHECK_NOT_NULL(next_.literal_chars);
    567     next_.literal_chars->AddChar(c);
    568   }
    569 
    570   INLINE(void AddLiteralChar(char c)) {
    571     DCHECK_NOT_NULL(next_.literal_chars);
    572     next_.literal_chars->AddChar(c);
    573   }
    574 
    575   INLINE(void AddRawLiteralChar(uc32 c)) {
    576     DCHECK_NOT_NULL(next_.raw_literal_chars);
    577     next_.raw_literal_chars->AddChar(c);
    578   }
    579 
    580   INLINE(void ReduceRawLiteralLength(int delta)) {
    581     DCHECK_NOT_NULL(next_.raw_literal_chars);
    582     next_.raw_literal_chars->ReduceLength(delta);
    583   }
    584 
    585   // Stops scanning of a literal and drop the collected characters,
    586   // e.g., due to an encountered error.
    587   inline void DropLiteral() {
    588     next_.literal_chars = NULL;
    589     next_.raw_literal_chars = NULL;
    590   }
    591 
    592   inline void AddLiteralCharAdvance() {
    593     AddLiteralChar(c0_);
    594     Advance();
    595   }
    596 
    597   // Low-level scanning support.
    598   template <bool capture_raw = false, bool check_surrogate = true>
    599   void Advance() {
    600     if (capture_raw) {
    601       AddRawLiteralChar(c0_);
    602     }
    603     c0_ = source_->Advance();
    604     if (check_surrogate) HandleLeadSurrogate();
    605   }
    606 
    607   void HandleLeadSurrogate() {
    608     if (unibrow::Utf16::IsLeadSurrogate(c0_)) {
    609       uc32 c1 = source_->Advance();
    610       if (!unibrow::Utf16::IsTrailSurrogate(c1)) {
    611         source_->PushBack(c1);
    612       } else {
    613         c0_ = unibrow::Utf16::CombineSurrogatePair(c0_, c1);
    614       }
    615     }
    616   }
    617 
    618   void PushBack(uc32 ch) {
    619     if (c0_ > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) {
    620       source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_));
    621       source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_));
    622     } else {
    623       source_->PushBack(c0_);
    624     }
    625     c0_ = ch;
    626   }
    627 
    628   inline Token::Value Select(Token::Value tok) {
    629     Advance();
    630     return tok;
    631   }
    632 
    633   inline Token::Value Select(uc32 next, Token::Value then, Token::Value else_) {
    634     Advance();
    635     if (c0_ == next) {
    636       Advance();
    637       return then;
    638     } else {
    639       return else_;
    640     }
    641   }
    642 
    643   // Returns the literal string, if any, for the current token (the
    644   // token last returned by Next()). The string is 0-terminated.
    645   // Literal strings are collected for identifiers, strings, numbers as well
    646   // as for template literals. For template literals we also collect the raw
    647   // form.
    648   // These functions only give the correct result if the literal was scanned
    649   // when a LiteralScope object is alive.
    650   Vector<const uint8_t> literal_one_byte_string() {
    651     DCHECK_NOT_NULL(current_.literal_chars);
    652     return current_.literal_chars->one_byte_literal();
    653   }
    654   Vector<const uint16_t> literal_two_byte_string() {
    655     DCHECK_NOT_NULL(current_.literal_chars);
    656     return current_.literal_chars->two_byte_literal();
    657   }
    658   bool is_literal_one_byte() {
    659     DCHECK_NOT_NULL(current_.literal_chars);
    660     return current_.literal_chars->is_one_byte();
    661   }
    662   int literal_length() const {
    663     DCHECK_NOT_NULL(current_.literal_chars);
    664     return current_.literal_chars->length();
    665   }
    666   // Returns the literal string for the next token (the token that
    667   // would be returned if Next() were called).
    668   Vector<const uint8_t> next_literal_one_byte_string() {
    669     DCHECK_NOT_NULL(next_.literal_chars);
    670     return next_.literal_chars->one_byte_literal();
    671   }
    672   Vector<const uint16_t> next_literal_two_byte_string() {
    673     DCHECK_NOT_NULL(next_.literal_chars);
    674     return next_.literal_chars->two_byte_literal();
    675   }
    676   bool is_next_literal_one_byte() {
    677     DCHECK_NOT_NULL(next_.literal_chars);
    678     return next_.literal_chars->is_one_byte();
    679   }
    680   Vector<const uint8_t> raw_literal_one_byte_string() {
    681     DCHECK_NOT_NULL(current_.raw_literal_chars);
    682     return current_.raw_literal_chars->one_byte_literal();
    683   }
    684   Vector<const uint16_t> raw_literal_two_byte_string() {
    685     DCHECK_NOT_NULL(current_.raw_literal_chars);
    686     return current_.raw_literal_chars->two_byte_literal();
    687   }
    688   bool is_raw_literal_one_byte() {
    689     DCHECK_NOT_NULL(current_.raw_literal_chars);
    690     return current_.raw_literal_chars->is_one_byte();
    691   }
    692 
    693   template <bool capture_raw, bool unicode = false>
    694   uc32 ScanHexNumber(int expected_length);
    695   // Scan a number of any length but not bigger than max_value. For example, the
    696   // number can be 000000001, so it's very long in characters but its value is
    697   // small.
    698   template <bool capture_raw>
    699   uc32 ScanUnlimitedLengthHexNumber(int max_value, int beg_pos);
    700 
    701   // Scans a single JavaScript token.
    702   void Scan();
    703 
    704   bool SkipWhiteSpace();
    705   Token::Value SkipSingleLineComment();
    706   Token::Value SkipSourceURLComment();
    707   void TryToParseSourceURLComment();
    708   Token::Value SkipMultiLineComment();
    709   // Scans a possible HTML comment -- begins with '<!'.
    710   Token::Value ScanHtmlComment();
    711 
    712   void ScanDecimalDigits();
    713   Token::Value ScanNumber(bool seen_period);
    714   Token::Value ScanIdentifierOrKeyword();
    715   Token::Value ScanIdentifierSuffix(LiteralScope* literal, bool escaped);
    716 
    717   Token::Value ScanString();
    718 
    719   // Scans an escape-sequence which is part of a string and adds the
    720   // decoded character to the current literal. Returns true if a pattern
    721   // is scanned.
    722   template <bool capture_raw, bool in_template_literal>
    723   bool ScanEscape();
    724 
    725   // Decodes a Unicode escape-sequence which is part of an identifier.
    726   // If the escape sequence cannot be decoded the result is kBadChar.
    727   uc32 ScanIdentifierUnicodeEscape();
    728   // Helper for the above functions.
    729   template <bool capture_raw>
    730   uc32 ScanUnicodeEscape();
    731 
    732   Token::Value ScanTemplateSpan();
    733 
    734   // Return the current source position.
    735   int source_pos() {
    736     return static_cast<int>(source_->pos()) - kCharacterLookaheadBufferSize;
    737   }
    738 
    739   static bool LiteralContainsEscapes(const TokenDesc& token) {
    740     Location location = token.location;
    741     int source_length = (location.end_pos - location.beg_pos);
    742     if (token.token == Token::STRING) {
    743       // Subtract delimiters.
    744       source_length -= 2;
    745     }
    746     return token.literal_chars->length() != source_length;
    747   }
    748 
    749   UnicodeCache* unicode_cache_;
    750 
    751   // Buffers collecting literal strings, numbers, etc.
    752   LiteralBuffer literal_buffer0_;
    753   LiteralBuffer literal_buffer1_;
    754   LiteralBuffer literal_buffer2_;
    755 
    756   // Values parsed from magic comments.
    757   LiteralBuffer source_url_;
    758   LiteralBuffer source_mapping_url_;
    759 
    760   // Buffer to store raw string values
    761   LiteralBuffer raw_literal_buffer0_;
    762   LiteralBuffer raw_literal_buffer1_;
    763   LiteralBuffer raw_literal_buffer2_;
    764 
    765   TokenDesc current_;    // desc for current token (as returned by Next())
    766   TokenDesc next_;       // desc for next token (one token look-ahead)
    767   TokenDesc next_next_;  // desc for the token after next (after PeakAhead())
    768 
    769   // Variables for Scanner::BookmarkScope and the *Bookmark implementation.
    770   // These variables contain the scanner state when a bookmark is set.
    771   //
    772   // We will use bookmark_c0_ as a 'control' variable, where:
    773   // - bookmark_c0_ >= 0: A bookmark has been set and this contains c0_.
    774   // - bookmark_c0_ == -1: No bookmark has been set.
    775   // - bookmark_c0_ == -2: The bookmark has been applied (ResetToBookmark).
    776   //
    777   // Which state is being bookmarked? The parser state is distributed over
    778   // several variables, roughly like this:
    779   //   ...    1234        +       5678 ..... [character stream]
    780   //       [current_] [next_] c0_ |      [scanner state]
    781   // So when the scanner is logically at the beginning of an expression
    782   // like "1234 + 4567", then:
    783   // - current_ contains "1234"
    784   // - next_ contains "+"
    785   // - c0_ contains ' ' (the space between "+" and "5678",
    786   // - the source_ character stream points to the beginning of "5678".
    787   // To be able to restore this state, we will keep copies of current_, next_,
    788   // and c0_; we'll ask the stream to bookmark itself, and we'll copy the
    789   // contents of current_'s and next_'s literal buffers to bookmark_*_literal_.
    790   static const uc32 kNoBookmark = -1;
    791   static const uc32 kBookmarkWasApplied = -2;
    792   uc32 bookmark_c0_;
    793   TokenDesc bookmark_current_;
    794   TokenDesc bookmark_next_;
    795   LiteralBuffer bookmark_current_literal_;
    796   LiteralBuffer bookmark_current_raw_literal_;
    797   LiteralBuffer bookmark_next_literal_;
    798   LiteralBuffer bookmark_next_raw_literal_;
    799 
    800   // Input stream. Must be initialized to an Utf16CharacterStream.
    801   Utf16CharacterStream* source_;
    802 
    803   // Last-seen positions of potentially problematic tokens.
    804   Location octal_pos_;
    805   Location decimal_with_leading_zero_pos_;
    806 
    807   // One Unicode character look-ahead; c0_ < 0 at the end of the input.
    808   uc32 c0_;
    809 
    810   // Whether there is a line terminator whitespace character after
    811   // the current token, and  before the next. Does not count newlines
    812   // inside multiline comments.
    813   bool has_line_terminator_before_next_;
    814   // Whether there is a multi-line comment that contains a
    815   // line-terminator after the current token, and before the next.
    816   bool has_multiline_comment_before_next_;
    817   bool has_line_terminator_after_next_;
    818 
    819   // Whether this scanner encountered an HTML comment.
    820   bool found_html_comment_;
    821 
    822   bool allow_harmony_exponentiation_operator_;
    823 
    824   MessageTemplate::Template scanner_error_;
    825   Location scanner_error_location_;
    826 };
    827 
    828 }  // namespace internal
    829 }  // namespace v8
    830 
    831 #endif  // V8_PARSING_SCANNER_H_
    832