Home | History | Annotate | Download | only in parser
      1 /*
      2  *  Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org)
      3  *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  *  Copyright (C) 2010 Zoltan Herczeg (zherczeg (at) inf.u-szeged.hu)
      5  *
      6  *  This library is free software; you can redistribute it and/or
      7  *  modify it under the terms of the GNU Library General Public
      8  *  License as published by the Free Software Foundation; either
      9  *  version 2 of the License, or (at your option) any later version.
     10  *
     11  *  This library is distributed in the hope that it will be useful,
     12  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     13  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     14  *  Library General Public License for more details.
     15  *
     16  *  You should have received a copy of the GNU Library General Public License
     17  *  along with this library; see the file COPYING.LIB.  If not, write to
     18  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     19  *  Boston, MA 02110-1301, USA.
     20  *
     21  */
     22 
     23 #ifndef Lexer_h
     24 #define Lexer_h
     25 
     26 #include "JSParser.h"
     27 #include "Lookup.h"
     28 #include "ParserArena.h"
     29 #include "SourceCode.h"
     30 #include <wtf/ASCIICType.h>
     31 #include <wtf/AlwaysInline.h>
     32 #include <wtf/SegmentedVector.h>
     33 #include <wtf/Vector.h>
     34 #include <wtf/unicode/Unicode.h>
     35 
     36 namespace JSC {
     37 
     38     class RegExp;
     39 
     40     class Lexer {
     41         WTF_MAKE_NONCOPYABLE(Lexer); WTF_MAKE_FAST_ALLOCATED;
     42     public:
     43         // Character manipulation functions.
     44         static bool isWhiteSpace(int character);
     45         static bool isLineTerminator(int character);
     46         static unsigned char convertHex(int c1, int c2);
     47         static UChar convertUnicode(int c1, int c2, int c3, int c4);
     48 
     49         // Functions to set up parsing.
     50         void setCode(const SourceCode&, ParserArena&);
     51         void setIsReparsing() { m_isReparsing = true; }
     52         bool isReparsing() const { return m_isReparsing; }
     53 
     54         // Functions for the parser itself.
     55         enum LexType { IdentifyReservedWords, IgnoreReservedWords };
     56         JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType, bool strictMode);
     57         bool nextTokenIsColon();
     58         int lineNumber() const { return m_lineNumber; }
     59         void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
     60         int lastLineNumber() const { return m_lastLineNumber; }
     61         bool prevTerminator() const { return m_terminator; }
     62         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
     63         bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
     64         bool skipRegExp();
     65 
     66         // Functions for use after parsing.
     67         bool sawError() const { return m_error; }
     68         void clear();
     69         int currentOffset() { return m_code - m_codeStart; }
     70         void setOffset(int offset)
     71         {
     72             m_error = 0;
     73             m_code = m_codeStart + offset;
     74             m_buffer8.resize(0);
     75             m_buffer16.resize(0);
     76             // Faster than an if-else sequence
     77             m_current = -1;
     78             if (LIKELY(m_code < m_codeEnd))
     79                 m_current = *m_code;
     80         }
     81         void setLineNumber(int line)
     82         {
     83             m_lineNumber = line;
     84         }
     85 
     86         SourceProvider* sourceProvider() const { return m_source->provider(); }
     87 
     88     private:
     89         friend class JSGlobalData;
     90 
     91         Lexer(JSGlobalData*);
     92         ~Lexer();
     93 
     94         void record8(int);
     95         void record16(int);
     96         void record16(UChar);
     97 
     98         void copyCodeWithoutBOMs();
     99 
    100         ALWAYS_INLINE void shift();
    101         ALWAYS_INLINE int peek(int offset);
    102         int getUnicodeCharacter();
    103         void shiftLineTerminator();
    104 
    105         ALWAYS_INLINE const UChar* currentCharacter() const;
    106         ALWAYS_INLINE int currentOffset() const;
    107 
    108         ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length);
    109 
    110         ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
    111 
    112         ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, LexType);
    113         ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
    114         ALWAYS_INLINE void parseHex(double& returnValue);
    115         ALWAYS_INLINE bool parseOctal(double& returnValue);
    116         ALWAYS_INLINE bool parseDecimal(double& returnValue);
    117         ALWAYS_INLINE void parseNumberAfterDecimalPoint();
    118         ALWAYS_INLINE bool parseNumberAfterExponentIndicator();
    119         ALWAYS_INLINE bool parseMultilineComment();
    120 
    121         static const size_t initialReadBufferCapacity = 32;
    122 
    123         int m_lineNumber;
    124         int m_lastLineNumber;
    125 
    126         Vector<char> m_buffer8;
    127         Vector<UChar> m_buffer16;
    128         bool m_terminator;
    129         bool m_delimited; // encountered delimiter like "'" and "}" on last run
    130         int m_lastToken;
    131 
    132         const SourceCode* m_source;
    133         const UChar* m_code;
    134         const UChar* m_codeStart;
    135         const UChar* m_codeEnd;
    136         bool m_isReparsing;
    137         bool m_atLineStart;
    138         bool m_error;
    139 
    140         // current and following unicode characters (int to allow for -1 for end-of-file marker)
    141         int m_current;
    142 
    143         IdentifierArena* m_arena;
    144 
    145         JSGlobalData* m_globalData;
    146 
    147         const HashTable m_keywordTable;
    148     };
    149 
    150     inline bool Lexer::isWhiteSpace(int ch)
    151     {
    152         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF);
    153     }
    154 
    155     inline bool Lexer::isLineTerminator(int ch)
    156     {
    157         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
    158     }
    159 
    160     inline unsigned char Lexer::convertHex(int c1, int c2)
    161     {
    162         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
    163     }
    164 
    165     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
    166     {
    167         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
    168     }
    169 
    170 } // namespace JSC
    171 
    172 #endif // Lexer_h
    173