Home | History | Annotate | Download | only in parser
      1 /*
      2  *  Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org)
      3  *  Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  *
      5  *  This library is free software; you can redistribute it and/or
      6  *  modify it under the terms of the GNU Library General Public
      7  *  License as published by the Free Software Foundation; either
      8  *  version 2 of the License, or (at your option) any later version.
      9  *
     10  *  This library is distributed in the hope that it will be useful,
     11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  *  Library General Public License for more details.
     14  *
     15  *  You should have received a copy of the GNU Library General Public License
     16  *  along with this library; see the file COPYING.LIB.  If not, write to
     17  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  *  Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef Lexer_h
     23 #define Lexer_h
     24 
     25 #include "Lookup.h"
     26 #include "ParserArena.h"
     27 #include "SourceCode.h"
     28 #include <wtf/ASCIICType.h>
     29 #include <wtf/SegmentedVector.h>
     30 #include <wtf/Vector.h>
     31 #include <wtf/unicode/Unicode.h>
     32 
     33 namespace JSC {
     34 
     35     class RegExp;
     36 
     37     class Lexer : public Noncopyable {
     38     public:
     39         // Character manipulation functions.
     40         static bool isWhiteSpace(int character);
     41         static bool isLineTerminator(int character);
     42         static unsigned char convertHex(int c1, int c2);
     43         static UChar convertUnicode(int c1, int c2, int c3, int c4);
     44 
     45         // Functions to set up parsing.
     46         void setCode(const SourceCode&, ParserArena&);
     47         void setIsReparsing() { m_isReparsing = true; }
     48 
     49         // Functions for the parser itself.
     50         int lex(void* lvalp, void* llocp);
     51         int lineNumber() const { return m_lineNumber; }
     52         bool prevTerminator() const { return m_terminator; }
     53         SourceCode sourceCode(int openBrace, int closeBrace, int firstLine);
     54         bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0);
     55         bool skipRegExp();
     56 
     57         // Functions for use after parsing.
     58         bool sawError() const { return m_error; }
     59         void clear();
     60 
     61     private:
     62         friend class JSGlobalData;
     63 
     64         Lexer(JSGlobalData*);
     65         ~Lexer();
     66 
     67         void shift1();
     68         void shift2();
     69         void shift3();
     70         void shift4();
     71         void shiftLineTerminator();
     72 
     73         void record8(int);
     74         void record16(int);
     75         void record16(UChar);
     76 
     77         void copyCodeWithoutBOMs();
     78 
     79         int currentOffset() const;
     80         const UChar* currentCharacter() const;
     81 
     82         const Identifier* makeIdentifier(const UChar* characters, size_t length);
     83 
     84         bool lastTokenWasRestrKeyword() const;
     85 
     86         static const size_t initialReadBufferCapacity = 32;
     87 
     88         int m_lineNumber;
     89 
     90         Vector<char> m_buffer8;
     91         Vector<UChar> m_buffer16;
     92         bool m_terminator;
     93         bool m_delimited; // encountered delimiter like "'" and "}" on last run
     94         int m_lastToken;
     95 
     96         const SourceCode* m_source;
     97         const UChar* m_code;
     98         const UChar* m_codeStart;
     99         const UChar* m_codeEnd;
    100         bool m_isReparsing;
    101         bool m_atLineStart;
    102         bool m_error;
    103 
    104         // current and following unicode characters (int to allow for -1 for end-of-file marker)
    105         int m_current;
    106         int m_next1;
    107         int m_next2;
    108         int m_next3;
    109 
    110         IdentifierArena* m_arena;
    111 
    112         JSGlobalData* m_globalData;
    113 
    114         const HashTable m_keywordTable;
    115 
    116         Vector<UChar> m_codeWithoutBOMs;
    117     };
    118 
    119     inline bool Lexer::isWhiteSpace(int ch)
    120     {
    121         return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : WTF::Unicode::isSeparatorSpace(ch);
    122     }
    123 
    124     inline bool Lexer::isLineTerminator(int ch)
    125     {
    126         return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028;
    127     }
    128 
    129     inline unsigned char Lexer::convertHex(int c1, int c2)
    130     {
    131         return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2);
    132     }
    133 
    134     inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
    135     {
    136         return (convertHex(c1, c2) << 8) | convertHex(c3, c4);
    137     }
    138 
    139     // A bridge for yacc from the C world to the C++ world.
    140     inline int jscyylex(void* lvalp, void* llocp, void* globalData)
    141     {
    142         return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
    143     }
    144 
    145 } // namespace JSC
    146 
    147 #endif // Lexer_h
    148