1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org) 3 * Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2010 Zoltan Herczeg (zherczeg (at) inf.u-szeged.hu) 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23 #ifndef Lexer_h 24 #define Lexer_h 25 26 #include "JSParser.h" 27 #include "Lookup.h" 28 #include "ParserArena.h" 29 #include "SourceCode.h" 30 #include <wtf/ASCIICType.h> 31 #include <wtf/AlwaysInline.h> 32 #include <wtf/SegmentedVector.h> 33 #include <wtf/Vector.h> 34 #include <wtf/unicode/Unicode.h> 35 36 namespace JSC { 37 38 class RegExp; 39 40 class Lexer { 41 WTF_MAKE_NONCOPYABLE(Lexer); WTF_MAKE_FAST_ALLOCATED; 42 public: 43 // Character manipulation functions. 44 static bool isWhiteSpace(int character); 45 static bool isLineTerminator(int character); 46 static unsigned char convertHex(int c1, int c2); 47 static UChar convertUnicode(int c1, int c2, int c3, int c4); 48 49 // Functions to set up parsing. 50 void setCode(const SourceCode&, ParserArena&); 51 void setIsReparsing() { m_isReparsing = true; } 52 bool isReparsing() const { return m_isReparsing; } 53 54 // Functions for the parser itself. 55 enum LexType { IdentifyReservedWords, IgnoreReservedWords }; 56 JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType, bool strictMode); 57 bool nextTokenIsColon(); 58 int lineNumber() const { return m_lineNumber; } 59 void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; } 60 int lastLineNumber() const { return m_lastLineNumber; } 61 bool prevTerminator() const { return m_terminator; } 62 SourceCode sourceCode(int openBrace, int closeBrace, int firstLine); 63 bool scanRegExp(const Identifier*& pattern, const Identifier*& flags, UChar patternPrefix = 0); 64 bool skipRegExp(); 65 66 // Functions for use after parsing. 67 bool sawError() const { return m_error; } 68 void clear(); 69 int currentOffset() { return m_code - m_codeStart; } 70 void setOffset(int offset) 71 { 72 m_error = 0; 73 m_code = m_codeStart + offset; 74 m_buffer8.resize(0); 75 m_buffer16.resize(0); 76 // Faster than an if-else sequence 77 m_current = -1; 78 if (LIKELY(m_code < m_codeEnd)) 79 m_current = *m_code; 80 } 81 void setLineNumber(int line) 82 { 83 m_lineNumber = line; 84 } 85 86 SourceProvider* sourceProvider() const { return m_source->provider(); } 87 88 private: 89 friend class JSGlobalData; 90 91 Lexer(JSGlobalData*); 92 ~Lexer(); 93 94 void record8(int); 95 void record16(int); 96 void record16(UChar); 97 98 void copyCodeWithoutBOMs(); 99 100 ALWAYS_INLINE void shift(); 101 ALWAYS_INLINE int peek(int offset); 102 int getUnicodeCharacter(); 103 void shiftLineTerminator(); 104 105 ALWAYS_INLINE const UChar* currentCharacter() const; 106 ALWAYS_INLINE int currentOffset() const; 107 108 ALWAYS_INLINE const Identifier* makeIdentifier(const UChar* characters, size_t length); 109 110 ALWAYS_INLINE bool lastTokenWasRestrKeyword() const; 111 112 ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, LexType); 113 ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode); 114 ALWAYS_INLINE void parseHex(double& returnValue); 115 ALWAYS_INLINE bool parseOctal(double& returnValue); 116 ALWAYS_INLINE bool parseDecimal(double& returnValue); 117 ALWAYS_INLINE void parseNumberAfterDecimalPoint(); 118 ALWAYS_INLINE bool parseNumberAfterExponentIndicator(); 119 ALWAYS_INLINE bool parseMultilineComment(); 120 121 static const size_t initialReadBufferCapacity = 32; 122 123 int m_lineNumber; 124 int m_lastLineNumber; 125 126 Vector<char> m_buffer8; 127 Vector<UChar> m_buffer16; 128 bool m_terminator; 129 bool m_delimited; // encountered delimiter like "'" and "}" on last run 130 int m_lastToken; 131 132 const SourceCode* m_source; 133 const UChar* m_code; 134 const UChar* m_codeStart; 135 const UChar* m_codeEnd; 136 bool m_isReparsing; 137 bool m_atLineStart; 138 bool m_error; 139 140 // current and following unicode characters (int to allow for -1 for end-of-file marker) 141 int m_current; 142 143 IdentifierArena* m_arena; 144 145 JSGlobalData* m_globalData; 146 147 const HashTable m_keywordTable; 148 }; 149 150 inline bool Lexer::isWhiteSpace(int ch) 151 { 152 return isASCII(ch) ? (ch == ' ' || ch == '\t' || ch == 0xB || ch == 0xC) : (WTF::Unicode::isSeparatorSpace(ch) || ch == 0xFEFF); 153 } 154 155 inline bool Lexer::isLineTerminator(int ch) 156 { 157 return ch == '\r' || ch == '\n' || (ch & ~1) == 0x2028; 158 } 159 160 inline unsigned char Lexer::convertHex(int c1, int c2) 161 { 162 return (toASCIIHexValue(c1) << 4) | toASCIIHexValue(c2); 163 } 164 165 inline UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4) 166 { 167 return (convertHex(c1, c2) << 8) | convertHex(c3, c4); 168 } 169 170 } // namespace JSC 171 172 #endif // Lexer_h 173