1 /* 2 * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org> 5 * Copyright (C) 2009 - 2010 Torch Mobile (Beijing) Co. Ltd. All rights reserved. 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Library General Public 9 * License as published by the Free Software Foundation; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Library General Public License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this library; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20 * Boston, MA 02110-1301, USA. 21 */ 22 23 #ifndef CSSTokenizer_h 24 #define CSSTokenizer_h 25 26 #include "wtf/Noncopyable.h" 27 #include "wtf/OwnPtr.h" 28 #include "wtf/text/WTFString.h" 29 30 namespace WebCore { 31 32 class BisonCSSParser; 33 struct CSSParserLocation; 34 struct CSSParserString; 35 36 class CSSTokenizer { 37 WTF_MAKE_NONCOPYABLE(CSSTokenizer); 38 public: 39 // FIXME: This should not be needed but there are still some ties between the 2 classes. 40 friend class BisonCSSParser; 41 42 CSSTokenizer(BisonCSSParser& parser) 43 : m_parser(parser) 44 , m_parsedTextPrefixLength(0) 45 , m_parsedTextSuffixLength(0) 46 , m_parsingMode(NormalMode) 47 , m_is8BitSource(false) 48 , m_length(0) 49 , m_token(0) 50 , m_lineNumber(0) 51 , m_tokenStartLineNumber(0) 52 , m_internal(true) 53 { 54 m_tokenStart.ptr8 = 0; 55 } 56 57 void setupTokenizer(const char* prefix, unsigned prefixLength, const String&, const char* suffix, unsigned suffixLength); 58 59 CSSParserLocation currentLocation(); 60 61 inline int lex(void* yylval) { return (this->*m_lexFunc)(yylval); } 62 63 inline unsigned safeUserStringTokenOffset() 64 { 65 return std::min(tokenStartOffset(), static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength)) - m_parsedTextPrefixLength; 66 } 67 68 bool is8BitSource() const { return m_is8BitSource; } 69 70 // FIXME: These 2 functions should be private so that we don't need the definitions below. 71 template <typename CharacterType> 72 inline CharacterType* tokenStart(); 73 74 inline unsigned tokenStartOffset(); 75 76 private: 77 UChar* allocateStringBuffer16(size_t len); 78 79 template <typename CharacterType> 80 inline CharacterType*& currentCharacter(); 81 82 template <typename CharacterType> 83 inline CharacterType* dataStart(); 84 85 template <typename CharacterType> 86 inline void setTokenStart(CharacterType*); 87 88 template <typename CharacterType> 89 inline bool isIdentifierStart(); 90 91 template <typename CharacterType> 92 inline CSSParserLocation tokenLocation(); 93 94 template <typename CharacterType> 95 static unsigned parseEscape(CharacterType*&); 96 template <typename DestCharacterType> 97 static inline void UnicodeToChars(DestCharacterType*&, unsigned); 98 99 template <typename SrcCharacterType, typename DestCharacterType> 100 static inline bool parseIdentifierInternal(SrcCharacterType*&, DestCharacterType*&, bool&); 101 template <typename SrcCharacterType> 102 static size_t peekMaxIdentifierLen(SrcCharacterType*); 103 template <typename CharacterType> 104 inline void parseIdentifier(CharacterType*&, CSSParserString&, bool&); 105 106 template <typename SrcCharacterType> 107 static size_t peekMaxStringLen(SrcCharacterType*, UChar quote); 108 template <typename SrcCharacterType, typename DestCharacterType> 109 static inline bool parseStringInternal(SrcCharacterType*&, DestCharacterType*&, UChar); 110 template <typename CharacterType> 111 inline void parseString(CharacterType*&, CSSParserString& resultString, UChar); 112 113 template <typename CharacterType> 114 inline bool findURI(CharacterType*& start, CharacterType*& end, UChar& quote); 115 template <typename SrcCharacterType> 116 static size_t peekMaxURILen(SrcCharacterType*, UChar quote); 117 template <typename SrcCharacterType, typename DestCharacterType> 118 static inline bool parseURIInternal(SrcCharacterType*&, DestCharacterType*&, UChar quote); 119 template <typename CharacterType> 120 inline void parseURI(CSSParserString&); 121 122 template <typename CharacterType> 123 inline bool parseUnicodeRange(); 124 template <typename CharacterType> 125 bool parseNthChild(); 126 template <typename CharacterType> 127 bool parseNthChildExtra(); 128 template <typename CharacterType> 129 inline bool detectFunctionTypeToken(int); 130 template <typename CharacterType> 131 inline void detectMediaQueryToken(int); 132 template <typename CharacterType> 133 inline void detectNumberToken(CharacterType*, int); 134 template <typename CharacterType> 135 inline void detectDashToken(int); 136 template <typename CharacterType> 137 inline void detectAtToken(int, bool); 138 template <typename CharacterType> 139 inline void detectSupportsToken(int); 140 141 template <typename SourceCharacterType> 142 int realLex(void* yylval); 143 144 BisonCSSParser& m_parser; 145 146 size_t m_parsedTextPrefixLength; 147 size_t m_parsedTextSuffixLength; 148 149 enum ParsingMode { 150 NormalMode, 151 MediaQueryMode, 152 SupportsMode, 153 NthChildMode 154 }; 155 156 ParsingMode m_parsingMode; 157 bool m_is8BitSource; 158 OwnPtr<LChar[]> m_dataStart8; 159 OwnPtr<UChar[]> m_dataStart16; 160 LChar* m_currentCharacter8; 161 UChar* m_currentCharacter16; 162 163 // During parsing of an ASCII stylesheet we might locate escape 164 // sequences that expand into UTF-16 code points. Strings, 165 // identifiers and URIs containing such escape sequences are 166 // stored in m_cssStrings16 so that we don't have to store the 167 // whole stylesheet as UTF-16. 168 Vector<OwnPtr<UChar[]> > m_cssStrings16; 169 union { 170 LChar* ptr8; 171 UChar* ptr16; 172 } m_tokenStart; 173 unsigned m_length; 174 int m_token; 175 int m_lineNumber; 176 int m_tokenStartLineNumber; 177 178 // FIXME: This boolean is misnamed. Also it would be nice if we could consolidate it 179 // with the CSSParserMode logic to determine if internal properties are allowed. 180 bool m_internal; 181 182 int (CSSTokenizer::*m_lexFunc)(void*); 183 }; 184 185 inline unsigned CSSTokenizer::tokenStartOffset() 186 { 187 if (is8BitSource()) 188 return m_tokenStart.ptr8 - m_dataStart8.get(); 189 return m_tokenStart.ptr16 - m_dataStart16.get(); 190 } 191 192 template <> 193 inline LChar* CSSTokenizer::tokenStart<LChar>() 194 { 195 return m_tokenStart.ptr8; 196 } 197 198 template <> 199 inline UChar* CSSTokenizer::tokenStart<UChar>() 200 { 201 return m_tokenStart.ptr16; 202 } 203 204 } // namespace WebCore 205 206 #endif // CSSTokenizer_h 207