Home | History | Annotate | Download | only in css
      1 /*
      2  * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2008, 2009, 2010 Apple Inc. All rights reserved.
      4  * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org>
      5  * Copyright (C) 2009 - 2010  Torch Mobile (Beijing) Co. Ltd. All rights reserved.
      6  *
      7  * This library is free software; you can redistribute it and/or
      8  * modify it under the terms of the GNU Library General Public
      9  * License as published by the Free Software Foundation; either
     10  * version 2 of the License, or (at your option) any later version.
     11  *
     12  * This library is distributed in the hope that it will be useful,
     13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  * Library General Public License for more details.
     16  *
     17  * You should have received a copy of the GNU Library General Public License
     18  * along with this library; see the file COPYING.LIB.  If not, write to
     19  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  * Boston, MA 02110-1301, USA.
     21  */
     22 
     23 #ifndef CSSTokenizer_h
     24 #define CSSTokenizer_h
     25 
     26 #include "wtf/Noncopyable.h"
     27 #include "wtf/OwnPtr.h"
     28 #include "wtf/text/WTFString.h"
     29 
     30 namespace WebCore {
     31 
     32 class BisonCSSParser;
     33 struct CSSParserLocation;
     34 struct CSSParserString;
     35 
     36 class CSSTokenizer {
     37     WTF_MAKE_NONCOPYABLE(CSSTokenizer);
     38 public:
     39     // FIXME: This should not be needed but there are still some ties between the 2 classes.
     40     friend class BisonCSSParser;
     41 
     42     CSSTokenizer(BisonCSSParser& parser)
     43         : m_parser(parser)
     44         , m_parsedTextPrefixLength(0)
     45         , m_parsedTextSuffixLength(0)
     46         , m_parsingMode(NormalMode)
     47         , m_is8BitSource(false)
     48         , m_length(0)
     49         , m_token(0)
     50         , m_lineNumber(0)
     51         , m_tokenStartLineNumber(0)
     52         , m_internal(true)
     53     {
     54         m_tokenStart.ptr8 = 0;
     55     }
     56 
     57     void setupTokenizer(const char* prefix, unsigned prefixLength, const String&, const char* suffix, unsigned suffixLength);
     58 
     59     CSSParserLocation currentLocation();
     60 
     61     inline int lex(void* yylval) { return (this->*m_lexFunc)(yylval); }
     62 
     63     inline unsigned safeUserStringTokenOffset()
     64     {
     65         return std::min(tokenStartOffset(), static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength)) - m_parsedTextPrefixLength;
     66     }
     67 
     68     bool is8BitSource() const { return m_is8BitSource; }
     69 
     70     // FIXME: These 2 functions should be private so that we don't need the definitions below.
     71     template <typename CharacterType>
     72     inline CharacterType* tokenStart();
     73 
     74     inline unsigned tokenStartOffset();
     75 
     76 private:
     77     UChar* allocateStringBuffer16(size_t len);
     78 
     79     template <typename CharacterType>
     80     inline CharacterType*& currentCharacter();
     81 
     82     template <typename CharacterType>
     83     inline CharacterType* dataStart();
     84 
     85     template <typename CharacterType>
     86     inline void setTokenStart(CharacterType*);
     87 
     88     template <typename CharacterType>
     89     inline bool isIdentifierStart();
     90 
     91     template <typename CharacterType>
     92     inline CSSParserLocation tokenLocation();
     93 
     94     template <typename CharacterType>
     95     static unsigned parseEscape(CharacterType*&);
     96     template <typename DestCharacterType>
     97     static inline void UnicodeToChars(DestCharacterType*&, unsigned);
     98 
     99     template <typename SrcCharacterType, typename DestCharacterType>
    100     static inline bool parseIdentifierInternal(SrcCharacterType*&, DestCharacterType*&, bool&);
    101     template <typename SrcCharacterType>
    102     static size_t peekMaxIdentifierLen(SrcCharacterType*);
    103     template <typename CharacterType>
    104     inline void parseIdentifier(CharacterType*&, CSSParserString&, bool&);
    105 
    106     template <typename SrcCharacterType>
    107     static size_t peekMaxStringLen(SrcCharacterType*, UChar quote);
    108     template <typename SrcCharacterType, typename DestCharacterType>
    109     static inline bool parseStringInternal(SrcCharacterType*&, DestCharacterType*&, UChar);
    110     template <typename CharacterType>
    111     inline void parseString(CharacterType*&, CSSParserString& resultString, UChar);
    112 
    113     template <typename CharacterType>
    114     inline bool findURI(CharacterType*& start, CharacterType*& end, UChar& quote);
    115     template <typename SrcCharacterType>
    116     static size_t peekMaxURILen(SrcCharacterType*, UChar quote);
    117     template <typename SrcCharacterType, typename DestCharacterType>
    118     static inline bool parseURIInternal(SrcCharacterType*&, DestCharacterType*&, UChar quote);
    119     template <typename CharacterType>
    120     inline void parseURI(CSSParserString&);
    121 
    122     template <typename CharacterType>
    123     inline bool parseUnicodeRange();
    124     template <typename CharacterType>
    125     bool parseNthChild();
    126     template <typename CharacterType>
    127     bool parseNthChildExtra();
    128     template <typename CharacterType>
    129     inline bool detectFunctionTypeToken(int);
    130     template <typename CharacterType>
    131     inline void detectMediaQueryToken(int);
    132     template <typename CharacterType>
    133     inline void detectNumberToken(CharacterType*, int);
    134     template <typename CharacterType>
    135     inline void detectDashToken(int);
    136     template <typename CharacterType>
    137     inline void detectAtToken(int, bool);
    138     template <typename CharacterType>
    139     inline void detectSupportsToken(int);
    140 
    141     template <typename SourceCharacterType>
    142     int realLex(void* yylval);
    143 
    144     BisonCSSParser& m_parser;
    145 
    146     size_t m_parsedTextPrefixLength;
    147     size_t m_parsedTextSuffixLength;
    148 
    149     enum ParsingMode {
    150         NormalMode,
    151         MediaQueryMode,
    152         SupportsMode,
    153         NthChildMode
    154     };
    155 
    156     ParsingMode m_parsingMode;
    157     bool m_is8BitSource;
    158     OwnPtr<LChar[]> m_dataStart8;
    159     OwnPtr<UChar[]> m_dataStart16;
    160     LChar* m_currentCharacter8;
    161     UChar* m_currentCharacter16;
    162 
    163     // During parsing of an ASCII stylesheet we might locate escape
    164     // sequences that expand into UTF-16 code points. Strings,
    165     // identifiers and URIs containing such escape sequences are
    166     // stored in m_cssStrings16 so that we don't have to store the
    167     // whole stylesheet as UTF-16.
    168     Vector<OwnPtr<UChar[]> > m_cssStrings16;
    169     union {
    170         LChar* ptr8;
    171         UChar* ptr16;
    172     } m_tokenStart;
    173     unsigned m_length;
    174     int m_token;
    175     int m_lineNumber;
    176     int m_tokenStartLineNumber;
    177 
    178     // FIXME: This boolean is misnamed. Also it would be nice if we could consolidate it
    179     // with the CSSParserMode logic to determine if internal properties are allowed.
    180     bool m_internal;
    181 
    182     int (CSSTokenizer::*m_lexFunc)(void*);
    183 };
    184 
    185 inline unsigned CSSTokenizer::tokenStartOffset()
    186 {
    187     if (is8BitSource())
    188         return m_tokenStart.ptr8 - m_dataStart8.get();
    189     return m_tokenStart.ptr16 - m_dataStart16.get();
    190 }
    191 
     192 template <>
     193 inline LChar* CSSTokenizer::tokenStart<LChar>()
     194 {
             // LChar specialization: returns the 8-bit member of the m_tokenStart
             // union. Presumably only meaningful while the tokenizer is working on
             // the 8-bit buffer -- confirm against the code that sets the token start.
     195     return m_tokenStart.ptr8;
     196 }
    197 
     198 template <>
     199 inline UChar* CSSTokenizer::tokenStart<UChar>()
     200 {
             // UChar specialization: returns the 16-bit member of the m_tokenStart
             // union. Presumably only meaningful while the tokenizer is working on
             // the 16-bit buffer -- confirm against the code that sets the token start.
     201     return m_tokenStart.ptr16;
     202 }
    203 
    204 } // namespace WebCore
    205 
    206 #endif // CSSTokenizer_h
    207