Home | History | Annotate | Download | only in css
      1 /*
      2  * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005 Allan Sandfeld Jensen (kde (at) carewolf.com)
      4  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
      5  * Copyright (C) 2007 Nicholas Shanks <webkit (at) nickshanks.com>
      6  * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org>
      7  * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      8  * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
      9  * Copyright (C) 2012 Intel Corporation. All rights reserved.
     10  *
     11  * This library is free software; you can redistribute it and/or
     12  * modify it under the terms of the GNU Library General Public
     13  * License as published by the Free Software Foundation; either
     14  * version 2 of the License, or (at your option) any later version.
     15  *
     16  * This library is distributed in the hope that it will be useful,
     17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19  * Library General Public License for more details.
     20  *
     21  * You should have received a copy of the GNU Library General Public License
     22  * along with this library; see the file COPYING.LIB.  If not, write to
     23  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     24  * Boston, MA 02110-1301, USA.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/css/CSSTokenizer.h"
     29 
     30 #include "core/css/CSSKeyframeRule.h"
     31 #include "core/css/parser/BisonCSSParser.h"
     32 #include "core/css/CSSParserValues.h"
     33 #include "core/css/MediaQuery.h"
     34 #include "core/css/StyleRule.h"
     35 #include "core/html/parser/HTMLParserIdioms.h"
     36 #include "core/svg/SVGParserUtilities.h"
     37 
     38 namespace WebCore {
     39 
     40 #include "core/CSSGrammar.h"
     41 
// Classification of the 128 ASCII characters. The typesOfASCIICharacters
// table maps every ASCII code to exactly one of these values.
enum CharacterType {
    // Types for the main switch.

    // The first 4 types must be grouped together, as they
    // represent the allowed chars in an identifier. isCSSLetter()
    // depends on this ordering: it tests "<= CharacterDash".
    CharacterCaselessU,
    CharacterIdentifierStart,
    CharacterNumber,
    CharacterDash,

    // Everything from here on is not an identifier character.
    CharacterOther,
    CharacterNull,
    CharacterWhiteSpace,
    CharacterEndMediaQueryOrSupports,
    CharacterEndNthChild,
    CharacterQuote,
    CharacterExclamationMark,
    CharacterHashmark,
    CharacterDollar,
    CharacterAsterisk,
    CharacterPlus,
    CharacterDot,
    CharacterSlash,
    CharacterLess,
    CharacterAt,
    CharacterBackSlash,
    CharacterXor,
    CharacterVerticalBar,
    CharacterTilde,
};
     72 
// Lookup table mapping each of the 128 ASCII codes to its CharacterType.
// Callers must only index it with values below 128 (isCSSLetter() checks
// "character >= 128" before indexing).
static const CharacterType typesOfASCIICharacters[128] = {
/*   0 - Null               */ CharacterNull,
/*   1 - Start of Heading   */ CharacterOther,
/*   2 - Start of Text      */ CharacterOther,
/*   3 - End of Text        */ CharacterOther,
/*   4 - End of Transm.     */ CharacterOther,
/*   5 - Enquiry            */ CharacterOther,
/*   6 - Acknowledgment     */ CharacterOther,
/*   7 - Bell               */ CharacterOther,
/*   8 - Back Space         */ CharacterOther,
/*   9 - Horizontal Tab     */ CharacterWhiteSpace,
/*  10 - Line Feed          */ CharacterWhiteSpace,
/*  11 - Vertical Tab       */ CharacterOther,
/*  12 - Form Feed          */ CharacterWhiteSpace,
/*  13 - Carriage Return    */ CharacterWhiteSpace,
/*  14 - Shift Out          */ CharacterOther,
/*  15 - Shift In           */ CharacterOther,
/*  16 - Data Link Escape   */ CharacterOther,
/*  17 - Device Control 1   */ CharacterOther,
/*  18 - Device Control 2   */ CharacterOther,
/*  19 - Device Control 3   */ CharacterOther,
/*  20 - Device Control 4   */ CharacterOther,
/*  21 - Negative Ack.      */ CharacterOther,
/*  22 - Synchronous Idle   */ CharacterOther,
/*  23 - End of Trans. Block*/ CharacterOther,
/*  24 - Cancel             */ CharacterOther,
/*  25 - End of Medium      */ CharacterOther,
/*  26 - Substitute         */ CharacterOther,
/*  27 - Escape             */ CharacterOther,
/*  28 - File Separator     */ CharacterOther,
/*  29 - Group Separator    */ CharacterOther,
/*  30 - Record Separator   */ CharacterOther,
/*  31 - Unit Separator     */ CharacterOther,
/*  32 - Space              */ CharacterWhiteSpace,
/*  33 - !                  */ CharacterExclamationMark,
/*  34 - "                  */ CharacterQuote,
/*  35 - #                  */ CharacterHashmark,
/*  36 - $                  */ CharacterDollar,
/*  37 - %                  */ CharacterOther,
/*  38 - &                  */ CharacterOther,
/*  39 - '                  */ CharacterQuote,
/*  40 - (                  */ CharacterOther,
/*  41 - )                  */ CharacterEndNthChild,
/*  42 - *                  */ CharacterAsterisk,
/*  43 - +                  */ CharacterPlus,
/*  44 - ,                  */ CharacterOther,
/*  45 - -                  */ CharacterDash,
/*  46 - .                  */ CharacterDot,
/*  47 - /                  */ CharacterSlash,
/*  48 - 0                  */ CharacterNumber,
/*  49 - 1                  */ CharacterNumber,
/*  50 - 2                  */ CharacterNumber,
/*  51 - 3                  */ CharacterNumber,
/*  52 - 4                  */ CharacterNumber,
/*  53 - 5                  */ CharacterNumber,
/*  54 - 6                  */ CharacterNumber,
/*  55 - 7                  */ CharacterNumber,
/*  56 - 8                  */ CharacterNumber,
/*  57 - 9                  */ CharacterNumber,
/*  58 - :                  */ CharacterOther,
/*  59 - ;                  */ CharacterEndMediaQueryOrSupports,
/*  60 - <                  */ CharacterLess,
/*  61 - =                  */ CharacterOther,
/*  62 - >                  */ CharacterOther,
/*  63 - ?                  */ CharacterOther,
/*  64 - @                  */ CharacterAt,
/*  65 - A                  */ CharacterIdentifierStart,
/*  66 - B                  */ CharacterIdentifierStart,
/*  67 - C                  */ CharacterIdentifierStart,
/*  68 - D                  */ CharacterIdentifierStart,
/*  69 - E                  */ CharacterIdentifierStart,
/*  70 - F                  */ CharacterIdentifierStart,
/*  71 - G                  */ CharacterIdentifierStart,
/*  72 - H                  */ CharacterIdentifierStart,
/*  73 - I                  */ CharacterIdentifierStart,
/*  74 - J                  */ CharacterIdentifierStart,
/*  75 - K                  */ CharacterIdentifierStart,
/*  76 - L                  */ CharacterIdentifierStart,
/*  77 - M                  */ CharacterIdentifierStart,
/*  78 - N                  */ CharacterIdentifierStart,
/*  79 - O                  */ CharacterIdentifierStart,
/*  80 - P                  */ CharacterIdentifierStart,
/*  81 - Q                  */ CharacterIdentifierStart,
/*  82 - R                  */ CharacterIdentifierStart,
/*  83 - S                  */ CharacterIdentifierStart,
/*  84 - T                  */ CharacterIdentifierStart,
/*  85 - U                  */ CharacterCaselessU,
/*  86 - V                  */ CharacterIdentifierStart,
/*  87 - W                  */ CharacterIdentifierStart,
/*  88 - X                  */ CharacterIdentifierStart,
/*  89 - Y                  */ CharacterIdentifierStart,
/*  90 - Z                  */ CharacterIdentifierStart,
/*  91 - [                  */ CharacterOther,
/*  92 - \                  */ CharacterBackSlash,
/*  93 - ]                  */ CharacterOther,
/*  94 - ^                  */ CharacterXor,
/*  95 - _                  */ CharacterIdentifierStart,
/*  96 - `                  */ CharacterOther,
/*  97 - a                  */ CharacterIdentifierStart,
/*  98 - b                  */ CharacterIdentifierStart,
/*  99 - c                  */ CharacterIdentifierStart,
/* 100 - d                  */ CharacterIdentifierStart,
/* 101 - e                  */ CharacterIdentifierStart,
/* 102 - f                  */ CharacterIdentifierStart,
/* 103 - g                  */ CharacterIdentifierStart,
/* 104 - h                  */ CharacterIdentifierStart,
/* 105 - i                  */ CharacterIdentifierStart,
/* 106 - j                  */ CharacterIdentifierStart,
/* 107 - k                  */ CharacterIdentifierStart,
/* 108 - l                  */ CharacterIdentifierStart,
/* 109 - m                  */ CharacterIdentifierStart,
/* 110 - n                  */ CharacterIdentifierStart,
/* 111 - o                  */ CharacterIdentifierStart,
/* 112 - p                  */ CharacterIdentifierStart,
/* 113 - q                  */ CharacterIdentifierStart,
/* 114 - r                  */ CharacterIdentifierStart,
/* 115 - s                  */ CharacterIdentifierStart,
/* 116 - t                  */ CharacterIdentifierStart,
/* 117 - u                  */ CharacterCaselessU,
/* 118 - v                  */ CharacterIdentifierStart,
/* 119 - w                  */ CharacterIdentifierStart,
/* 120 - x                  */ CharacterIdentifierStart,
/* 121 - y                  */ CharacterIdentifierStart,
/* 122 - z                  */ CharacterIdentifierStart,
/* 123 - {                  */ CharacterEndMediaQueryOrSupports,
/* 124 - |                  */ CharacterVerticalBar,
/* 125 - }                  */ CharacterOther,
/* 126 - ~                  */ CharacterTilde,
/* 127 - Delete             */ CharacterOther,
};
    204 
    205 // Utility functions for the CSS tokenizer.
    206 
    207 template <typename CharacterType>
    208 static inline bool isCSSLetter(CharacterType character)
    209 {
    210     return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
    211 }
    212 
    213 template <typename CharacterType>
    214 static inline bool isCSSEscape(CharacterType character)
    215 {
    216     return character >= ' ' && character != 127;
    217 }
    218 
    219 template <typename CharacterType>
    220 static inline bool isURILetter(CharacterType character)
    221 {
    222     return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
    223 }
    224 
    225 template <typename CharacterType>
    226 static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
    227 {
    228     return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
    229         || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
    230 }
    231 
    232 template <typename CharacterType>
    233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
    234 {
    235     // Compare an character memory data with a zero terminated string.
    236     do {
    237         // The input must be part of an identifier if constantChar or constString
    238         // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
    239         ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
    240         ASSERT(*constantString != '-' || isCSSLetter(*cssString));
    241         if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
    242             return false;
    243     } while (*constantString);
    244     return true;
    245 }
    246 
    247 template <typename CharacterType>
    248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
    249 {
    250     ASSERT(*constantString);
    251 
    252     do {
    253         if (*string++ != *constantString++)
    254             return false;
    255     } while (*constantString);
    256     return true;
    257 }
    258 
    259 template <typename CharacterType>
    260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
    261 {
    262     // Returns with 0, if escape check is failed. Otherwise
    263     // it returns with the following character.
    264     ASSERT(*currentCharacter == '\\');
    265 
    266     ++currentCharacter;
    267     if (!isCSSEscape(*currentCharacter))
    268         return 0;
    269 
    270     if (isASCIIHexDigit(*currentCharacter)) {
    271         int length = 6;
    272 
    273         do {
    274             ++currentCharacter;
    275         } while (isASCIIHexDigit(*currentCharacter) && --length);
    276 
    277         // Optional space after the escape sequence.
    278         if (isHTMLSpace<CharacterType>(*currentCharacter))
    279             ++currentCharacter;
    280         return currentCharacter;
    281     }
    282     return currentCharacter + 1;
    283 }
    284 
    285 template <typename CharacterType>
    286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
    287 {
    288     while (isHTMLSpace<CharacterType>(*currentCharacter))
    289         ++currentCharacter;
    290     return currentCharacter;
    291 }
    292 
    293 // Main CSS tokenizer functions.
    294 
// LChar specialization: returns a mutable reference to the 8-bit read
// position (m_currentCharacter8), so callers can both read and advance it.
template <>
inline LChar*& CSSTokenizer::currentCharacter<LChar>()
{
    return m_currentCharacter8;
}
    300 
// UChar specialization: returns a mutable reference to the 16-bit read
// position (m_currentCharacter16), so callers can both read and advance it.
template <>
inline UChar*& CSSTokenizer::currentCharacter<UChar>()
{
    return m_currentCharacter16;
}
    306 
    307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len)
    308 {
    309     // Allocates and returns a CSSTokenizer owned buffer for storing
    310     // UTF-16 data. Used to get a suitable life span for UTF-16
    311     // strings, identifiers and URIs created by the tokenizer.
    312     OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]);
    313 
    314     UChar* bufferPtr = buffer.get();
    315 
    316     m_cssStrings16.append(buffer.release());
    317     return bufferPtr;
    318 }
    319 
// LChar specialization: returns the raw pointer held by m_dataStart8.
// tokenLocation() subtracts it from the token start to get an offset.
template <>
inline LChar* CSSTokenizer::dataStart<LChar>()
{
    return m_dataStart8.get();
}
    325 
// UChar specialization: returns the raw pointer held by m_dataStart16.
// tokenLocation() subtracts it from the token start to get an offset.
template <>
inline UChar* CSSTokenizer::dataStart<UChar>()
{
    return m_dataStart16.get();
}
    331 
    332 template <typename CharacterType>
    333 inline CSSParserLocation CSSTokenizer::tokenLocation()
    334 {
    335     CSSParserLocation location;
    336     location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>());
    337     location.lineNumber = m_tokenStartLineNumber;
    338     location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();
    339     return location;
    340 }
    341 
    342 CSSParserLocation CSSTokenizer::currentLocation()
    343 {
    344     if (is8BitSource())
    345         return tokenLocation<LChar>();
    346     return tokenLocation<UChar>();
    347 }
    348 
    349 template <typename CharacterType>
    350 inline bool CSSTokenizer::isIdentifierStart()
    351 {
    352     // Check whether an identifier is started.
    353     return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);
    354 }
    355 
// Controls how checkAndSkipString() reacts to malformed input.
enum CheckStringValidationMode {
    // Return 0 on an unescaped '\n', '\f' or '\r', or on an invalid escape.
    AbortIfInvalid,
    // Keep scanning: newlines are not rejected and the backslash of an
    // invalid escape is stepped over.
    SkipInvalid
};
    360 
    361 template <typename CharacterType>
    362 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote, CheckStringValidationMode mode)
    363 {
    364     // If mode is AbortIfInvalid and the string check fails it returns
    365     // with 0. Otherwise it returns with a pointer to the first
    366     // character after the string.
    367     while (true) {
    368         if (UNLIKELY(*currentCharacter == quote)) {
    369             // String parsing is successful.
    370             return currentCharacter + 1;
    371         }
    372         if (UNLIKELY(!*currentCharacter)) {
    373             // String parsing is successful up to end of input.
    374             return currentCharacter;
    375         }
    376         if (mode == AbortIfInvalid && UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
    377             // String parsing is failed for character '\n', '\f' or '\r'.
    378             return 0;
    379         }
    380 
    381         if (LIKELY(currentCharacter[0] != '\\')) {
    382             ++currentCharacter;
    383         } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
    384             currentCharacter += 2;
    385         } else if (currentCharacter[1] == '\r') {
    386             currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
    387         } else {
    388             CharacterType* next = checkAndSkipEscape(currentCharacter);
    389             if (!next) {
    390                 if (mode == AbortIfInvalid)
    391                     return 0;
    392                 next = currentCharacter + 1;
    393             }
    394             currentCharacter = next;
    395         }
    396     }
    397 }
    398 
    399 template <typename CharacterType>
    400 unsigned CSSTokenizer::parseEscape(CharacterType*& src)
    401 {
    402     ASSERT(*src == '\\' && isCSSEscape(src[1]));
    403 
    404     unsigned unicode = 0;
    405 
    406     ++src;
    407     if (isASCIIHexDigit(*src)) {
    408 
    409         int length = 6;
    410 
    411         do {
    412             unicode = (unicode << 4) + toASCIIHexValue(*src++);
    413         } while (--length && isASCIIHexDigit(*src));
    414 
    415         // Characters above 0x10ffff are not handled.
    416         if (unicode > 0x10ffff)
    417             unicode = 0xfffd;
    418 
    419         // Optional space after the escape sequence.
    420         if (isHTMLSpace<CharacterType>(*src))
    421             ++src;
    422 
    423         return unicode;
    424     }
    425 
    426     return *src++;
    427 }
    428 
    429 template <>
    430 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
    431 {
    432     ASSERT(unicode <= 0xff);
    433     *result = unicode;
    434 
    435     ++result;
    436 }
    437 
    438 template <>
    439 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
    440 {
    441     // Replace unicode with a surrogate pairs when it is bigger than 0xffff
    442     if (U16_LENGTH(unicode) == 2) {
    443         *result++ = U16_LEAD(unicode);
    444         *result = U16_TRAIL(unicode);
    445     } else {
    446         *result = unicode;
    447     }
    448 
    449     ++result;
    450 }
    451 
    452 template <typename SrcCharacterType>
    453 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src)
    454 {
    455     // The decoded form of an identifier (after resolving escape
    456     // sequences) will not contain more characters (ASCII or UTF-16
    457     // codepoints) than the input. This code can therefore ignore
    458     // escape sequences completely.
    459     SrcCharacterType* start = src;
    460     do {
    461         if (LIKELY(*src != '\\'))
    462             src++;
    463         else
    464             parseEscape<SrcCharacterType>(src);
    465     } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
    466 
    467     return src - start;
    468 }
    469 
    470 template <typename SrcCharacterType, typename DestCharacterType>
    471 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
    472 {
    473     hasEscape = false;
    474     do {
    475         if (LIKELY(*src != '\\')) {
    476             *result++ = *src++;
    477         } else {
    478             hasEscape = true;
    479             SrcCharacterType* savedEscapeStart = src;
    480             unsigned unicode = parseEscape<SrcCharacterType>(src);
    481             if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
    482                 src = savedEscapeStart;
    483                 return false;
    484             }
    485             UnicodeToChars(result, unicode);
    486         }
    487     } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
    488 
    489     return true;
    490 }
    491 
    492 template <typename CharacterType>
    493 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
    494 {
    495     // If a valid identifier start is found, we can safely
    496     // parse the identifier until the next invalid character.
    497     ASSERT(isIdentifierStart<CharacterType>());
    498 
    499     CharacterType* start = currentCharacter<CharacterType>();
    500     if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
    501         // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
    502         ASSERT(is8BitSource());
    503         UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdentifierLen(currentCharacter<CharacterType>()));
    504         UChar* start16 = result16;
    505         int i = 0;
    506         for (; i < result - start; i++)
    507             result16[i] = start[i];
    508 
    509         result16 += i;
    510 
    511         parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);
    512 
    513         resultString.init(start16, result16 - start16);
    514 
    515         return;
    516     }
    517 
    518     resultString.init(start, result - start);
    519 }
    520 
    521 template <typename SrcCharacterType>
    522 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote)
    523 {
    524     // The decoded form of a CSS string (after resolving escape
    525     // sequences) will not contain more characters (ASCII or UTF-16
    526     // codepoints) than the input. This code can therefore ignore
    527     // escape sequences completely and just return the length of the
    528     // input string (possibly including terminating quote if any).
    529     SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid);
    530     return end ? end - src : 0;
    531 }
    532 
    533 template <typename SrcCharacterType, typename DestCharacterType>
    534 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
    535 {
    536     while (true) {
    537         if (UNLIKELY(*src == quote)) {
    538             // String parsing is done.
    539             ++src;
    540             return true;
    541         }
    542         if (UNLIKELY(!*src)) {
    543             // String parsing is done, but don't advance pointer if at the end of input.
    544             return true;
    545         }
    546         if (LIKELY(src[0] != '\\')) {
    547             *result++ = *src++;
    548         } else if (src[1] == '\n' || src[1] == '\f') {
    549             src += 2;
    550         } else if (src[1] == '\r') {
    551             src += src[2] == '\n' ? 3 : 2;
    552         } else {
    553             SrcCharacterType* savedEscapeStart = src;
    554             unsigned unicode = parseEscape<SrcCharacterType>(src);
    555             if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
    556                 src = savedEscapeStart;
    557                 return false;
    558             }
    559             UnicodeToChars(result, unicode);
    560         }
    561     }
    562 
    563     return true;
    564 }
    565 
    566 template <typename CharacterType>
    567 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
    568 {
    569     CharacterType* start = currentCharacter<CharacterType>();
    570 
    571     if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
    572         // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
    573         ASSERT(is8BitSource());
    574         UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStringLen(currentCharacter<CharacterType>(), quote));
    575         UChar* start16 = result16;
    576         int i = 0;
    577         for (; i < result - start; i++)
    578             result16[i] = start[i];
    579 
    580         result16 += i;
    581 
    582         parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
    583 
    584         resultString.init(start16, result16 - start16);
    585         return;
    586     }
    587 
    588     resultString.init(start, result - start);
    589 }
    590 
    591 template <typename CharacterType>
    592 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
    593 {
    594     start = skipWhiteSpace(currentCharacter<CharacterType>());
    595 
    596     if (*start == '"' || *start == '\'') {
    597         quote = *start++;
    598         end = checkAndSkipString(start, quote, AbortIfInvalid);
    599         if (!end)
    600             return false;
    601     } else {
    602         quote = 0;
    603         end = start;
    604         while (isURILetter(*end)) {
    605             if (LIKELY(*end != '\\')) {
    606                 ++end;
    607             } else {
    608                 end = checkAndSkipEscape(end);
    609                 if (!end)
    610                     return false;
    611             }
    612         }
    613     }
    614 
    615     end = skipWhiteSpace(end);
    616     if (*end != ')')
    617         return false;
    618 
    619     return true;
    620 }
    621 
    622 template <typename SrcCharacterType>
    623 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote)
    624 {
    625     // The decoded form of a URI (after resolving escape sequences)
    626     // will not contain more characters (ASCII or UTF-16 codepoints)
    627     // than the input. This code can therefore ignore escape sequences
    628     // completely.
    629     SrcCharacterType* start = src;
    630     if (quote) {
    631         ASSERT(quote == '"' || quote == '\'');
    632         return peekMaxStringLen(src, quote);
    633     }
    634 
    635     while (isURILetter(*src)) {
    636         if (LIKELY(*src != '\\'))
    637             src++;
    638         else
    639             parseEscape<SrcCharacterType>(src);
    640     }
    641 
    642     return src - start;
    643 }
    644 
    645 template <typename SrcCharacterType, typename DestCharacterType>
    646 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
    647 {
    648     if (quote) {
    649         ASSERT(quote == '"' || quote == '\'');
    650         return parseStringInternal(src, dest, quote);
    651     }
    652 
    653     while (isURILetter(*src)) {
    654         if (LIKELY(*src != '\\')) {
    655             *dest++ = *src++;
    656         } else {
    657             unsigned unicode = parseEscape<SrcCharacterType>(src);
    658             if (unicode > 0xff && sizeof(DestCharacterType) == 1)
    659                 return false;
    660             UnicodeToChars(dest, unicode);
    661         }
    662     }
    663 
    664     return true;
    665 }
    666 
    667 template <typename CharacterType>
    668 inline void CSSTokenizer::parseURI(CSSParserString& string)
    669 {
    670     CharacterType* uriStart;
    671     CharacterType* uriEnd;
    672     UChar quote;
    673     if (!findURI(uriStart, uriEnd, quote))
    674         return;
    675 
    676     CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
    677     if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
    678         string.init(uriStart, dest - uriStart);
    679     } else {
    680         // An escape sequence was encountered that can't be stored in 8 bits.
    681         // Reset the current character to the start of the URI and re-parse with
    682         // a 16-bit destination.
    683         ASSERT(is8BitSource());
    684         currentCharacter<CharacterType>() = uriStart;
    685         UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<CharacterType>(), quote));
    686         UChar* uriStart16 = result16;
    687         bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote);
    688         ASSERT_UNUSED(result, result);
    689         string.init(uriStart16, result16 - uriStart16);
    690     }
    691 
    692     currentCharacter<CharacterType>() = uriEnd + 1;
    693     m_token = URI;
    694 }
    695 
    696 template <typename CharacterType>
    697 inline bool CSSTokenizer::parseUnicodeRange()
    698 {
    699     CharacterType* character = currentCharacter<CharacterType>() + 1;
    700     int length = 6;
    701     ASSERT(*currentCharacter<CharacterType>() == '+');
    702 
    703     while (isASCIIHexDigit(*character) && length) {
    704         ++character;
    705         --length;
    706     }
    707 
    708     if (length && *character == '?') {
    709         // At most 5 hex digit followed by a question mark.
    710         do {
    711             ++character;
    712             --length;
    713         } while (*character == '?' && length);
    714         currentCharacter<CharacterType>() = character;
    715         return true;
    716     }
    717 
    718     if (length < 6) {
    719         // At least one hex digit.
    720         if (character[0] == '-' && isASCIIHexDigit(character[1])) {
    721             // Followed by a dash and a hex digit.
    722             ++character;
    723             length = 6;
    724             do {
    725                 ++character;
    726             } while (--length && isASCIIHexDigit(*character));
    727         }
    728         currentCharacter<CharacterType>() = character;
    729         return true;
    730     }
    731     return false;
    732 }
    733 
    734 template <typename CharacterType>
    735 bool CSSTokenizer::parseNthChild()
    736 {
    737     CharacterType* character = currentCharacter<CharacterType>();
    738 
    739     while (isASCIIDigit(*character))
    740         ++character;
    741     if (isASCIIAlphaCaselessEqual(*character, 'n')) {
    742         currentCharacter<CharacterType>() = character + 1;
    743         return true;
    744     }
    745     return false;
    746 }
    747 
    748 template <typename CharacterType>
    749 bool CSSTokenizer::parseNthChildExtra()
    750 {
    751     CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>());
    752     if (*character != '+' && *character != '-')
    753         return false;
    754 
    755     character = skipWhiteSpace(character + 1);
    756     if (!isASCIIDigit(*character))
    757         return false;
    758 
    759     do {
    760         ++character;
    761     } while (isASCIIDigit(*character));
    762 
    763     currentCharacter<CharacterType>() = character;
    764     return true;
    765 }
    766 
    // Matches the first |length| characters at tokenStart() against a fixed list
    // of function names. A recognized name either replaces m_token with a
    // dedicated token code or, for the nth-* names, switches m_parsingMode to
    // NthChildMode while leaving m_token untouched. Returns true when a name was
    // recognized and false otherwise.
    // NOTE(review): SWITCH/CASE are defined outside this block; their exact
    // matching rules (e.g. case sensitivity) are not visible here.
    767 template <typename CharacterType>
    768 inline bool CSSTokenizer::detectFunctionTypeToken(int length)
    769 {
    770     ASSERT(length > 0);
    771     CharacterType* name = tokenStart<CharacterType>();
    772     SWITCH(name, length) {
    773         CASE("not") {
    774             m_token = NOTFUNCTION;
    775             return true;
    776         }
    777         CASE("url") {
                    // The caller (realLex) re-checks URI tokens with parseURI().
    778             m_token = URI;
    779             return true;
    780         }
    781         CASE("cue") {
    782             m_token = CUEFUNCTION;
    783             return true;
    784         }
    785         CASE("calc") {
    786             m_token = CALCFUNCTION;
    787             return true;
    788         }
    789         CASE("host") {
    790             m_token = HOSTFUNCTION;
    791             return true;
    792         }
    793         CASE("host-context") {
    794             m_token = HOSTCONTEXTFUNCTION;
    795             return true;
    796         }
                // The four nth-* names only change the parsing mode; m_token keeps
                // the value the caller assigned before calling this function.
    797         CASE("nth-child") {
    798             m_parsingMode = NthChildMode;
    799             return true;
    800         }
    801         CASE("nth-of-type") {
    802             m_parsingMode = NthChildMode;
    803             return true;
    804         }
    805         CASE("nth-last-child") {
    806             m_parsingMode = NthChildMode;
    807             return true;
    808         }
    809         CASE("nth-last-of-type") {
    810             m_parsingMode = NthChildMode;
    811             return true;
    812         }
    813     }
    814     return false;
    815 }
    816 
    // Must only be called in MediaQueryMode (asserted). Matches the first
    // |length| characters at tokenStart() against the media query keywords
    // "and", "not", "only" and "or" and, on a match, replaces m_token with the
    // corresponding MEDIA_* token code. m_token is left unchanged otherwise.
    817 template <typename CharacterType>
    818 inline void CSSTokenizer::detectMediaQueryToken(int length)
    819 {
    820     ASSERT(m_parsingMode == MediaQueryMode);
    821     CharacterType* name = tokenStart<CharacterType>();
    822 
    823     SWITCH(name, length) {
    824         CASE("and") {
    825             m_token = MEDIA_AND;
    826         }
    827         CASE("not") {
    828             m_token = MEDIA_NOT;
    829         }
    830         CASE("only") {
    831             m_token = MEDIA_ONLY;
    832         }
    833         CASE("or") {
    834             m_token = MEDIA_OR;
    835         }
    836     }
    837 }
    838 
    // Maps a unit suffix to its token code. |type| points at the first character
    // of the suffix and |length| (> 0, asserted) is the number of characters to
    // match. On a match m_token is replaced with the unit's token code; for an
    // unknown suffix m_token is left unchanged (realLex presets it to DIMEN
    // before calling this function).
    839 template <typename CharacterType>
    840 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
    841 {
    842     ASSERT(length > 0);
    843 
    844     SWITCH(type, length) {
    845         CASE("cm") {
    846             m_token = CMS;
    847         }
    848         CASE("ch") {
    849             m_token = CHS;
    850         }
    851         CASE("deg") {
    852             m_token = DEGS;
    853         }
    854         CASE("dppx") {
    855             // There is a discussion about the name of this unit on www-style:
    856             // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
    857             // NOTE(review): this comment used to mention a compile time guard, but no guard is present here.
    858             m_token = DPPX;
    859         }
    860         CASE("dpcm") {
    861             m_token = DPCM;
    862         }
    863         CASE("dpi") {
    864             m_token = DPI;
    865         }
    866         CASE("em") {
    867             m_token = EMS;
    868         }
    869         CASE("ex") {
    870             m_token = EXS;
    871         }
    872         CASE("fr") {
    873             m_token = FR;
    874         }
    875         CASE("grad") {
    876             m_token = GRADS;
    877         }
    878         CASE("hz") {
    879             m_token = HERTZ;
    880         }
    881         CASE("in") {
    882             m_token = INS;
    883         }
    884         CASE("khz") {
    885             m_token = KHERTZ;
    886         }
    887         CASE("mm") {
    888             m_token = MMS;
    889         }
    890         CASE("ms") {
    891             m_token = MSECS;
    892         }
    893         CASE("px") {
    894             m_token = PXS;
    895         }
    896         CASE("pt") {
    897             m_token = PTS;
    898         }
    899         CASE("pc") {
    900             m_token = PCS;
    901         }
    902         CASE("rad") {
    903             m_token = RADS;
    904         }
    905         CASE("rem") {
    906             m_token = REMS;
    907         }
    908         CASE("s") {
    909             m_token = SECS;
    910         }
    911         CASE("turn") {
    912             m_token = TURNS;
    913         }
    914         CASE("vw") {
    915             m_token = VW;
    916         }
    917         CASE("vh") {
    918             m_token = VH;
    919         }
    920         CASE("vmin") {
    921             m_token = VMIN;
    922         }
    923         CASE("vmax") {
    924             m_token = VMAX;
    925         }
    926         CASE("__qem") {
    927             m_token = QEMS;
    928         }
    929     }
    930 }
    931 
    // Classifies a function name that begins with a dash. |length| is the number
    // of characters at tokenStart(), including the leading dash, which is skipped
    // before matching. "webkit-any" and "webkit-calc" replace m_token with
    // ANYFUNCTION and CALCFUNCTION respectively; any other name leaves m_token
    // unchanged.
    932 template <typename CharacterType>
    933 inline void CSSTokenizer::detectDashToken(int length)
    934 {
    935     CharacterType* name = tokenStart<CharacterType>();
    936 
    937     // Ignore leading dash.
    938     ++name;
    939     --length;
    940 
    941     SWITCH(name, length) {
    942         CASE("webkit-any") {
    943             m_token = ANYFUNCTION;
    944         }
    945         CASE("webkit-calc") {
    946             m_token = CALCFUNCTION;
    947         }
    948     }
    949 }
    950 
    // Classifies an at-keyword. |length| is the number of characters at
    // tokenStart() including the leading '@' (asserted to be at least 2);
    // |hasEscape| tells whether the identifier contained an escape. On a match
    // m_token is replaced with the keyword's token code, and some keywords also
    // switch m_parsingMode. Unknown keywords, and keywords rejected by the
    // per-case conditions below, leave m_token unchanged.
    951 template <typename CharacterType>
    952 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
    953 {
    954     CharacterType* name = tokenStart<CharacterType>();
    955     ASSERT(name[0] == '@' && length >= 2);
    956 
    957     // Ignore leading @.
    958     ++name;
    959     --length;
    960 
    961     // charset, font-face, import, keyframes, media, namespace, page, supports,
    962     // viewport, -webkit-keyframes, -internal-medialist,
            // -internal-keyframe-key-list and -internal-supports-condition are not
            // affected by hasEscape. Every other keyword is only recognized when the
            // identifier contained no escape.
    963     SWITCH(name, length) {
    964         CASE("bottom-left") {
    965             if (LIKELY(!hasEscape))
    966                 m_token = BOTTOMLEFT_SYM;
    967         }
    968         CASE("bottom-right") {
    969             if (LIKELY(!hasEscape))
    970                 m_token = BOTTOMRIGHT_SYM;
    971         }
    972         CASE("bottom-center") {
    973             if (LIKELY(!hasEscape))
    974                 m_token = BOTTOMCENTER_SYM;
    975         }
    976         CASE("bottom-left-corner") {
    977             if (LIKELY(!hasEscape))
    978                 m_token = BOTTOMLEFTCORNER_SYM;
    979         }
    980         CASE("bottom-right-corner") {
    981             if (LIKELY(!hasEscape))
    982                 m_token = BOTTOMRIGHTCORNER_SYM;
    983         }
    984         CASE("charset") {
                    // Only recognized when the '@' is the very first character of the data.
    985             if (name - 1 == dataStart<CharacterType>())
    986                 m_token = CHARSET_SYM;
    987         }
    988         CASE("font-face") {
    989             m_token = FONT_FACE_SYM;
    990         }
    991         CASE("import") {
    992             m_parsingMode = MediaQueryMode;
    993             m_token = IMPORT_SYM;
    994         }
    995         CASE("keyframes") {
                    // The unprefixed keyword is gated on a runtime feature flag.
    996             if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
    997                 m_token = KEYFRAMES_SYM;
    998         }
    999         CASE("left-top") {
   1000             if (LIKELY(!hasEscape))
   1001                 m_token = LEFTTOP_SYM;
   1002         }
   1003         CASE("left-middle") {
   1004             if (LIKELY(!hasEscape))
   1005                 m_token = LEFTMIDDLE_SYM;
   1006         }
   1007         CASE("left-bottom") {
   1008             if (LIKELY(!hasEscape))
   1009                 m_token = LEFTBOTTOM_SYM;
   1010         }
   1011         CASE("media") {
   1012             m_parsingMode = MediaQueryMode;
   1013             m_token = MEDIA_SYM;
   1014         }
   1015         CASE("namespace") {
   1016             m_token = NAMESPACE_SYM;
   1017         }
   1018         CASE("page") {
   1019             m_token = PAGE_SYM;
   1020         }
   1021         CASE("right-top") {
   1022             if (LIKELY(!hasEscape))
   1023                 m_token = RIGHTTOP_SYM;
   1024         }
   1025         CASE("right-middle") {
   1026             if (LIKELY(!hasEscape))
   1027                 m_token = RIGHTMIDDLE_SYM;
   1028         }
   1029         CASE("right-bottom") {
   1030             if (LIKELY(!hasEscape))
   1031                 m_token = RIGHTBOTTOM_SYM;
   1032         }
   1033         CASE("supports") {
   1034             m_parsingMode = SupportsMode;
   1035             m_token = SUPPORTS_SYM;
   1036         }
   1037         CASE("top-left") {
   1038             if (LIKELY(!hasEscape))
   1039                 m_token = TOPLEFT_SYM;
   1040         }
   1041         CASE("top-right") {
   1042             if (LIKELY(!hasEscape))
   1043                 m_token = TOPRIGHT_SYM;
   1044         }
   1045         CASE("top-center") {
   1046             if (LIKELY(!hasEscape))
   1047                 m_token = TOPCENTER_SYM;
   1048         }
   1049         CASE("top-left-corner") {
   1050             if (LIKELY(!hasEscape))
   1051                 m_token = TOPLEFTCORNER_SYM;
   1052         }
   1053         CASE("top-right-corner") {
   1054             if (LIKELY(!hasEscape))
   1055                 m_token = TOPRIGHTCORNER_SYM;
   1056         }
   1057         CASE("viewport") {
   1058             m_token = VIEWPORT_RULE_SYM;
   1059         }
                // All -internal-* keywords below additionally require m_internal to be set.
   1060         CASE("-internal-rule") {
   1061             if (LIKELY(!hasEscape && m_internal))
   1062                 m_token = INTERNAL_RULE_SYM;
   1063         }
   1064         CASE("-internal-decls") {
   1065             if (LIKELY(!hasEscape && m_internal))
   1066                 m_token = INTERNAL_DECLS_SYM;
   1067         }
   1068         CASE("-internal-value") {
   1069             if (LIKELY(!hasEscape && m_internal))
   1070                 m_token = INTERNAL_VALUE_SYM;
   1071         }
   1072         CASE("-webkit-keyframes") {
   1073             m_token = WEBKIT_KEYFRAMES_SYM;
   1074         }
   1075         CASE("-internal-selector") {
   1076             if (LIKELY(!hasEscape && m_internal))
   1077                 m_token = INTERNAL_SELECTOR_SYM;
   1078         }
   1079         CASE("-internal-medialist") {
   1080             if (!m_internal)
   1081                 return;
   1082             m_parsingMode = MediaQueryMode;
   1083             m_token = INTERNAL_MEDIALIST_SYM;
   1084         }
   1085         CASE("-internal-keyframe-rule") {
   1086             if (LIKELY(!hasEscape && m_internal))
   1087                 m_token = INTERNAL_KEYFRAME_RULE_SYM;
   1088         }
   1089         CASE("-internal-keyframe-key-list") {
   1090             if (!m_internal)
   1091                 return;
   1092             m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
   1093         }
   1094         CASE("-internal-supports-condition") {
   1095             if (!m_internal)
   1096                 return;
   1097             m_parsingMode = SupportsMode;
   1098             m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
   1099         }
   1100     }
   1101 }
   1102 
   // Must only be called in SupportsMode (asserted). Matches the first |length|
   // characters at tokenStart() against "or", "and" and "not" and, on a match,
   // replaces m_token with the corresponding SUPPORTS_* token code. m_token is
   // left unchanged otherwise.
   1103 template <typename CharacterType>
   1104 inline void CSSTokenizer::detectSupportsToken(int length)
   1105 {
   1106     ASSERT(m_parsingMode == SupportsMode);
   1107     CharacterType* name = tokenStart<CharacterType>();
   1108 
   1109     SWITCH(name, length) {
   1110         CASE("or") {
   1111             m_token = SUPPORTS_OR;
   1112         }
   1113         CASE("and") {
   1114             m_token = SUPPORTS_AND;
   1115         }
   1116         CASE("not") {
   1117             m_token = SUPPORTS_NOT;
   1118         }
   1119     }
   1120 }
   1121 
   // Scans one token starting at the current character and returns its token
   // code, which is also stored in m_token.
   //
   // |yylvalWithoutType| is cast to YYSTYPE*; depending on the token, its
   // |string| or |number| member is filled in. The first character is classified
   // through typesOfASCIICharacters (anything above 127 is treated as
   // CharacterIdentifierStart) and the switch below handles each class. Comments
   // are skipped by jumping back to restartAfterComment, so they never produce a
   // token. Several branches update m_parsingMode and m_lineNumber as they go.
   1122 template <typename SrcCharacterType>
   1123 int CSSTokenizer::realLex(void* yylvalWithoutType)
   1124 {
   1125     YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
   1126     // Write pointer for the next character.
   1127     SrcCharacterType* result;
   1128     CSSParserString resultString;
   1129     bool hasEscape;
   1130 
   1131     // The input buffer is terminated by a \0 character, so
   1132     // it is safe to read one character ahead of a known non-null.
   1133 #ifndef NDEBUG
   1134     // In debug we check with an ASSERT that the length is > 0 for string types.
   1135     yylval->string.clear();
   1136 #endif
   1137 
   1138 restartAfterComment:
   1139     result = currentCharacter<SrcCharacterType>();
   1140     setTokenStart(result);
   1141     m_tokenStartLineNumber = m_lineNumber;
            // Provisionally use the first character itself as the token code and step
            // past it; the branches below replace m_token when a longer token is found.
   1142     m_token = *currentCharacter<SrcCharacterType>();
   1143     ++currentCharacter<SrcCharacterType>();
   1144 
   1145     switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
   1146     case CharacterCaselessU:
   1147         if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
   1148             if (parseUnicodeRange<SrcCharacterType>()) {
   1149                 m_token = UNICODERANGE;
   1150                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1151                 break;
   1152             }
   1153         }
   1154         // Fall through to CharacterIdentifierStart.
   1155 
   1156     case CharacterIdentifierStart:
                // Undo the advance above before handing over to parseIdentifier.
   1157         --currentCharacter<SrcCharacterType>();
   1158         parseIdentifier(result, yylval->string, hasEscape);
   1159         m_token = IDENT;
   1160 
   1161         if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
   1162             if (m_parsingMode == SupportsMode && !hasEscape) {
                        // In SupportsMode "or(", "and(" and "not(" are operators, not functions.
   1163                 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1164                 if (m_token != IDENT)
   1165                     break;
   1166             }
   1167 
   1168             m_token = FUNCTION;
   1169             if (!hasEscape)
   1170                 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1171 
   1172             // Skip parenthesis
   1173             ++currentCharacter<SrcCharacterType>();
   1174             ++result;
   1175             ++yylval->string.m_length;
   1176 
   1177             if (m_token == URI) {
   1178                 m_token = FUNCTION;
   1179                 // Check whether it is really an URI.
   1180                 if (yylval->string.is8Bit())
   1181                     parseURI<LChar>(yylval->string);
   1182                 else
   1183                     parseURI<UChar>(yylval->string);
   1184             }
   1185         } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
   1186             if (m_parsingMode == MediaQueryMode) {
   1187                 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1188             } else if (m_parsingMode == SupportsMode) {
   1189                 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1190             } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
   1191                 if (result - tokenStart<SrcCharacterType>() == 1) {
   1192                     // String "n" is IDENT but "n+1" is NTH.
   1193                     if (parseNthChildExtra<SrcCharacterType>()) {
   1194                         m_token = NTH;
   1195                         yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
   1196                     }
   1197                 } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
   1198                     // String "n-" is IDENT but "n-1" is NTH.
   1199                     // Set currentCharacter to '-' to continue parsing.
   1200                     SrcCharacterType* nextCharacter = result;
   1201                     currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
   1202                     if (parseNthChildExtra<SrcCharacterType>()) {
   1203                         m_token = NTH;
   1204                         yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1205                     } else {
   1206                         // Revert the change to currentCharacter if unsuccessful.
   1207                         currentCharacter<SrcCharacterType>() = nextCharacter;
   1208                     }
   1209                 }
   1210             }
   1211         }
   1212         break;
   1213 
   1214     case CharacterDot:
                // A dot that is not followed by a digit stays a plain '.' token.
   1215         if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
   1216             break;
   1217         // Fall through to CharacterNumber.
   1218 
   1219     case CharacterNumber: {
                // m_token still holds the first character here, so this detects a leading dot.
   1220         bool dotSeen = (m_token == '.');
   1221 
   1222         while (true) {
   1223             if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
   1224                 // Only one dot is allowed for a number,
   1225                 // and it must be followed by a digit.
   1226                 if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
   1227                     break;
   1228                 dotSeen = true;
   1229             }
   1230             ++currentCharacter<SrcCharacterType>();
   1231         }
   1232 
   1233         if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
   1234             // "[0-9]+n" is always an NthChild.
   1235             ++currentCharacter<SrcCharacterType>();
                    // The "+ b" / "- b" part is optional, so the result is deliberately ignored.
   1236             parseNthChildExtra<SrcCharacterType>();
   1237             m_token = NTH;
   1238             yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1239             break;
   1240         }
   1241 
   1242         // Use SVG parser for numbers on SVG presentation attributes.
   1243         if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
   1244             // We need to take care of units like 'em' or 'ex'.
   1245             SrcCharacterType* character = currentCharacter<SrcCharacterType>();
   1246             if (isASCIIAlphaCaselessEqual(*character, 'e')) {
   1247                 ASSERT(character - tokenStart<SrcCharacterType>() > 0);
   1248                 ++character;
   1249                 if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
   1250                     ++character;
   1251                     while (isASCIIDigit(*character))
   1252                         ++character;
   1253                     // Use FLOATTOKEN if the string contains exponents.
   1254                     dotSeen = true;
   1255                     currentCharacter<SrcCharacterType>() = character;
   1256                 }
   1257             }
                    // NOTE(review): when the 'e' is not followed by an exponent, |character|
                    // still points one past the 'e', so that 'e' is included in the range
                    // handed to parseSVGNumber. If parsing fails, m_token keeps the first
                    // character of the token.
   1258             if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
   1259                 break;
   1260         } else {
   1261             yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1262         }
   1263 
   1264         // Type of the function.
   1265         if (isIdentifierStart<SrcCharacterType>()) {
   1266             SrcCharacterType* type = currentCharacter<SrcCharacterType>();
   1267             result = currentCharacter<SrcCharacterType>();
   1268 
   1269             parseIdentifier(result, resultString, hasEscape);
   1270 
                    // DIMEN is the default; detectNumberToken replaces it for known units.
   1271             m_token = DIMEN;
   1272             if (!hasEscape)
   1273                 detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);
   1274 
   1275             if (m_token == DIMEN) {
   1276                 // The decoded number is overwritten, but this is intentional.
   1277                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1278             }
   1279         } else if (*currentCharacter<SrcCharacterType>() == '%') {
   1280             // Although the CSS grammar says {num}% we follow
   1281             // webkit at the moment which uses {num}%+.
   1282             do {
   1283                 ++currentCharacter<SrcCharacterType>();
   1284             } while (*currentCharacter<SrcCharacterType>() == '%');
   1285             m_token = PERCENTAGE;
   1286         } else {
   1287             m_token = dotSeen ? FLOATTOKEN : INTEGER;
   1288         }
   1289         break;
   1290     }
   1291 
   1292     case CharacterDash:
   1293         if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
   1294             --currentCharacter<SrcCharacterType>();
   1295             parseIdentifier(result, resultString, hasEscape);
   1296             m_token = IDENT;
   1297 
   1298             if (*currentCharacter<SrcCharacterType>() == '(') {
   1299                 m_token = FUNCTION;
   1300                 if (!hasEscape)
   1301                     detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                        // Include the parenthesis in the token.
   1302                 ++currentCharacter<SrcCharacterType>();
   1303                 ++result;
   1304             } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
   1305                 if (result - tokenStart<SrcCharacterType>() == 2) {
   1306                     // String "-n" is IDENT but "-n+1" is NTH.
   1307                     if (parseNthChildExtra<SrcCharacterType>()) {
   1308                         m_token = NTH;
   1309                         result = currentCharacter<SrcCharacterType>();
   1310                     }
   1311                 } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
   1312                     // String "-n-" is IDENT but "-n-1" is NTH.
   1313                     // Set currentCharacter to second '-' of '-n-' to continue parsing.
   1314                     SrcCharacterType* nextCharacter = result;
   1315                     currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
   1316                     if (parseNthChildExtra<SrcCharacterType>()) {
   1317                         m_token = NTH;
   1318                         result = currentCharacter<SrcCharacterType>();
   1319                     } else {
   1320                         // Revert the change to currentCharacter if unsuccessful.
   1321                         currentCharacter<SrcCharacterType>() = nextCharacter;
   1322                     }
   1323                 }
   1324             }
                    // |result| marks the end of the token in every branch above.
   1325             resultString.setLength(result - tokenStart<SrcCharacterType>());
   1326             yylval->string = resultString;
   1327         } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
                    // "-->"
   1328             currentCharacter<SrcCharacterType>() += 2;
   1329             m_token = SGML_CD;
   1330         } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
   1331             // "-[0-9]+n" is always an NthChild.
   1332             if (parseNthChild<SrcCharacterType>()) {
   1333                 parseNthChildExtra<SrcCharacterType>();
   1334                 m_token = NTH;
   1335                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1336             }
   1337         }
   1338         break;
   1339 
   1340     case CharacterOther:
   1341         // m_token is simply the current character.
   1342         break;
   1343 
   1344     case CharacterNull:
   1345         // Do not advance pointer at the end of input.
   1346         --currentCharacter<SrcCharacterType>();
   1347         break;
   1348 
   1349     case CharacterWhiteSpace:
   1350         m_token = WHITESPACE;
   1351         // Might start with a '\n'.
   1352         --currentCharacter<SrcCharacterType>();
   1353         do {
   1354             if (*currentCharacter<SrcCharacterType>() == '\n')
   1355                 ++m_lineNumber;
   1356             ++currentCharacter<SrcCharacterType>();
   1357         } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
   1358         break;
   1359 
   1360     case CharacterEndMediaQueryOrSupports:
   1361         if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
   1362             m_parsingMode = NormalMode;
   1363         break;
   1364 
   1365     case CharacterEndNthChild:
   1366         if (m_parsingMode == NthChildMode)
   1367             m_parsingMode = NormalMode;
   1368         break;
   1369 
   1370     case CharacterQuote:
                // m_token currently holds the quote character and is passed on as such.
   1371         if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, AbortIfInvalid)) {
   1372             ++result;
   1373             parseString<SrcCharacterType>(result, yylval->string, m_token);
   1374             m_token = STRING;
   1375         }
   1376         break;
   1377 
   1378     case CharacterExclamationMark: {
   1379         SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
   1380         if (isEqualToCSSIdentifier(start, "important")) {
   1381             m_token = IMPORTANT_SYM;
                    // 9 is the length of "important".
   1382             currentCharacter<SrcCharacterType>() = start + 9;
   1383         }
   1384         break;
   1385     }
   1386 
   1387     case CharacterHashmark: {
   1388         SrcCharacterType* start = currentCharacter<SrcCharacterType>();
   1389         result = currentCharacter<SrcCharacterType>();
   1390 
   1391         if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
   1392             // This must be a valid hex number token.
   1393             do {
   1394                 ++currentCharacter<SrcCharacterType>();
   1395             } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
   1396             m_token = HEX;
   1397             yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
   1398         } else if (isIdentifierStart<SrcCharacterType>()) {
   1399             m_token = IDSEL;
   1400             parseIdentifier(result, yylval->string, hasEscape);
   1401             if (!hasEscape) {
   1402                 // Check whether the identifier is also a valid hex number.
   1403                 SrcCharacterType* current = start;
   1404                 m_token = HEX;
   1405                 do {
   1406                     if (!isASCIIHexDigit(*current)) {
   1407                         m_token = IDSEL;
   1408                         break;
   1409                     }
   1410                     ++current;
   1411                 } while (current < result);
   1412             }
   1413         }
   1414         break;
   1415     }
   1416 
   1417     case CharacterSlash:
   1418         // Ignore comments. They are not even considered as white spaces.
   1419         if (*currentCharacter<SrcCharacterType>() == '*') {
   1420             const CSSParserLocation startLocation = currentLocation();
   1421             if (m_parser.m_observer) {
   1422                 unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
   1423                 m_parser.m_observer->startComment(startOffset - m_parsedTextPrefixLength);
   1424             }
   1425             ++currentCharacter<SrcCharacterType>();
   1426             while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
   1427                 if (*currentCharacter<SrcCharacterType>() == '\n')
   1428                     ++m_lineNumber;
   1429                 if (*currentCharacter<SrcCharacterType>() == '\0') {
   1430                     // Unterminated comments are simply ignored.
                            // Step back by two so that the "+= 2" after the loop leaves the
                            // pointer on the terminating '\0'.
   1431                     currentCharacter<SrcCharacterType>() -= 2;
   1432                     m_parser.reportError(startLocation, UnterminatedCommentCSSError);
   1433                     break;
   1434                 }
   1435                 ++currentCharacter<SrcCharacterType>();
   1436             }
                    // Skip the closing "*/".
   1437             currentCharacter<SrcCharacterType>() += 2;
   1438             if (m_parser.m_observer) {
   1439                 unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
   1440                 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
   1441                 m_parser.m_observer->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
   1442             }
   1443             goto restartAfterComment;
   1444         }
   1445         break;
   1446 
   1447     case CharacterDollar:
   1448         if (*currentCharacter<SrcCharacterType>() == '=') {
   1449             ++currentCharacter<SrcCharacterType>();
   1450             m_token = ENDSWITH;
   1451         }
   1452         break;
   1453 
   1454     case CharacterAsterisk:
   1455         if (*currentCharacter<SrcCharacterType>() == '=') {
   1456             ++currentCharacter<SrcCharacterType>();
   1457             m_token = CONTAINS;
   1458         }
   1459         break;
   1460 
   1461     case CharacterPlus:
   1462         if (UNLIKELY(m_parsingMode == NthChildMode)) {
   1463             // Simplest case. "+[0-9]*n" is always NthChild.
   1464             if (parseNthChild<SrcCharacterType>()) {
   1465                 parseNthChildExtra<SrcCharacterType>();
   1466                 m_token = NTH;
   1467                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1468             }
   1469         }
   1470         break;
   1471 
   1472     case CharacterLess:
                // "<!--" is reported with the same token code (SGML_CD) as "-->".
   1473         if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
   1474             currentCharacter<SrcCharacterType>() += 3;
   1475             m_token = SGML_CD;
   1476         }
   1477         break;
   1478 
   1479     case CharacterAt:
   1480         if (isIdentifierStart<SrcCharacterType>()) {
   1481             m_token = ATKEYWORD;
   1482             ++result;
   1483             parseIdentifier(result, resultString, hasEscape);
   1484             // The standard enables unicode escapes in at-rules. In this case only the resultString will contain the
   1485             // correct identifier, hence we have to use it to determine its length instead of the usual pointer arithmetic.
                    // The "+ 1" accounts for the leading '@', which detectAtToken expects to be counted.
   1486             detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape);
   1487         }
   1488         break;
   1489 
   1490     case CharacterBackSlash:
   1491         if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
   1492             --currentCharacter<SrcCharacterType>();
   1493             parseIdentifier(result, yylval->string, hasEscape);
   1494             m_token = IDENT;
   1495         }
   1496         break;
   1497 
   1498     case CharacterXor:
   1499         if (*currentCharacter<SrcCharacterType>() == '=') {
   1500             ++currentCharacter<SrcCharacterType>();
   1501             m_token = BEGINSWITH;
   1502         }
   1503         break;
   1504 
   1505     case CharacterVerticalBar:
   1506         if (*currentCharacter<SrcCharacterType>() == '=') {
   1507             ++currentCharacter<SrcCharacterType>();
   1508             m_token = DASHMATCH;
   1509         }
   1510         break;
   1511 
   1512     case CharacterTilde:
   1513         if (*currentCharacter<SrcCharacterType>() == '=') {
   1514             ++currentCharacter<SrcCharacterType>();
   1515             m_token = INCLUDES;
   1516         }
   1517         break;
   1518 
   1519     default:
   1520         ASSERT_NOT_REACHED();
   1521         break;
   1522     }
   1523 
   1524     return m_token;
   1525 }
   1526 
   1527 template <>
   1528 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
   1529 {
   1530     m_tokenStart.ptr8 = tokenStart;
   1531 }
   1532 
   1533 template <>
   1534 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
   1535 {
   1536     m_tokenStart.ptr16 = tokenStart;
   1537 }
   1538 
   1539 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
   1540 {
   1541     m_parsedTextPrefixLength = prefixLength;
   1542     m_parsedTextSuffixLength = suffixLength;
   1543     unsigned stringLength = string.length();
   1544     unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
   1545     m_length = length;
   1546 
   1547     if (!stringLength || string.is8Bit()) {
   1548         m_dataStart8 = adoptArrayPtr(new LChar[length]);
   1549         for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
   1550             m_dataStart8[i] = prefix[i];
   1551 
   1552         if (stringLength)
   1553             memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));
   1554 
   1555         unsigned start = m_parsedTextPrefixLength + stringLength;
   1556         unsigned end = start + suffixLength;
   1557         for (unsigned i = start; i < end; i++)
   1558             m_dataStart8[i] = suffix[i - start];
   1559 
   1560         m_dataStart8[length - 1] = 0;
   1561 
   1562         m_is8BitSource = true;
   1563         m_currentCharacter8 = m_dataStart8.get();
   1564         m_currentCharacter16 = 0;
   1565         setTokenStart<LChar>(m_currentCharacter8);
   1566         m_lexFunc = &CSSTokenizer::realLex<LChar>;
   1567         return;
   1568     }
   1569 
   1570     m_dataStart16 = adoptArrayPtr(new UChar[length]);
   1571     for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
   1572         m_dataStart16[i] = prefix[i];
   1573 
   1574     ASSERT(stringLength);
   1575     memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));
   1576 
   1577     unsigned start = m_parsedTextPrefixLength + stringLength;
   1578     unsigned end = start + suffixLength;
   1579     for (unsigned i = start; i < end; i++)
   1580         m_dataStart16[i] = suffix[i - start];
   1581 
   1582     m_dataStart16[length - 1] = 0;
   1583 
   1584     m_is8BitSource = false;
   1585     m_currentCharacter8 = 0;
   1586     m_currentCharacter16 = m_dataStart16.get();
   1587     setTokenStart<UChar>(m_currentCharacter16);
   1588     m_lexFunc = &CSSTokenizer::realLex<UChar>;
   1589 }
   1590 
   1591 } // namespace WebCore
   1592