Home | History | Annotate | Download | only in css
      1 /*
      2  * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2005 Allan Sandfeld Jensen (kde (at) carewolf.com)
      4  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved.
      5  * Copyright (C) 2007 Nicholas Shanks <webkit (at) nickshanks.com>
      6  * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org>
      7  * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/)
      8  * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved.
      9  * Copyright (C) 2012 Intel Corporation. All rights reserved.
     10  *
     11  * This library is free software; you can redistribute it and/or
     12  * modify it under the terms of the GNU Library General Public
     13  * License as published by the Free Software Foundation; either
     14  * version 2 of the License, or (at your option) any later version.
     15  *
     16  * This library is distributed in the hope that it will be useful,
     17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     19  * Library General Public License for more details.
     20  *
     21  * You should have received a copy of the GNU Library General Public License
     22  * along with this library; see the file COPYING.LIB.  If not, write to
     23  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     24  * Boston, MA 02110-1301, USA.
     25  */
     26 
     27 #include "config.h"
     28 #include "core/css/CSSTokenizer.h"
     29 
     30 #include "core/css/CSSKeyframeRule.h"
     31 #include "core/css/CSSParser.h"
     32 #include "core/css/CSSParserValues.h"
     33 #include "core/css/MediaQuery.h"
     34 #include "core/css/StyleRule.h"
     35 #include "core/html/parser/HTMLParserIdioms.h"
     36 #include "core/svg/SVGParserUtilities.h"
     37 
     38 namespace WebCore {
     39 
     40 #include "CSSGrammar.h"
     41 
     42 enum CharacterType {
     43     // Types for the main switch.
     44 
     45     // The first 4 types must be grouped together, as they
     46     // represent the allowed chars in an identifier.
     47     CharacterCaselessU,
     48     CharacterIdentifierStart,
     49     CharacterNumber,
     50     CharacterDash,
     51 
     52     CharacterOther,
     53     CharacterNull,
     54     CharacterWhiteSpace,
     55     CharacterEndMediaQueryOrSupports,
     56     CharacterEndNthChild,
     57     CharacterQuote,
     58     CharacterExclamationMark,
     59     CharacterHashmark,
     60     CharacterDollar,
     61     CharacterAsterisk,
     62     CharacterPlus,
     63     CharacterDot,
     64     CharacterSlash,
     65     CharacterLess,
     66     CharacterAt,
     67     CharacterBackSlash,
     68     CharacterXor,
     69     CharacterVerticalBar,
     70     CharacterTilde,
     71 };
     72 
     73 // 128 ASCII codes
     74 static const CharacterType typesOfASCIICharacters[128] = {
     75 /*   0 - Null               */ CharacterNull,
     76 /*   1 - Start of Heading   */ CharacterOther,
     77 /*   2 - Start of Text      */ CharacterOther,
     78 /*   3 - End of Text        */ CharacterOther,
     79 /*   4 - End of Transm.     */ CharacterOther,
     80 /*   5 - Enquiry            */ CharacterOther,
     81 /*   6 - Acknowledgment     */ CharacterOther,
     82 /*   7 - Bell               */ CharacterOther,
     83 /*   8 - Back Space         */ CharacterOther,
     84 /*   9 - Horizontal Tab     */ CharacterWhiteSpace,
     85 /*  10 - Line Feed          */ CharacterWhiteSpace,
     86 /*  11 - Vertical Tab       */ CharacterOther,
     87 /*  12 - Form Feed          */ CharacterWhiteSpace,
     88 /*  13 - Carriage Return    */ CharacterWhiteSpace,
     89 /*  14 - Shift Out          */ CharacterOther,
     90 /*  15 - Shift In           */ CharacterOther,
     91 /*  16 - Data Line Escape   */ CharacterOther,
     92 /*  17 - Device Control 1   */ CharacterOther,
     93 /*  18 - Device Control 2   */ CharacterOther,
     94 /*  19 - Device Control 3   */ CharacterOther,
     95 /*  20 - Device Control 4   */ CharacterOther,
     96 /*  21 - Negative Ack.      */ CharacterOther,
     97 /*  22 - Synchronous Idle   */ CharacterOther,
     98 /*  23 - End of Transmit    */ CharacterOther,
     99 /*  24 - Cancel             */ CharacterOther,
    100 /*  25 - End of Medium      */ CharacterOther,
    101 /*  26 - Substitute         */ CharacterOther,
    102 /*  27 - Escape             */ CharacterOther,
    103 /*  28 - File Separator     */ CharacterOther,
    104 /*  29 - Group Separator    */ CharacterOther,
    105 /*  30 - Record Separator   */ CharacterOther,
    106 /*  31 - Unit Separator     */ CharacterOther,
    107 /*  32 - Space              */ CharacterWhiteSpace,
    108 /*  33 - !                  */ CharacterExclamationMark,
    109 /*  34 - "                  */ CharacterQuote,
    110 /*  35 - #                  */ CharacterHashmark,
    111 /*  36 - $                  */ CharacterDollar,
    112 /*  37 - %                  */ CharacterOther,
    113 /*  38 - &                  */ CharacterOther,
    114 /*  39 - '                  */ CharacterQuote,
    115 /*  40 - (                  */ CharacterOther,
    116 /*  41 - )                  */ CharacterEndNthChild,
    117 /*  42 - *                  */ CharacterAsterisk,
    118 /*  43 - +                  */ CharacterPlus,
    119 /*  44 - ,                  */ CharacterOther,
    120 /*  45 - -                  */ CharacterDash,
    121 /*  46 - .                  */ CharacterDot,
    122 /*  47 - /                  */ CharacterSlash,
    123 /*  48 - 0                  */ CharacterNumber,
    124 /*  49 - 1                  */ CharacterNumber,
    125 /*  50 - 2                  */ CharacterNumber,
    126 /*  51 - 3                  */ CharacterNumber,
    127 /*  52 - 4                  */ CharacterNumber,
    128 /*  53 - 5                  */ CharacterNumber,
    129 /*  54 - 6                  */ CharacterNumber,
    130 /*  55 - 7                  */ CharacterNumber,
    131 /*  56 - 8                  */ CharacterNumber,
    132 /*  57 - 9                  */ CharacterNumber,
    133 /*  58 - :                  */ CharacterOther,
    134 /*  59 - ;                  */ CharacterEndMediaQueryOrSupports,
    135 /*  60 - <                  */ CharacterLess,
    136 /*  61 - =                  */ CharacterOther,
    137 /*  62 - >                  */ CharacterOther,
    138 /*  63 - ?                  */ CharacterOther,
    139 /*  64 - @                  */ CharacterAt,
    140 /*  65 - A                  */ CharacterIdentifierStart,
    141 /*  66 - B                  */ CharacterIdentifierStart,
    142 /*  67 - C                  */ CharacterIdentifierStart,
    143 /*  68 - D                  */ CharacterIdentifierStart,
    144 /*  69 - E                  */ CharacterIdentifierStart,
    145 /*  70 - F                  */ CharacterIdentifierStart,
    146 /*  71 - G                  */ CharacterIdentifierStart,
    147 /*  72 - H                  */ CharacterIdentifierStart,
    148 /*  73 - I                  */ CharacterIdentifierStart,
    149 /*  74 - J                  */ CharacterIdentifierStart,
    150 /*  75 - K                  */ CharacterIdentifierStart,
    151 /*  76 - L                  */ CharacterIdentifierStart,
    152 /*  77 - M                  */ CharacterIdentifierStart,
    153 /*  78 - N                  */ CharacterIdentifierStart,
    154 /*  79 - O                  */ CharacterIdentifierStart,
    155 /*  80 - P                  */ CharacterIdentifierStart,
    156 /*  81 - Q                  */ CharacterIdentifierStart,
    157 /*  82 - R                  */ CharacterIdentifierStart,
    158 /*  83 - S                  */ CharacterIdentifierStart,
    159 /*  84 - T                  */ CharacterIdentifierStart,
    160 /*  85 - U                  */ CharacterCaselessU,
    161 /*  86 - V                  */ CharacterIdentifierStart,
    162 /*  87 - W                  */ CharacterIdentifierStart,
    163 /*  88 - X                  */ CharacterIdentifierStart,
    164 /*  89 - Y                  */ CharacterIdentifierStart,
    165 /*  90 - Z                  */ CharacterIdentifierStart,
    166 /*  91 - [                  */ CharacterOther,
    167 /*  92 - \                  */ CharacterBackSlash,
    168 /*  93 - ]                  */ CharacterOther,
    169 /*  94 - ^                  */ CharacterXor,
    170 /*  95 - _                  */ CharacterIdentifierStart,
    171 /*  96 - `                  */ CharacterOther,
    172 /*  97 - a                  */ CharacterIdentifierStart,
    173 /*  98 - b                  */ CharacterIdentifierStart,
    174 /*  99 - c                  */ CharacterIdentifierStart,
    175 /* 100 - d                  */ CharacterIdentifierStart,
    176 /* 101 - e                  */ CharacterIdentifierStart,
    177 /* 102 - f                  */ CharacterIdentifierStart,
    178 /* 103 - g                  */ CharacterIdentifierStart,
    179 /* 104 - h                  */ CharacterIdentifierStart,
    180 /* 105 - i                  */ CharacterIdentifierStart,
    181 /* 106 - j                  */ CharacterIdentifierStart,
    182 /* 107 - k                  */ CharacterIdentifierStart,
    183 /* 108 - l                  */ CharacterIdentifierStart,
    184 /* 109 - m                  */ CharacterIdentifierStart,
    185 /* 110 - n                  */ CharacterIdentifierStart,
    186 /* 111 - o                  */ CharacterIdentifierStart,
    187 /* 112 - p                  */ CharacterIdentifierStart,
    188 /* 113 - q                  */ CharacterIdentifierStart,
    189 /* 114 - r                  */ CharacterIdentifierStart,
    190 /* 115 - s                  */ CharacterIdentifierStart,
    191 /* 116 - t                  */ CharacterIdentifierStart,
    192 /* 117 - u                  */ CharacterCaselessU,
    193 /* 118 - v                  */ CharacterIdentifierStart,
    194 /* 119 - w                  */ CharacterIdentifierStart,
    195 /* 120 - x                  */ CharacterIdentifierStart,
    196 /* 121 - y                  */ CharacterIdentifierStart,
    197 /* 122 - z                  */ CharacterIdentifierStart,
    198 /* 123 - {                  */ CharacterEndMediaQueryOrSupports,
    199 /* 124 - |                  */ CharacterVerticalBar,
    200 /* 125 - }                  */ CharacterOther,
    201 /* 126 - ~                  */ CharacterTilde,
    202 /* 127 - Delete             */ CharacterOther,
    203 };
    204 
    205 // Utility functions for the CSS tokenizer.
    206 
    207 template <typename CharacterType>
    208 static inline bool isCSSLetter(CharacterType character)
    209 {
    210     return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash;
    211 }
    212 
    213 template <typename CharacterType>
    214 static inline bool isCSSEscape(CharacterType character)
    215 {
    216     return character >= ' ' && character != 127;
    217 }
    218 
    219 template <typename CharacterType>
    220 static inline bool isURILetter(CharacterType character)
    221 {
    222     return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!';
    223 }
    224 
    225 template <typename CharacterType>
    226 static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter)
    227 {
    228     return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128
    229         || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1]));
    230 }
    231 
    232 template <typename CharacterType>
    233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString)
    234 {
    235     // Compare an character memory data with a zero terminated string.
    236     do {
    237         // The input must be part of an identifier if constantChar or constString
    238         // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'.
    239         ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-');
    240         ASSERT(*constantString != '-' || isCSSLetter(*cssString));
    241         if (toASCIILowerUnchecked(*cssString++) != (*constantString++))
    242             return false;
    243     } while (*constantString);
    244     return true;
    245 }
    246 
    247 template <typename CharacterType>
    248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString)
    249 {
    250     ASSERT(*constantString);
    251 
    252     do {
    253         if (*string++ != *constantString++)
    254             return false;
    255     } while (*constantString);
    256     return true;
    257 }
    258 
    259 template <typename CharacterType>
    260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter)
    261 {
    262     // Returns with 0, if escape check is failed. Otherwise
    263     // it returns with the following character.
    264     ASSERT(*currentCharacter == '\\');
    265 
    266     ++currentCharacter;
    267     if (!isCSSEscape(*currentCharacter))
    268         return 0;
    269 
    270     if (isASCIIHexDigit(*currentCharacter)) {
    271         int length = 6;
    272 
    273         do {
    274             ++currentCharacter;
    275         } while (isASCIIHexDigit(*currentCharacter) && --length);
    276 
    277         // Optional space after the escape sequence.
    278         if (isHTMLSpace<CharacterType>(*currentCharacter))
    279             ++currentCharacter;
    280         return currentCharacter;
    281     }
    282     return currentCharacter + 1;
    283 }
    284 
    285 template <typename CharacterType>
    286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter)
    287 {
    288     while (isHTMLSpace<CharacterType>(*currentCharacter))
    289         ++currentCharacter;
    290     return currentCharacter;
    291 }
    292 
    293 // Main CSS tokenizer functions.
    294 
    295 template <>
    296 inline LChar*& CSSTokenizer::currentCharacter<LChar>()
    297 {
    298     return m_currentCharacter8;
    299 }
    300 
    301 template <>
    302 inline UChar*& CSSTokenizer::currentCharacter<UChar>()
    303 {
    304     return m_currentCharacter16;
    305 }
    306 
    307 UChar*& CSSTokenizer::currentCharacter16()
    308 {
    309     if (!m_currentCharacter16) {
    310         m_dataStart16 = adoptArrayPtr(new UChar[m_length]);
    311         m_currentCharacter16 = m_dataStart16.get();
    312     }
    313 
    314     return m_currentCharacter16;
    315 }
    316 
    317 template <>
    318 inline LChar* CSSTokenizer::dataStart<LChar>()
    319 {
    320     return m_dataStart8.get();
    321 }
    322 
    323 template <>
    324 inline UChar* CSSTokenizer::dataStart<UChar>()
    325 {
    326     return m_dataStart16.get();
    327 }
    328 
    329 template <typename CharacterType>
    330 inline CSSParserLocation CSSTokenizer::tokenLocation()
    331 {
    332     CSSParserLocation location;
    333     location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>());
    334     location.lineNumber = m_tokenStartLineNumber;
    335     location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>();
    336     return location;
    337 }
    338 
    339 CSSParserLocation CSSTokenizer::currentLocation()
    340 {
    341     if (is8BitSource())
    342         return tokenLocation<LChar>();
    343     return tokenLocation<UChar>();
    344 }
    345 
    346 template <typename CharacterType>
    347 inline bool CSSTokenizer::isIdentifierStart()
    348 {
    349     // Check whether an identifier is started.
    350     return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1);
    351 }
    352 
    353 template <typename CharacterType>
    354 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote)
    355 {
    356     // Returns with 0, if string check is failed. Otherwise
    357     // it returns with the following character. This is necessary
    358     // since we cannot revert escape sequences, thus strings
    359     // must be validated before parsing.
    360     while (true) {
    361         if (UNLIKELY(*currentCharacter == quote)) {
    362             // String parsing is successful.
    363             return currentCharacter + 1;
    364         }
    365         if (UNLIKELY(!*currentCharacter)) {
    366             // String parsing is successful up to end of input.
    367             return currentCharacter;
    368         }
    369         if (UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) {
    370             // String parsing is failed for character '\n', '\f' or '\r'.
    371             return 0;
    372         }
    373 
    374         if (LIKELY(currentCharacter[0] != '\\')) {
    375             ++currentCharacter;
    376         } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') {
    377             currentCharacter += 2;
    378         } else if (currentCharacter[1] == '\r') {
    379             currentCharacter += currentCharacter[2] == '\n' ? 3 : 2;
    380         } else {
    381             currentCharacter = checkAndSkipEscape(currentCharacter);
    382             if (!currentCharacter)
    383                 return 0;
    384         }
    385     }
    386 }
    387 
    388 template <typename CharacterType>
    389 unsigned CSSTokenizer::parseEscape(CharacterType*& src)
    390 {
    391     ASSERT(*src == '\\' && isCSSEscape(src[1]));
    392 
    393     unsigned unicode = 0;
    394 
    395     ++src;
    396     if (isASCIIHexDigit(*src)) {
    397 
    398         int length = 6;
    399 
    400         do {
    401             unicode = (unicode << 4) + toASCIIHexValue(*src++);
    402         } while (--length && isASCIIHexDigit(*src));
    403 
    404         // Characters above 0x10ffff are not handled.
    405         if (unicode > 0x10ffff)
    406             unicode = 0xfffd;
    407 
    408         // Optional space after the escape sequence.
    409         if (isHTMLSpace<CharacterType>(*src))
    410             ++src;
    411 
    412         return unicode;
    413     }
    414 
    415     return *currentCharacter<CharacterType>()++;
    416 }
    417 
    418 template <>
    419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode)
    420 {
    421     ASSERT(unicode <= 0xff);
    422     *result = unicode;
    423 
    424     ++result;
    425 }
    426 
    427 template <>
    428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode)
    429 {
    430     // Replace unicode with a surrogate pairs when it is bigger than 0xffff
    431     if (U16_LENGTH(unicode) == 2) {
    432         *result++ = U16_LEAD(unicode);
    433         *result = U16_TRAIL(unicode);
    434     } else {
    435         *result = unicode;
    436     }
    437 
    438     ++result;
    439 }
    440 
    441 template <typename SrcCharacterType, typename DestCharacterType>
    442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape)
    443 {
    444     hasEscape = false;
    445     do {
    446         if (LIKELY(*src != '\\')) {
    447             *result++ = *src++;
    448         } else {
    449             hasEscape = true;
    450             SrcCharacterType* savedEscapeStart = src;
    451             unsigned unicode = parseEscape<SrcCharacterType>(src);
    452             if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
    453                 src = savedEscapeStart;
    454                 return false;
    455             }
    456             UnicodeToChars(result, unicode);
    457         }
    458     } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1])));
    459 
    460     return true;
    461 }
    462 
    463 template <typename CharacterType>
    464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape)
    465 {
    466     // If a valid identifier start is found, we can safely
    467     // parse the identifier until the next invalid character.
    468     ASSERT(isIdentifierStart<CharacterType>());
    469 
    470     CharacterType* start = currentCharacter<CharacterType>();
    471     if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) {
    472         // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
    473         ASSERT(is8BitSource());
    474         UChar*& result16 = currentCharacter16();
    475         UChar* start16 = result16;
    476         int i = 0;
    477         for (; i < result - start; i++)
    478             result16[i] = start[i];
    479 
    480         result16 += i;
    481 
    482         parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape);
    483 
    484         resultString.init(start16, result16 - start16);
    485 
    486         return;
    487     }
    488 
    489     resultString.init(start, result - start);
    490 }
    491 
    492 template <typename SrcCharacterType, typename DestCharacterType>
    493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote)
    494 {
    495     while (true) {
    496         if (UNLIKELY(*src == quote)) {
    497             // String parsing is done.
    498             ++src;
    499             return true;
    500         }
    501         if (UNLIKELY(!*src)) {
    502             // String parsing is done, but don't advance pointer if at the end of input.
    503             return true;
    504         }
    505         ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v');
    506 
    507         if (LIKELY(src[0] != '\\')) {
    508             *result++ = *src++;
    509         } else if (src[1] == '\n' || src[1] == '\f') {
    510             src += 2;
    511         } else if (src[1] == '\r') {
    512             src += src[2] == '\n' ? 3 : 2;
    513         } else {
    514             SrcCharacterType* savedEscapeStart = src;
    515             unsigned unicode = parseEscape<SrcCharacterType>(src);
    516             if (unicode > 0xff && sizeof(DestCharacterType) == 1) {
    517                 src = savedEscapeStart;
    518                 return false;
    519             }
    520             UnicodeToChars(result, unicode);
    521         }
    522     }
    523 
    524     return true;
    525 }
    526 
    527 template <typename CharacterType>
    528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote)
    529 {
    530     CharacterType* start = currentCharacter<CharacterType>();
    531 
    532     if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) {
    533         // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue
    534         ASSERT(is8BitSource());
    535         UChar*& result16 = currentCharacter16();
    536         UChar* start16 = result16;
    537         int i = 0;
    538         for (; i < result - start; i++)
    539             result16[i] = start[i];
    540 
    541         result16 += i;
    542 
    543         parseStringInternal(currentCharacter<CharacterType>(), result16, quote);
    544 
    545         resultString.init(start16, result16 - start16);
    546         return;
    547     }
    548 
    549     resultString.init(start, result - start);
    550 }
    551 
    552 template <typename CharacterType>
    553 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote)
    554 {
    555     start = skipWhiteSpace(currentCharacter<CharacterType>());
    556 
    557     if (*start == '"' || *start == '\'') {
    558         quote = *start++;
    559         end = checkAndSkipString(start, quote);
    560         if (!end)
    561             return false;
    562     } else {
    563         quote = 0;
    564         end = start;
    565         while (isURILetter(*end)) {
    566             if (LIKELY(*end != '\\')) {
    567                 ++end;
    568             } else {
    569                 end = checkAndSkipEscape(end);
    570                 if (!end)
    571                     return false;
    572             }
    573         }
    574     }
    575 
    576     end = skipWhiteSpace(end);
    577     if (*end != ')')
    578         return false;
    579 
    580     return true;
    581 }
    582 
    583 template <typename SrcCharacterType, typename DestCharacterType>
    584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote)
    585 {
    586     if (quote) {
    587         ASSERT(quote == '"' || quote == '\'');
    588         return parseStringInternal(src, dest, quote);
    589     }
    590 
    591     while (isURILetter(*src)) {
    592         if (LIKELY(*src != '\\')) {
    593             *dest++ = *src++;
    594         } else {
    595             unsigned unicode = parseEscape<SrcCharacterType>(src);
    596             if (unicode > 0xff && sizeof(SrcCharacterType) == 1)
    597                 return false;
    598             UnicodeToChars(dest, unicode);
    599         }
    600     }
    601 
    602     return true;
    603 }
    604 
    605 template <typename CharacterType>
    606 inline void CSSTokenizer::parseURI(CSSParserString& string)
    607 {
    608     CharacterType* uriStart;
    609     CharacterType* uriEnd;
    610     UChar quote;
    611     if (!findURI(uriStart, uriEnd, quote))
    612         return;
    613 
    614     CharacterType* dest = currentCharacter<CharacterType>() = uriStart;
    615     if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) {
    616         string.init(uriStart, dest - uriStart);
    617     } else {
    618         // An escape sequence was encountered that can't be stored in 8 bits.
    619         // Reset the current character to the start of the URI and re-parse with
    620         // a 16-bit destination.
    621         ASSERT(is8BitSource());
    622         UChar* uriStart16 = currentCharacter16();
    623         currentCharacter<CharacterType>() = uriStart;
    624         bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote);
    625         ASSERT_UNUSED(result, result);
    626         string.init(uriStart16, currentCharacter16() - uriStart16);
    627     }
    628 
    629     currentCharacter<CharacterType>() = uriEnd + 1;
    630     m_token = URI;
    631 }
    632 
    633 template <typename CharacterType>
    634 inline bool CSSTokenizer::parseUnicodeRange()
    635 {
    636     CharacterType* character = currentCharacter<CharacterType>() + 1;
    637     int length = 6;
    638     ASSERT(*currentCharacter<CharacterType>() == '+');
    639 
    640     while (isASCIIHexDigit(*character) && length) {
    641         ++character;
    642         --length;
    643     }
    644 
    645     if (length && *character == '?') {
    646         // At most 5 hex digit followed by a question mark.
    647         do {
    648             ++character;
    649             --length;
    650         } while (*character == '?' && length);
    651         currentCharacter<CharacterType>() = character;
    652         return true;
    653     }
    654 
    655     if (length < 6) {
    656         // At least one hex digit.
    657         if (character[0] == '-' && isASCIIHexDigit(character[1])) {
    658             // Followed by a dash and a hex digit.
    659             ++character;
    660             length = 6;
    661             do {
    662                 ++character;
    663             } while (--length && isASCIIHexDigit(*character));
    664         }
    665         currentCharacter<CharacterType>() = character;
    666         return true;
    667     }
    668     return false;
    669 }
    670 
    671 template <typename CharacterType>
    672 bool CSSTokenizer::parseNthChild()
    673 {
    674     CharacterType* character = currentCharacter<CharacterType>();
    675 
    676     while (isASCIIDigit(*character))
    677         ++character;
    678     if (isASCIIAlphaCaselessEqual(*character, 'n')) {
    679         currentCharacter<CharacterType>() = character + 1;
    680         return true;
    681     }
    682     return false;
    683 }
    684 
    685 template <typename CharacterType>
    686 bool CSSTokenizer::parseNthChildExtra()
    687 {
    688     CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>());
    689     if (*character != '+' && *character != '-')
    690         return false;
    691 
    692     character = skipWhiteSpace(character + 1);
    693     if (!isASCIIDigit(*character))
    694         return false;
    695 
    696     do {
    697         ++character;
    698     } while (isASCIIDigit(*character));
    699 
    700     currentCharacter<CharacterType>() = character;
    701     return true;
    702 }
    703 
    704 template <typename CharacterType>
    705 inline bool CSSTokenizer::detectFunctionTypeToken(int length)
    706 {
    707     ASSERT(length > 0);
    708     CharacterType* name = tokenStart<CharacterType>();
    709     SWITCH(name, length) {
    710         CASE("not") {
    711             m_token = NOTFUNCTION;
    712             return true;
    713         }
    714         CASE("url") {
    715             m_token = URI;
    716             return true;
    717         }
    718         CASE("cue") {
    719             m_token = CUEFUNCTION;
    720             return true;
    721         }
    722         CASE("var") {
    723             if (!RuntimeEnabledFeatures::cssVariablesEnabled())
    724                 return false;
    725             m_token = VARFUNCTION;
    726             return true;
    727         }
    728         CASE("calc") {
    729             m_token = CALCFUNCTION;
    730             return true;
    731         }
    732         CASE("host") {
    733             m_token = HOSTFUNCTION;
    734             return true;
    735         }
    736         CASE("nth-child") {
    737             m_parsingMode = NthChildMode;
    738             return true;
    739         }
    740         CASE("nth-of-type") {
    741             m_parsingMode = NthChildMode;
    742             return true;
    743         }
    744         CASE("nth-last-child") {
    745             m_parsingMode = NthChildMode;
    746             return true;
    747         }
    748         CASE("nth-last-of-type") {
    749             m_parsingMode = NthChildMode;
    750             return true;
    751         }
    752     }
    753     return false;
    754 }
    755 
    // Classifies the identifier that starts at tokenStart<CharacterType>() as a
    // media query keyword. Must only be called while m_parsingMode is
    // MediaQueryMode (asserted).
    //
    // length is the number of characters handed to SWITCH for matching. When the
    // text matches "and", "not", "only" or "or", m_token becomes MEDIA_AND,
    // MEDIA_NOT, MEDIA_ONLY or MEDIA_OR respectively; no other statement in this
    // function assigns m_token.
    // NOTE(review): SWITCH/CASE are not defined in this part of the file; their
    // exact matching rules (e.g. case sensitivity) should be confirmed there.
    756 template <typename CharacterType>
    757 inline void CSSTokenizer::detectMediaQueryToken(int length)
    758 {
    759     ASSERT(m_parsingMode == MediaQueryMode);
    760     CharacterType* name = tokenStart<CharacterType>();
    761 
    762     SWITCH(name, length) {
    763         CASE("and") {
    764             m_token = MEDIA_AND;
    765         }
    766         CASE("not") {
    767             m_token = MEDIA_NOT;
    768         }
    769         CASE("only") {
    770             m_token = MEDIA_ONLY;
    771         }
    772         CASE("or") {
    773             m_token = MEDIA_OR;
    774         }
    775     }
    776 }
    777 
    // Maps the unit suffix that follows a number to its dedicated token.
    //
    // type points at the first character of the suffix and length is its size in
    // characters (asserted to be positive). When the suffix matches one of the
    // CASE strings below, m_token is set to the corresponding unit token (CMS,
    // EMS, PXS, ...); no other statement in this function assigns m_token, and the
    // caller in realLex() sets it to DIMEN before calling.
    // NOTE(review): SWITCH/CASE are not defined in this part of the file; their
    // exact matching rules (e.g. case sensitivity) should be confirmed there.
    778 template <typename CharacterType>
    779 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length)
    780 {
    781     ASSERT(length > 0);
    782 
    783     SWITCH(type, length) {
    784         CASE("cm") {
    785             m_token = CMS;
    786         }
    787         CASE("ch") {
    788             m_token = CHS;
    789         }
    790         CASE("deg") {
    791             m_token = DEGS;
    792         }
    793         CASE("dppx") {
    794             // There is a discussion about the name of this unit on www-style.
    795             // Keep this compile time guard in place until that is resolved.
    796             // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html
                    // NOTE(review): no compile time guard surrounds this case in the
                    // visible code, so the remark above looks stale - confirm and update.
    797             m_token = DPPX;
    798         }
    799         CASE("dpcm") {
    800             m_token = DPCM;
    801         }
    802         CASE("dpi") {
    803             m_token = DPI;
    804         }
    805         CASE("em") {
    806             m_token = EMS;
    807         }
    808         CASE("ex") {
    809             m_token = EXS;
    810         }
    811         CASE("fr") {
    812             m_token = FR;
    813         }
    814         CASE("grad") {
    815             m_token = GRADS;
    816         }
    817         CASE("hz") {
    818             m_token = HERTZ;
    819         }
    820         CASE("in") {
    821             m_token = INS;
    822         }
    823         CASE("khz") {
    824             m_token = KHERTZ;
    825         }
    826         CASE("mm") {
    827             m_token = MMS;
    828         }
    829         CASE("ms") {
    830             m_token = MSECS;
    831         }
    832         CASE("px") {
    833             m_token = PXS;
    834         }
    835         CASE("pt") {
    836             m_token = PTS;
    837         }
    838         CASE("pc") {
    839             m_token = PCS;
    840         }
    841         CASE("rad") {
    842             m_token = RADS;
    843         }
    844         CASE("rem") {
    845             m_token = REMS;
    846         }
    847         CASE("s") {
    848             m_token = SECS;
    849         }
    850         CASE("turn") {
    851             m_token = TURNS;
    852         }
    853         CASE("vw") {
    854             m_token = VW;
    855         }
    856         CASE("vh") {
    857             m_token = VH;
    858         }
    859         CASE("vmin") {
    860             m_token = VMIN;
    861         }
    862         CASE("vmax") {
    863             m_token = VMAX;
    864         }
    865         CASE("__qem") {
    866             m_token = QEMS;
    867         }
    868     }
    869 }
    870 
    // Classifies a dash-prefixed function name that starts at
    // tokenStart<CharacterType>().
    //
    // length counts the characters of the name including its first character; that
    // first character is skipped before matching. On a match m_token is set to
    // ANYFUNCTION, MINFUNCTION, MAXFUNCTION, CALCFUNCTION or DISTRIBUTEDFUNCTION;
    // no other statement in this function assigns m_token, and the caller in
    // realLex() sets it to FUNCTION before calling.
    // NOTE(review): SWITCH/CASE are not defined in this part of the file; their
    // exact matching rules (e.g. case sensitivity) should be confirmed there.
    871 template <typename CharacterType>
    872 inline void CSSTokenizer::detectDashToken(int length)
    873 {
    874     CharacterType* name = tokenStart<CharacterType>();
    875 
    876     // Ignore leading dash.
    877     ++name;
    878     --length;
    879 
    880     SWITCH(name, length) {
    881         CASE("webkit-any") {
    882             m_token = ANYFUNCTION;
    883         }
    884         CASE("webkit-min") {
    885             m_token = MINFUNCTION;
    886         }
    887         CASE("webkit-max") {
    888             m_token = MAXFUNCTION;
    889         }
    890         CASE("webkit-calc") {
    891             m_token = CALCFUNCTION;
    892         }
    893         CASE("webkit-distributed") {
    894             m_token = DISTRIBUTEDFUNCTION;
    895         }
    896     }
    897 }
    898 
    // Classifies the at-keyword that starts at tokenStart<CharacterType>().
    //
    // length counts the characters of the keyword including the leading '@'
    // (asserted: first character is '@' and length >= 2). hasEscape is the escape
    // flag the caller obtained from parseIdentifier() for this keyword.
    //
    // On a match m_token is set to the rule specific *_SYM token; the caller in
    // realLex() sets it to ATKEYWORD before calling. Side effects besides m_token:
    //  - "import", "media" and "-internal-medialist" switch m_parsingMode to
    //    MediaQueryMode;
    //  - "supports" and "-internal-supports-condition" switch it to SupportsMode.
    // Every "-internal-*" rule additionally requires m_internal, "keyframes"
    // requires RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled(), and
    // "charset" is only accepted when the '@' is the very first character of the
    // data buffer.
    // NOTE(review): SWITCH/CASE are not defined in this part of the file; their
    // exact matching rules (e.g. case sensitivity) should be confirmed there.
    899 template <typename CharacterType>
    900 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape)
    901 {
    902     CharacterType* name = tokenStart<CharacterType>();
    903     ASSERT(name[0] == '@' && length >= 2);
    904 
    905     // Ignore leading @.
    906     ++name;
    907     --length;
    908 
    909     // charset, font-face, import, keyframes, media, namespace, page, supports, viewport,
    910     // -webkit-keyframes, -internal-medialist, -internal-keyframe-key-list and
            // -internal-supports-condition are not affected by hasEscape.
    911     SWITCH(name, length) {
    912         CASE("bottom-left") {
    913             if (LIKELY(!hasEscape))
    914                 m_token = BOTTOMLEFT_SYM;
    915         }
    916         CASE("bottom-right") {
    917             if (LIKELY(!hasEscape))
    918                 m_token = BOTTOMRIGHT_SYM;
    919         }
    920         CASE("bottom-center") {
    921             if (LIKELY(!hasEscape))
    922                 m_token = BOTTOMCENTER_SYM;
    923         }
    924         CASE("bottom-left-corner") {
    925             if (LIKELY(!hasEscape))
    926                 m_token = BOTTOMLEFTCORNER_SYM;
    927         }
    928         CASE("bottom-right-corner") {
    929             if (LIKELY(!hasEscape))
    930                 m_token = BOTTOMRIGHTCORNER_SYM;
    931         }
    932         CASE("charset") {
    933             if (name - 1 == dataStart<CharacterType>())
    934                 m_token = CHARSET_SYM;
    935         }
    936         CASE("font-face") {
    937             m_token = FONT_FACE_SYM;
    938         }
    939         CASE("import") {
    940             m_parsingMode = MediaQueryMode;
    941             m_token = IMPORT_SYM;
    942         }
    943         CASE("keyframes") {
    944             if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled())
    945                 m_token = KEYFRAMES_SYM;
    946         }
    947         CASE("left-top") {
    948             if (LIKELY(!hasEscape))
    949                 m_token = LEFTTOP_SYM;
    950         }
    951         CASE("left-middle") {
    952             if (LIKELY(!hasEscape))
    953                 m_token = LEFTMIDDLE_SYM;
    954         }
    955         CASE("left-bottom") {
    956             if (LIKELY(!hasEscape))
    957                 m_token = LEFTBOTTOM_SYM;
    958         }
    959         CASE("media") {
    960             m_parsingMode = MediaQueryMode;
    961             m_token = MEDIA_SYM;
    962         }
    963         CASE("namespace") {
    964             m_token = NAMESPACE_SYM;
    965         }
    966         CASE("page") {
    967             m_token = PAGE_SYM;
    968         }
    969         CASE("right-top") {
    970             if (LIKELY(!hasEscape))
    971                 m_token = RIGHTTOP_SYM;
    972         }
    973         CASE("right-middle") {
    974             if (LIKELY(!hasEscape))
    975                 m_token = RIGHTMIDDLE_SYM;
    976         }
    977         CASE("right-bottom") {
    978             if (LIKELY(!hasEscape))
    979                 m_token = RIGHTBOTTOM_SYM;
    980         }
    981         CASE("supports") {
    982             m_parsingMode = SupportsMode;
    983             m_token = SUPPORTS_SYM;
    984         }
    985         CASE("top-left") {
    986             if (LIKELY(!hasEscape))
    987                 m_token = TOPLEFT_SYM;
    988         }
    989         CASE("top-right") {
    990             if (LIKELY(!hasEscape))
    991                 m_token = TOPRIGHT_SYM;
    992         }
    993         CASE("top-center") {
    994             if (LIKELY(!hasEscape))
    995                 m_token = TOPCENTER_SYM;
    996         }
    997         CASE("top-left-corner") {
    998             if (LIKELY(!hasEscape))
    999                 m_token = TOPLEFTCORNER_SYM;
   1000         }
   1001         CASE("top-right-corner") {
   1002             if (LIKELY(!hasEscape))
   1003                 m_token = TOPRIGHTCORNER_SYM;
   1004         }
   1005         CASE("viewport") {
   1006             m_token = VIEWPORT_RULE_SYM;
   1007         }
   1008         CASE("-internal-rule") {
   1009             if (LIKELY(!hasEscape && m_internal))
   1010                 m_token = INTERNAL_RULE_SYM;
   1011         }
   1012         CASE("-webkit-region") {
   1013             if (LIKELY(!hasEscape))
   1014                 m_token = WEBKIT_REGION_RULE_SYM;
   1015         }
   1016         CASE("-webkit-filter") {
   1017             if (LIKELY(!hasEscape))
   1018                 m_token = WEBKIT_FILTER_RULE_SYM;
   1019         }
   1020         CASE("-internal-decls") {
   1021             if (LIKELY(!hasEscape && m_internal))
   1022                 m_token = INTERNAL_DECLS_SYM;
   1023         }
   1024         CASE("-internal-value") {
   1025             if (LIKELY(!hasEscape && m_internal))
   1026                 m_token = INTERNAL_VALUE_SYM;
   1027         }
   1028         CASE("-webkit-keyframes") {
   1029             m_token = WEBKIT_KEYFRAMES_SYM;
   1030         }
   1031         CASE("-internal-selector") {
   1032             if (LIKELY(!hasEscape && m_internal))
   1033                 m_token = INTERNAL_SELECTOR_SYM;
   1034         }
   1035         CASE("-internal-medialist") {
   1036             if (!m_internal)
   1037                 return;
   1038             m_parsingMode = MediaQueryMode;
   1039             m_token = INTERNAL_MEDIALIST_SYM;
   1040         }
   1041         CASE("-internal-keyframe-rule") {
   1042             if (LIKELY(!hasEscape && m_internal))
   1043                 m_token = INTERNAL_KEYFRAME_RULE_SYM;
   1044         }
   1045         CASE("-internal-keyframe-key-list") {
   1046             if (!m_internal)
   1047                 return;
   1048             m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM;
   1049         }
   1050         CASE("-internal-supports-condition") {
   1051             if (!m_internal)
   1052                 return;
   1053             m_parsingMode = SupportsMode;
   1054             m_token = INTERNAL_SUPPORTS_CONDITION_SYM;
   1055         }
   1056     }
   1057 }
   1058 
   // Classifies the identifier that starts at tokenStart<CharacterType>() as an
   // @supports keyword. Must only be called while m_parsingMode is SupportsMode
   // (asserted).
   //
   // length is the number of characters handed to SWITCH for matching. When the
   // text matches "or", "and" or "not", m_token becomes SUPPORTS_OR, SUPPORTS_AND
   // or SUPPORTS_NOT respectively; no other statement in this function assigns
   // m_token, which lets the caller in realLex() detect a match by comparing
   // m_token against the IDENT value it stored beforehand.
   // NOTE(review): SWITCH/CASE are not defined in this part of the file; their
   // exact matching rules (e.g. case sensitivity) should be confirmed there.
   1059 template <typename CharacterType>
   1060 inline void CSSTokenizer::detectSupportsToken(int length)
   1061 {
   1062     ASSERT(m_parsingMode == SupportsMode);
   1063     CharacterType* name = tokenStart<CharacterType>();
   1064 
   1065     SWITCH(name, length) {
   1066         CASE("or") {
   1067             m_token = SUPPORTS_OR;
   1068         }
   1069         CASE("and") {
   1070             m_token = SUPPORTS_AND;
   1071         }
   1072         CASE("not") {
   1073             m_token = SUPPORTS_NOT;
   1074         }
   1075     }
   1076 }
   1077 
   1078 template <typename CharacterType>
   1079 inline void CSSTokenizer::detectCSSVariableDefinitionToken(int length)
   1080 {
   1081     static const int prefixLength = static_cast<int>(sizeof("var-") - 1);
   1082     if (length <= prefixLength)
   1083         return;
   1084     CharacterType* name = tokenStart<CharacterType>();
   1085     COMPILE_ASSERT(prefixLength > 0, CSS_variable_prefix_must_be_nonempty);
   1086     if (name[prefixLength - 1] == '-' && isIdentifierStartAfterDash(name + prefixLength) && isEqualToCSSCaseSensitiveIdentifier(name, "var"))
   1087         m_token = VAR_DEFINITION;
   1088 }
   1089 
   // Scans the next token from the input at currentCharacter<SrcCharacterType>()
   // and returns its type, which is also stored in m_token.
   //
   // yylvalWithoutType is cast to YYSTYPE*; depending on the token, its string or
   // number member is filled in. The first character of the token selects a
   // branch through typesOfASCIICharacters (any value above 127 is handled as an
   // identifier start). Comments are skipped and scanning restarts at
   // restartAfterComment, so a comment is never returned as a token.
   //
   // Side effects visible in this function: the token start and
   // m_tokenStartLineNumber are recorded, the current character pointer is
   // advanced past the token, m_lineNumber is incremented for '\n' inside white
   // space and comments, and m_parsingMode may be reset to NormalMode or (through
   // the detect*Token helpers) switched to another mode.
   1090 template <typename SrcCharacterType>
   1091 int CSSTokenizer::realLex(void* yylvalWithoutType)
   1092 {
   1093     YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType);
   1094     // Write pointer for the next character.
   1095     SrcCharacterType* result;
   1096     CSSParserString resultString;
   1097     bool hasEscape;
   1098 
   1099     // The input buffer is terminated by a \0 character, so
   1100     // it is safe to read one character ahead of a known non-null.
   1101 #ifndef NDEBUG
   1102     // In debug we check with an ASSERT that the length is > 0 for string types.
   1103     yylval->string.clear();
   1104 #endif
   1105 
   1106 restartAfterComment:
   1107     result = currentCharacter<SrcCharacterType>();
   1108     setTokenStart(result);
   1109     m_tokenStartLineNumber = m_lineNumber;
            // By default the token is the character itself; the branches below
            // replace it with a named token where one is recognized.
   1110     m_token = *currentCharacter<SrcCharacterType>();
   1111     ++currentCharacter<SrcCharacterType>();
   1112 
   1113     switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
   1114     case CharacterCaselessU:
   1115         if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
   1116             if (parseUnicodeRange<SrcCharacterType>()) {
   1117                 m_token = UNICODERANGE;
   1118                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1119                 break;
   1120             }
   1121         }
   1122         // Fall through to CharacterIdentifierStart.
   1123 
   1124     case CharacterIdentifierStart:
                // Step back so that parseIdentifier() starts at the first character of the token.
   1125         --currentCharacter<SrcCharacterType>();
   1126         parseIdentifier(result, yylval->string, hasEscape);
   1127         m_token = IDENT;
   1128 
   1129         if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
   1130             if (m_parsingMode == SupportsMode && !hasEscape) {
   1131                 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1132                 if (m_token != IDENT)
   1133                     break;
   1134             }
   1135 
   1136             m_token = FUNCTION;
   1137             if (!hasEscape)
   1138                 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1139 
   1140             // Skip parenthesis
   1141             ++currentCharacter<SrcCharacterType>();
   1142             ++result;
   1143             ++yylval->string.m_length;
   1144 
   1145             if (m_token == URI) {
   1146                 m_token = FUNCTION;
   1147                 // Check whether it is really an URI.
   1148                 if (yylval->string.is8Bit())
   1149                     parseURI<LChar>(yylval->string);
   1150                 else
   1151                     parseURI<UChar>(yylval->string);
   1152             }
   1153         } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
   1154             if (m_parsingMode == MediaQueryMode) {
   1155                 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1156             } else if (m_parsingMode == SupportsMode) {
   1157                 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1158             } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
   1159                 if (result - tokenStart<SrcCharacterType>() == 1) {
   1160                     // String "n" is IDENT but "n+1" is NTH.
   1161                     if (parseNthChildExtra<SrcCharacterType>()) {
   1162                         m_token = NTH;
   1163                         yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
   1164                     }
   1165                 } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
   1166                     // String "n-" is IDENT but "n-1" is NTH.
   1167                     // Set currentCharacter to '-' to continue parsing.
   1168                     SrcCharacterType* nextCharacter = result;
   1169                     currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
   1170                     if (parseNthChildExtra<SrcCharacterType>()) {
   1171                         m_token = NTH;
   1172                         yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1173                     } else {
   1174                         // Revert the change to currentCharacter if unsuccessful.
   1175                         currentCharacter<SrcCharacterType>() = nextCharacter;
   1176                     }
   1177                 }
   1178             }
   1179         } else if (UNLIKELY(RuntimeEnabledFeatures::cssVariablesEnabled())) {
   1180             detectCSSVariableDefinitionToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1181         }
   1182         break;
   1183 
   1184     case CharacterDot:
   1185         if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
   1186             break;
   1187         // Fall through to CharacterNumber.
   1188 
   1189     case CharacterNumber: {
   1190         bool dotSeen = (m_token == '.');
   1191 
   1192         while (true) {
   1193             if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
   1194                 // Only one dot is allowed for a number,
   1195                 // and it must be followed by a digit.
   1196                 if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
   1197                     break;
   1198                 dotSeen = true;
   1199             }
   1200             ++currentCharacter<SrcCharacterType>();
   1201         }
   1202 
   1203         if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
   1204             // "[0-9]+n" is always an NthChild.
   1205             ++currentCharacter<SrcCharacterType>();
   1206             parseNthChildExtra<SrcCharacterType>();
   1207             m_token = NTH;
   1208             yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1209             break;
   1210         }
   1211 
   1212         // Use SVG parser for numbers on SVG presentation attributes.
   1213         if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
   1214             // We need to take care of units like 'em' or 'ex'.
   1215             SrcCharacterType* character = currentCharacter<SrcCharacterType>();
   1216             if (isASCIIAlphaCaselessEqual(*character, 'e')) {
   1217                 ASSERT(character - tokenStart<SrcCharacterType>() > 0);
   1218                 ++character;
   1219                 if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
   1220                     ++character;
   1221                     while (isASCIIDigit(*character))
   1222                         ++character;
   1223                     // Use FLOATTOKEN if the string contains exponents.
   1224                     dotSeen = true;
   1225                     currentCharacter<SrcCharacterType>() = character;
   1226                 }
   1227             }
                    // NOTE(review): when the 'e' is not followed by an exponent, character has
                    // already been advanced past the 'e' while currentCharacter has not, so the
                    // length passed below includes that 'e' - confirm parseSVGNumber() copes.
   1228             if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
   1229                 break;
   1230         } else {
   1231             yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1232         }
   1233 
   1234         // Type of the function.
   1235         if (isIdentifierStart<SrcCharacterType>()) {
   1236             SrcCharacterType* type = currentCharacter<SrcCharacterType>();
   1237             result = currentCharacter<SrcCharacterType>();
   1238 
   1239             parseIdentifier(result, resultString, hasEscape);
   1240 
   1241             m_token = DIMEN;
   1242             if (!hasEscape)
   1243                 detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);
   1244 
   1245             if (m_token == DIMEN) {
   1246                 // The decoded number is overwritten, but this is intentional.
   1247                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1248             }
   1249         } else if (*currentCharacter<SrcCharacterType>() == '%') {
   1250             // Although the CSS grammar says {num}% we follow
   1251             // webkit at the moment which uses {num}%+.
   1252             do {
   1253                 ++currentCharacter<SrcCharacterType>();
   1254             } while (*currentCharacter<SrcCharacterType>() == '%');
   1255             m_token = PERCENTAGE;
   1256         } else {
   1257             m_token = dotSeen ? FLOATTOKEN : INTEGER;
   1258         }
   1259         break;
   1260     }
   1261 
   1262     case CharacterDash:
   1263         if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
   1264             --currentCharacter<SrcCharacterType>();
   1265             parseIdentifier(result, resultString, hasEscape);
   1266             m_token = IDENT;
   1267 
   1268             if (*currentCharacter<SrcCharacterType>() == '(') {
   1269                 m_token = FUNCTION;
   1270                 if (!hasEscape)
   1271                     detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
   1272                 ++currentCharacter<SrcCharacterType>();
   1273                 ++result;
   1274             } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
   1275                 if (result - tokenStart<SrcCharacterType>() == 2) {
   1276                     // String "-n" is IDENT but "-n+1" is NTH.
   1277                     if (parseNthChildExtra<SrcCharacterType>()) {
   1278                         m_token = NTH;
   1279                         result = currentCharacter<SrcCharacterType>();
   1280                     }
   1281                 } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
   1282                     // String "-n-" is IDENT but "-n-1" is NTH.
   1283                     // Set currentCharacter to second '-' of '-n-' to continue parsing.
   1284                     SrcCharacterType* nextCharacter = result;
   1285                     currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
   1286                     if (parseNthChildExtra<SrcCharacterType>()) {
   1287                         m_token = NTH;
   1288                         result = currentCharacter<SrcCharacterType>();
   1289                     } else {
   1290                         // Revert the change to currentCharacter if unsuccessful.
   1291                         currentCharacter<SrcCharacterType>() = nextCharacter;
   1292                     }
   1293                 }
   1294             }
   1295             resultString.setLength(result - tokenStart<SrcCharacterType>());
   1296             yylval->string = resultString;
   1297         } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
   1298             currentCharacter<SrcCharacterType>() += 2;
   1299             m_token = SGML_CD;
   1300         } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
   1301             // "-[0-9]+n" is always an NthChild.
   1302             if (parseNthChild<SrcCharacterType>()) {
   1303                 parseNthChildExtra<SrcCharacterType>();
   1304                 m_token = NTH;
   1305                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1306             }
   1307         }
   1308         break;
   1309 
   1310     case CharacterOther:
   1311         // m_token is simply the current character.
   1312         break;
   1313 
   1314     case CharacterNull:
   1315         // Do not advance pointer at the end of input.
   1316         --currentCharacter<SrcCharacterType>();
   1317         break;
   1318 
   1319     case CharacterWhiteSpace:
   1320         m_token = WHITESPACE;
   1321         // Might start with a '\n'.
   1322         --currentCharacter<SrcCharacterType>();
   1323         do {
   1324             if (*currentCharacter<SrcCharacterType>() == '\n')
   1325                 ++m_lineNumber;
   1326             ++currentCharacter<SrcCharacterType>();
   1327         } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
   1328         break;
   1329 
   1330     case CharacterEndMediaQueryOrSupports:
   1331         if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
   1332             m_parsingMode = NormalMode;
   1333         break;
   1334 
   1335     case CharacterEndNthChild:
   1336         if (m_parsingMode == NthChildMode)
   1337             m_parsingMode = NormalMode;
   1338         break;
   1339 
   1340     case CharacterQuote:
                // At this point m_token still holds the opening quote character, which is
                // what gets passed to checkAndSkipString() and parseString().
   1341         if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token)) {
   1342             ++result;
   1343             parseString<SrcCharacterType>(result, yylval->string, m_token);
   1344             m_token = STRING;
   1345         }
   1346         break;
   1347 
   1348     case CharacterExclamationMark: {
   1349         SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
   1350         if (isEqualToCSSIdentifier(start, "important")) {
   1351             m_token = IMPORTANT_SYM;
                    // 9 is the number of characters in "important".
   1352             currentCharacter<SrcCharacterType>() = start + 9;
   1353         }
   1354         break;
   1355     }
   1356 
   1357     case CharacterHashmark: {
   1358         SrcCharacterType* start = currentCharacter<SrcCharacterType>();
   1359         result = currentCharacter<SrcCharacterType>();
   1360 
   1361         if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
   1362             // This must be a valid hex number token.
   1363             do {
   1364                 ++currentCharacter<SrcCharacterType>();
   1365             } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
   1366             m_token = HEX;
   1367             yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
   1368         } else if (isIdentifierStart<SrcCharacterType>()) {
   1369             m_token = IDSEL;
   1370             parseIdentifier(result, yylval->string, hasEscape);
   1371             if (!hasEscape) {
   1372                 // Check whether the identifier is also a valid hex number.
   1373                 SrcCharacterType* current = start;
   1374                 m_token = HEX;
   1375                 do {
   1376                     if (!isASCIIHexDigit(*current)) {
   1377                         m_token = IDSEL;
   1378                         break;
   1379                     }
   1380                     ++current;
   1381                 } while (current < result);
   1382             }
   1383         }
   1384         break;
   1385     }
   1386 
   1387     case CharacterSlash:
   1388         // Ignore comments. They are not even considered as white spaces.
   1389         if (*currentCharacter<SrcCharacterType>() == '*') {
   1390             const CSSParserLocation startLocation = currentLocation();
   1391             if (m_parser.m_sourceDataHandler) {
   1392                 unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
   1393                 m_parser.m_sourceDataHandler->startComment(startOffset - m_parsedTextPrefixLength);
   1394             }
   1395             ++currentCharacter<SrcCharacterType>();
   1396             while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
   1397                 if (*currentCharacter<SrcCharacterType>() == '\n')
   1398                     ++m_lineNumber;
   1399                 if (*currentCharacter<SrcCharacterType>() == '\0') {
   1400                     // Unterminated comments are simply ignored.
                            // Step back by two so that the "+= 2" after the loop leaves the
                            // pointer on the terminating \0 rather than beyond it.
   1401                     currentCharacter<SrcCharacterType>() -= 2;
   1402                     m_parser.reportError(startLocation, CSSParser::UnterminatedCommentError);
   1403                     break;
   1404                 }
   1405                 ++currentCharacter<SrcCharacterType>();
   1406             }
                    // Skip the closing "*/" (or undo the step back made for an unterminated comment).
   1407             currentCharacter<SrcCharacterType>() += 2;
   1408             if (m_parser.m_sourceDataHandler) {
   1409                 unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
   1410                 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
   1411                 m_parser.m_sourceDataHandler->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
   1412             }
   1413             goto restartAfterComment;
   1414         }
   1415         break;
   1416 
   1417     case CharacterDollar:
   1418         if (*currentCharacter<SrcCharacterType>() == '=') {
   1419             ++currentCharacter<SrcCharacterType>();
   1420             m_token = ENDSWITH;
   1421         }
   1422         break;
   1423 
   1424     case CharacterAsterisk:
   1425         if (*currentCharacter<SrcCharacterType>() == '=') {
   1426             ++currentCharacter<SrcCharacterType>();
   1427             m_token = CONTAINS;
   1428         }
   1429         break;
   1430 
   1431     case CharacterPlus:
   1432         if (UNLIKELY(m_parsingMode == NthChildMode)) {
   1433             // Simplest case. "+[0-9]*n" is always NthChild.
   1434             if (parseNthChild<SrcCharacterType>()) {
   1435                 parseNthChildExtra<SrcCharacterType>();
   1436                 m_token = NTH;
   1437                 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
   1438             }
   1439         }
   1440         break;
   1441 
   1442     case CharacterLess:
                // "<!--" is reported with SGML_CD, the same token the CharacterDash branch
                // uses for "-->".
   1443         if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
   1444             currentCharacter<SrcCharacterType>() += 3;
   1445             m_token = SGML_CD;
   1446         }
   1447         break;
   1448 
   1449     case CharacterAt:
   1450         if (isIdentifierStart<SrcCharacterType>()) {
   1451             m_token = ATKEYWORD;
   1452             ++result;
   1453             parseIdentifier(result, resultString, hasEscape);
   1454             detectAtToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>(), hasEscape);
   1455         }
   1456         break;
   1457 
   1458     case CharacterBackSlash:
   1459         if (isCSSEscape(*currentCharacter<SrcCharacterType>())) {
   1460             --currentCharacter<SrcCharacterType>();
   1461             parseIdentifier(result, yylval->string, hasEscape);
   1462             m_token = IDENT;
   1463         }
   1464         break;
   1465 
   1466     case CharacterXor:
   1467         if (*currentCharacter<SrcCharacterType>() == '=') {
   1468             ++currentCharacter<SrcCharacterType>();
   1469             m_token = BEGINSWITH;
   1470         }
   1471         break;
   1472 
   1473     case CharacterVerticalBar:
   1474         if (*currentCharacter<SrcCharacterType>() == '=') {
   1475             ++currentCharacter<SrcCharacterType>();
   1476             m_token = DASHMATCH;
   1477         }
   1478         break;
   1479 
   1480     case CharacterTilde:
   1481         if (*currentCharacter<SrcCharacterType>() == '=') {
   1482             ++currentCharacter<SrcCharacterType>();
   1483             m_token = INCLUDES;
   1484         }
   1485         break;
   1486 
   1487     default:
   1488         ASSERT_NOT_REACHED();
   1489         break;
   1490     }
   1491 
   1492     return m_token;
   1493 }
   1494 
   1495 template <>
   1496 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart)
   1497 {
   1498     m_tokenStart.ptr8 = tokenStart;
   1499 }
   1500 
   1501 template <>
   1502 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart)
   1503 {
   1504     m_tokenStart.ptr16 = tokenStart;
   1505 }
   1506 
   1507 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength)
   1508 {
   1509     m_parsedTextPrefixLength = prefixLength;
   1510     m_parsedTextSuffixLength = suffixLength;
   1511     unsigned stringLength = string.length();
   1512     unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1;
   1513     m_length = length;
   1514 
   1515     if (!stringLength || string.is8Bit()) {
   1516         m_dataStart8 = adoptArrayPtr(new LChar[length]);
   1517         for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
   1518             m_dataStart8[i] = prefix[i];
   1519 
   1520         if (stringLength)
   1521             memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar));
   1522 
   1523         unsigned start = m_parsedTextPrefixLength + stringLength;
   1524         unsigned end = start + suffixLength;
   1525         for (unsigned i = start; i < end; i++)
   1526             m_dataStart8[i] = suffix[i - start];
   1527 
   1528         m_dataStart8[length - 1] = 0;
   1529 
   1530         m_is8BitSource = true;
   1531         m_currentCharacter8 = m_dataStart8.get();
   1532         m_currentCharacter16 = 0;
   1533         setTokenStart<LChar>(m_currentCharacter8);
   1534         m_lexFunc = &CSSTokenizer::realLex<LChar>;
   1535         return;
   1536     }
   1537 
   1538     m_dataStart16 = adoptArrayPtr(new UChar[length]);
   1539     for (unsigned i = 0; i < m_parsedTextPrefixLength; i++)
   1540         m_dataStart16[i] = prefix[i];
   1541 
   1542     ASSERT(stringLength);
   1543     memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar));
   1544 
   1545     unsigned start = m_parsedTextPrefixLength + stringLength;
   1546     unsigned end = start + suffixLength;
   1547     for (unsigned i = start; i < end; i++)
   1548         m_dataStart16[i] = suffix[i - start];
   1549 
   1550     m_dataStart16[length - 1] = 0;
   1551 
   1552     m_is8BitSource = false;
   1553     m_currentCharacter8 = 0;
   1554     m_currentCharacter16 = m_dataStart16.get();
   1555     setTokenStart<UChar>(m_currentCharacter16);
   1556     m_lexFunc = &CSSTokenizer::realLex<UChar>;
   1557 }
   1558 
   1559 } // namespace WebCore
   1560