1 /* 2 * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde (at) carewolf.com) 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. 5 * Copyright (C) 2007 Nicholas Shanks <webkit (at) nickshanks.com> 6 * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org> 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Library General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Library General Public License for more details. 20 * 21 * You should have received a copy of the GNU Library General Public License 22 * along with this library; see the file COPYING.LIB. If not, write to 23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 24 * Boston, MA 02110-1301, USA. 25 */ 26 27 #include "config.h" 28 #include "core/css/CSSTokenizer.h" 29 30 #include "core/css/CSSKeyframeRule.h" 31 #include "core/css/CSSParser.h" 32 #include "core/css/CSSParserValues.h" 33 #include "core/css/MediaQuery.h" 34 #include "core/css/StyleRule.h" 35 #include "core/html/parser/HTMLParserIdioms.h" 36 #include "core/svg/SVGParserUtilities.h" 37 38 namespace WebCore { 39 40 #include "CSSGrammar.h" 41 42 enum CharacterType { 43 // Types for the main switch. 44 45 // The first 4 types must be grouped together, as they 46 // represent the allowed chars in an identifier. 47 CharacterCaselessU, 48 CharacterIdentifierStart, 49 CharacterNumber, 50 CharacterDash, 51 52 CharacterOther, 53 CharacterNull, 54 CharacterWhiteSpace, 55 CharacterEndMediaQueryOrSupports, 56 CharacterEndNthChild, 57 CharacterQuote, 58 CharacterExclamationMark, 59 CharacterHashmark, 60 CharacterDollar, 61 CharacterAsterisk, 62 CharacterPlus, 63 CharacterDot, 64 CharacterSlash, 65 CharacterLess, 66 CharacterAt, 67 CharacterBackSlash, 68 CharacterXor, 69 CharacterVerticalBar, 70 CharacterTilde, 71 }; 72 73 // 128 ASCII codes 74 static const CharacterType typesOfASCIICharacters[128] = { 75 /* 0 - Null */ CharacterNull, 76 /* 1 - Start of Heading */ CharacterOther, 77 /* 2 - Start of Text */ CharacterOther, 78 /* 3 - End of Text */ CharacterOther, 79 /* 4 - End of Transm. */ CharacterOther, 80 /* 5 - Enquiry */ CharacterOther, 81 /* 6 - Acknowledgment */ CharacterOther, 82 /* 7 - Bell */ CharacterOther, 83 /* 8 - Back Space */ CharacterOther, 84 /* 9 - Horizontal Tab */ CharacterWhiteSpace, 85 /* 10 - Line Feed */ CharacterWhiteSpace, 86 /* 11 - Vertical Tab */ CharacterOther, 87 /* 12 - Form Feed */ CharacterWhiteSpace, 88 /* 13 - Carriage Return */ CharacterWhiteSpace, 89 /* 14 - Shift Out */ CharacterOther, 90 /* 15 - Shift In */ CharacterOther, 91 /* 16 - Data Line Escape */ CharacterOther, 92 /* 17 - Device Control 1 */ CharacterOther, 93 /* 18 - Device Control 2 */ CharacterOther, 94 /* 19 - Device Control 3 */ CharacterOther, 95 /* 20 - Device Control 4 */ CharacterOther, 96 /* 21 - Negative Ack. */ CharacterOther, 97 /* 22 - Synchronous Idle */ CharacterOther, 98 /* 23 - End of Transmit */ CharacterOther, 99 /* 24 - Cancel */ CharacterOther, 100 /* 25 - End of Medium */ CharacterOther, 101 /* 26 - Substitute */ CharacterOther, 102 /* 27 - Escape */ CharacterOther, 103 /* 28 - File Separator */ CharacterOther, 104 /* 29 - Group Separator */ CharacterOther, 105 /* 30 - Record Separator */ CharacterOther, 106 /* 31 - Unit Separator */ CharacterOther, 107 /* 32 - Space */ CharacterWhiteSpace, 108 /* 33 - ! */ CharacterExclamationMark, 109 /* 34 - " */ CharacterQuote, 110 /* 35 - # */ CharacterHashmark, 111 /* 36 - $ */ CharacterDollar, 112 /* 37 - % */ CharacterOther, 113 /* 38 - & */ CharacterOther, 114 /* 39 - ' */ CharacterQuote, 115 /* 40 - ( */ CharacterOther, 116 /* 41 - ) */ CharacterEndNthChild, 117 /* 42 - * */ CharacterAsterisk, 118 /* 43 - + */ CharacterPlus, 119 /* 44 - , */ CharacterOther, 120 /* 45 - - */ CharacterDash, 121 /* 46 - . */ CharacterDot, 122 /* 47 - / */ CharacterSlash, 123 /* 48 - 0 */ CharacterNumber, 124 /* 49 - 1 */ CharacterNumber, 125 /* 50 - 2 */ CharacterNumber, 126 /* 51 - 3 */ CharacterNumber, 127 /* 52 - 4 */ CharacterNumber, 128 /* 53 - 5 */ CharacterNumber, 129 /* 54 - 6 */ CharacterNumber, 130 /* 55 - 7 */ CharacterNumber, 131 /* 56 - 8 */ CharacterNumber, 132 /* 57 - 9 */ CharacterNumber, 133 /* 58 - : */ CharacterOther, 134 /* 59 - ; */ CharacterEndMediaQueryOrSupports, 135 /* 60 - < */ CharacterLess, 136 /* 61 - = */ CharacterOther, 137 /* 62 - > */ CharacterOther, 138 /* 63 - ? */ CharacterOther, 139 /* 64 - @ */ CharacterAt, 140 /* 65 - A */ CharacterIdentifierStart, 141 /* 66 - B */ CharacterIdentifierStart, 142 /* 67 - C */ CharacterIdentifierStart, 143 /* 68 - D */ CharacterIdentifierStart, 144 /* 69 - E */ CharacterIdentifierStart, 145 /* 70 - F */ CharacterIdentifierStart, 146 /* 71 - G */ CharacterIdentifierStart, 147 /* 72 - H */ CharacterIdentifierStart, 148 /* 73 - I */ CharacterIdentifierStart, 149 /* 74 - J */ CharacterIdentifierStart, 150 /* 75 - K */ CharacterIdentifierStart, 151 /* 76 - L */ CharacterIdentifierStart, 152 /* 77 - M */ CharacterIdentifierStart, 153 /* 78 - N */ CharacterIdentifierStart, 154 /* 79 - O */ CharacterIdentifierStart, 155 /* 80 - P */ CharacterIdentifierStart, 156 /* 81 - Q */ CharacterIdentifierStart, 157 /* 82 - R */ CharacterIdentifierStart, 158 /* 83 - S */ CharacterIdentifierStart, 159 /* 84 - T */ CharacterIdentifierStart, 160 /* 85 - U */ CharacterCaselessU, 161 /* 86 - V */ CharacterIdentifierStart, 162 /* 87 - W */ CharacterIdentifierStart, 163 /* 88 - X */ CharacterIdentifierStart, 164 /* 89 - Y */ CharacterIdentifierStart, 165 /* 90 - Z */ CharacterIdentifierStart, 166 /* 91 - [ */ CharacterOther, 167 /* 92 - \ */ CharacterBackSlash, 168 /* 93 - ] */ CharacterOther, 169 /* 94 - ^ */ CharacterXor, 170 /* 95 - _ */ CharacterIdentifierStart, 171 /* 96 - ` */ CharacterOther, 172 /* 97 - a */ CharacterIdentifierStart, 173 /* 98 - b */ CharacterIdentifierStart, 174 /* 99 - c */ CharacterIdentifierStart, 175 /* 100 - d */ CharacterIdentifierStart, 176 /* 101 - e */ CharacterIdentifierStart, 177 /* 102 - f */ CharacterIdentifierStart, 178 /* 103 - g */ CharacterIdentifierStart, 179 /* 104 - h */ CharacterIdentifierStart, 180 /* 105 - i */ CharacterIdentifierStart, 181 /* 106 - j */ CharacterIdentifierStart, 182 /* 107 - k */ CharacterIdentifierStart, 183 /* 108 - l */ CharacterIdentifierStart, 184 /* 109 - m */ CharacterIdentifierStart, 185 /* 110 - n */ CharacterIdentifierStart, 186 /* 111 - o */ CharacterIdentifierStart, 187 /* 112 - p */ CharacterIdentifierStart, 188 /* 113 - q */ CharacterIdentifierStart, 189 /* 114 - r */ CharacterIdentifierStart, 190 /* 115 - s */ CharacterIdentifierStart, 191 /* 116 - t */ CharacterIdentifierStart, 192 /* 117 - u */ CharacterCaselessU, 193 /* 118 - v */ CharacterIdentifierStart, 194 /* 119 - w */ CharacterIdentifierStart, 195 /* 120 - x */ CharacterIdentifierStart, 196 /* 121 - y */ CharacterIdentifierStart, 197 /* 122 - z */ CharacterIdentifierStart, 198 /* 123 - { */ CharacterEndMediaQueryOrSupports, 199 /* 124 - | */ CharacterVerticalBar, 200 /* 125 - } */ CharacterOther, 201 /* 126 - ~ */ CharacterTilde, 202 /* 127 - Delete */ CharacterOther, 203 }; 204 205 // Utility functions for the CSS tokenizer. 206 207 template <typename CharacterType> 208 static inline bool isCSSLetter(CharacterType character) 209 { 210 return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash; 211 } 212 213 template <typename CharacterType> 214 static inline bool isCSSEscape(CharacterType character) 215 { 216 return character >= ' ' && character != 127; 217 } 218 219 template <typename CharacterType> 220 static inline bool isURILetter(CharacterType character) 221 { 222 return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!'; 223 } 224 225 template <typename CharacterType> 226 static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter) 227 { 228 return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128 229 || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1])); 230 } 231 232 template <typename CharacterType> 233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString) 234 { 235 // Compare an character memory data with a zero terminated string. 236 do { 237 // The input must be part of an identifier if constantChar or constString 238 // contains '-'. Otherwise toASCIILowerUnchecked('\r') would be equal to '-'. 239 ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-'); 240 ASSERT(*constantString != '-' || isCSSLetter(*cssString)); 241 if (toASCIILowerUnchecked(*cssString++) != (*constantString++)) 242 return false; 243 } while (*constantString); 244 return true; 245 } 246 247 template <typename CharacterType> 248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString) 249 { 250 ASSERT(*constantString); 251 252 do { 253 if (*string++ != *constantString++) 254 return false; 255 } while (*constantString); 256 return true; 257 } 258 259 template <typename CharacterType> 260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter) 261 { 262 // Returns with 0, if escape check is failed. Otherwise 263 // it returns with the following character. 264 ASSERT(*currentCharacter == '\\'); 265 266 ++currentCharacter; 267 if (!isCSSEscape(*currentCharacter)) 268 return 0; 269 270 if (isASCIIHexDigit(*currentCharacter)) { 271 int length = 6; 272 273 do { 274 ++currentCharacter; 275 } while (isASCIIHexDigit(*currentCharacter) && --length); 276 277 // Optional space after the escape sequence. 278 if (isHTMLSpace<CharacterType>(*currentCharacter)) 279 ++currentCharacter; 280 return currentCharacter; 281 } 282 return currentCharacter + 1; 283 } 284 285 template <typename CharacterType> 286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter) 287 { 288 while (isHTMLSpace<CharacterType>(*currentCharacter)) 289 ++currentCharacter; 290 return currentCharacter; 291 } 292 293 // Main CSS tokenizer functions. 294 295 template <> 296 inline LChar*& CSSTokenizer::currentCharacter<LChar>() 297 { 298 return m_currentCharacter8; 299 } 300 301 template <> 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() 303 { 304 return m_currentCharacter16; 305 } 306 307 UChar*& CSSTokenizer::currentCharacter16() 308 { 309 if (!m_currentCharacter16) { 310 m_dataStart16 = adoptArrayPtr(new UChar[m_length]); 311 m_currentCharacter16 = m_dataStart16.get(); 312 } 313 314 return m_currentCharacter16; 315 } 316 317 template <> 318 inline LChar* CSSTokenizer::dataStart<LChar>() 319 { 320 return m_dataStart8.get(); 321 } 322 323 template <> 324 inline UChar* CSSTokenizer::dataStart<UChar>() 325 { 326 return m_dataStart16.get(); 327 } 328 329 template <typename CharacterType> 330 inline CSSParserLocation CSSTokenizer::tokenLocation() 331 { 332 CSSParserLocation location; 333 location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>()); 334 location.lineNumber = m_tokenStartLineNumber; 335 location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>(); 336 return location; 337 } 338 339 CSSParserLocation CSSTokenizer::currentLocation() 340 { 341 if (is8BitSource()) 342 return tokenLocation<LChar>(); 343 return tokenLocation<UChar>(); 344 } 345 346 template <typename CharacterType> 347 inline bool CSSTokenizer::isIdentifierStart() 348 { 349 // Check whether an identifier is started. 350 return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1); 351 } 352 353 template <typename CharacterType> 354 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote) 355 { 356 // Returns with 0, if string check is failed. Otherwise 357 // it returns with the following character. This is necessary 358 // since we cannot revert escape sequences, thus strings 359 // must be validated before parsing. 360 while (true) { 361 if (UNLIKELY(*currentCharacter == quote)) { 362 // String parsing is successful. 363 return currentCharacter + 1; 364 } 365 if (UNLIKELY(!*currentCharacter)) { 366 // String parsing is successful up to end of input. 367 return currentCharacter; 368 } 369 if (UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) { 370 // String parsing is failed for character '\n', '\f' or '\r'. 371 return 0; 372 } 373 374 if (LIKELY(currentCharacter[0] != '\\')) { 375 ++currentCharacter; 376 } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') { 377 currentCharacter += 2; 378 } else if (currentCharacter[1] == '\r') { 379 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; 380 } else { 381 currentCharacter = checkAndSkipEscape(currentCharacter); 382 if (!currentCharacter) 383 return 0; 384 } 385 } 386 } 387 388 template <typename CharacterType> 389 unsigned CSSTokenizer::parseEscape(CharacterType*& src) 390 { 391 ASSERT(*src == '\\' && isCSSEscape(src[1])); 392 393 unsigned unicode = 0; 394 395 ++src; 396 if (isASCIIHexDigit(*src)) { 397 398 int length = 6; 399 400 do { 401 unicode = (unicode << 4) + toASCIIHexValue(*src++); 402 } while (--length && isASCIIHexDigit(*src)); 403 404 // Characters above 0x10ffff are not handled. 405 if (unicode > 0x10ffff) 406 unicode = 0xfffd; 407 408 // Optional space after the escape sequence. 409 if (isHTMLSpace<CharacterType>(*src)) 410 ++src; 411 412 return unicode; 413 } 414 415 return *currentCharacter<CharacterType>()++; 416 } 417 418 template <> 419 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode) 420 { 421 ASSERT(unicode <= 0xff); 422 *result = unicode; 423 424 ++result; 425 } 426 427 template <> 428 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode) 429 { 430 // Replace unicode with a surrogate pairs when it is bigger than 0xffff 431 if (U16_LENGTH(unicode) == 2) { 432 *result++ = U16_LEAD(unicode); 433 *result = U16_TRAIL(unicode); 434 } else { 435 *result = unicode; 436 } 437 438 ++result; 439 } 440 441 template <typename SrcCharacterType, typename DestCharacterType> 442 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape) 443 { 444 hasEscape = false; 445 do { 446 if (LIKELY(*src != '\\')) { 447 *result++ = *src++; 448 } else { 449 hasEscape = true; 450 SrcCharacterType* savedEscapeStart = src; 451 unsigned unicode = parseEscape<SrcCharacterType>(src); 452 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { 453 src = savedEscapeStart; 454 return false; 455 } 456 UnicodeToChars(result, unicode); 457 } 458 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); 459 460 return true; 461 } 462 463 template <typename CharacterType> 464 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape) 465 { 466 // If a valid identifier start is found, we can safely 467 // parse the identifier until the next invalid character. 468 ASSERT(isIdentifierStart<CharacterType>()); 469 470 CharacterType* start = currentCharacter<CharacterType>(); 471 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) { 472 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 473 ASSERT(is8BitSource()); 474 UChar*& result16 = currentCharacter16(); 475 UChar* start16 = result16; 476 int i = 0; 477 for (; i < result - start; i++) 478 result16[i] = start[i]; 479 480 result16 += i; 481 482 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape); 483 484 resultString.init(start16, result16 - start16); 485 486 return; 487 } 488 489 resultString.init(start, result - start); 490 } 491 492 template <typename SrcCharacterType, typename DestCharacterType> 493 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote) 494 { 495 while (true) { 496 if (UNLIKELY(*src == quote)) { 497 // String parsing is done. 498 ++src; 499 return true; 500 } 501 if (UNLIKELY(!*src)) { 502 // String parsing is done, but don't advance pointer if at the end of input. 503 return true; 504 } 505 ASSERT(*src > '\r' || (*src < '\n' && *src) || *src == '\v'); 506 507 if (LIKELY(src[0] != '\\')) { 508 *result++ = *src++; 509 } else if (src[1] == '\n' || src[1] == '\f') { 510 src += 2; 511 } else if (src[1] == '\r') { 512 src += src[2] == '\n' ? 3 : 2; 513 } else { 514 SrcCharacterType* savedEscapeStart = src; 515 unsigned unicode = parseEscape<SrcCharacterType>(src); 516 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { 517 src = savedEscapeStart; 518 return false; 519 } 520 UnicodeToChars(result, unicode); 521 } 522 } 523 524 return true; 525 } 526 527 template <typename CharacterType> 528 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote) 529 { 530 CharacterType* start = currentCharacter<CharacterType>(); 531 532 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { 533 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 534 ASSERT(is8BitSource()); 535 UChar*& result16 = currentCharacter16(); 536 UChar* start16 = result16; 537 int i = 0; 538 for (; i < result - start; i++) 539 result16[i] = start[i]; 540 541 result16 += i; 542 543 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); 544 545 resultString.init(start16, result16 - start16); 546 return; 547 } 548 549 resultString.init(start, result - start); 550 } 551 552 template <typename CharacterType> 553 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote) 554 { 555 start = skipWhiteSpace(currentCharacter<CharacterType>()); 556 557 if (*start == '"' || *start == '\'') { 558 quote = *start++; 559 end = checkAndSkipString(start, quote); 560 if (!end) 561 return false; 562 } else { 563 quote = 0; 564 end = start; 565 while (isURILetter(*end)) { 566 if (LIKELY(*end != '\\')) { 567 ++end; 568 } else { 569 end = checkAndSkipEscape(end); 570 if (!end) 571 return false; 572 } 573 } 574 } 575 576 end = skipWhiteSpace(end); 577 if (*end != ')') 578 return false; 579 580 return true; 581 } 582 583 template <typename SrcCharacterType, typename DestCharacterType> 584 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote) 585 { 586 if (quote) { 587 ASSERT(quote == '"' || quote == '\''); 588 return parseStringInternal(src, dest, quote); 589 } 590 591 while (isURILetter(*src)) { 592 if (LIKELY(*src != '\\')) { 593 *dest++ = *src++; 594 } else { 595 unsigned unicode = parseEscape<SrcCharacterType>(src); 596 if (unicode > 0xff && sizeof(SrcCharacterType) == 1) 597 return false; 598 UnicodeToChars(dest, unicode); 599 } 600 } 601 602 return true; 603 } 604 605 template <typename CharacterType> 606 inline void CSSTokenizer::parseURI(CSSParserString& string) 607 { 608 CharacterType* uriStart; 609 CharacterType* uriEnd; 610 UChar quote; 611 if (!findURI(uriStart, uriEnd, quote)) 612 return; 613 614 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; 615 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) { 616 string.init(uriStart, dest - uriStart); 617 } else { 618 // An escape sequence was encountered that can't be stored in 8 bits. 619 // Reset the current character to the start of the URI and re-parse with 620 // a 16-bit destination. 621 ASSERT(is8BitSource()); 622 UChar* uriStart16 = currentCharacter16(); 623 currentCharacter<CharacterType>() = uriStart; 624 bool result = parseURIInternal(currentCharacter<CharacterType>(), currentCharacter16(), quote); 625 ASSERT_UNUSED(result, result); 626 string.init(uriStart16, currentCharacter16() - uriStart16); 627 } 628 629 currentCharacter<CharacterType>() = uriEnd + 1; 630 m_token = URI; 631 } 632 633 template <typename CharacterType> 634 inline bool CSSTokenizer::parseUnicodeRange() 635 { 636 CharacterType* character = currentCharacter<CharacterType>() + 1; 637 int length = 6; 638 ASSERT(*currentCharacter<CharacterType>() == '+'); 639 640 while (isASCIIHexDigit(*character) && length) { 641 ++character; 642 --length; 643 } 644 645 if (length && *character == '?') { 646 // At most 5 hex digit followed by a question mark. 647 do { 648 ++character; 649 --length; 650 } while (*character == '?' && length); 651 currentCharacter<CharacterType>() = character; 652 return true; 653 } 654 655 if (length < 6) { 656 // At least one hex digit. 657 if (character[0] == '-' && isASCIIHexDigit(character[1])) { 658 // Followed by a dash and a hex digit. 659 ++character; 660 length = 6; 661 do { 662 ++character; 663 } while (--length && isASCIIHexDigit(*character)); 664 } 665 currentCharacter<CharacterType>() = character; 666 return true; 667 } 668 return false; 669 } 670 671 template <typename CharacterType> 672 bool CSSTokenizer::parseNthChild() 673 { 674 CharacterType* character = currentCharacter<CharacterType>(); 675 676 while (isASCIIDigit(*character)) 677 ++character; 678 if (isASCIIAlphaCaselessEqual(*character, 'n')) { 679 currentCharacter<CharacterType>() = character + 1; 680 return true; 681 } 682 return false; 683 } 684 685 template <typename CharacterType> 686 bool CSSTokenizer::parseNthChildExtra() 687 { 688 CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>()); 689 if (*character != '+' && *character != '-') 690 return false; 691 692 character = skipWhiteSpace(character + 1); 693 if (!isASCIIDigit(*character)) 694 return false; 695 696 do { 697 ++character; 698 } while (isASCIIDigit(*character)); 699 700 currentCharacter<CharacterType>() = character; 701 return true; 702 } 703 704 template <typename CharacterType> 705 inline bool CSSTokenizer::detectFunctionTypeToken(int length) 706 { 707 ASSERT(length > 0); 708 CharacterType* name = tokenStart<CharacterType>(); 709 SWITCH(name, length) { 710 CASE("not") { 711 m_token = NOTFUNCTION; 712 return true; 713 } 714 CASE("url") { 715 m_token = URI; 716 return true; 717 } 718 CASE("cue") { 719 m_token = CUEFUNCTION; 720 return true; 721 } 722 CASE("var") { 723 if (!RuntimeEnabledFeatures::cssVariablesEnabled()) 724 return false; 725 m_token = VARFUNCTION; 726 return true; 727 } 728 CASE("calc") { 729 m_token = CALCFUNCTION; 730 return true; 731 } 732 CASE("host") { 733 m_token = HOSTFUNCTION; 734 return true; 735 } 736 CASE("nth-child") { 737 m_parsingMode = NthChildMode; 738 return true; 739 } 740 CASE("nth-of-type") { 741 m_parsingMode = NthChildMode; 742 return true; 743 } 744 CASE("nth-last-child") { 745 m_parsingMode = NthChildMode; 746 return true; 747 } 748 CASE("nth-last-of-type") { 749 m_parsingMode = NthChildMode; 750 return true; 751 } 752 } 753 return false; 754 } 755 756 template <typename CharacterType> 757 inline void CSSTokenizer::detectMediaQueryToken(int length) 758 { 759 ASSERT(m_parsingMode == MediaQueryMode); 760 CharacterType* name = tokenStart<CharacterType>(); 761 762 SWITCH(name, length) { 763 CASE("and") { 764 m_token = MEDIA_AND; 765 } 766 CASE("not") { 767 m_token = MEDIA_NOT; 768 } 769 CASE("only") { 770 m_token = MEDIA_ONLY; 771 } 772 CASE("or") { 773 m_token = MEDIA_OR; 774 } 775 } 776 } 777 778 template <typename CharacterType> 779 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length) 780 { 781 ASSERT(length > 0); 782 783 SWITCH(type, length) { 784 CASE("cm") { 785 m_token = CMS; 786 } 787 CASE("ch") { 788 m_token = CHS; 789 } 790 CASE("deg") { 791 m_token = DEGS; 792 } 793 CASE("dppx") { 794 // There is a discussion about the name of this unit on www-style. 795 // Keep this compile time guard in place until that is resolved. 796 // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html 797 m_token = DPPX; 798 } 799 CASE("dpcm") { 800 m_token = DPCM; 801 } 802 CASE("dpi") { 803 m_token = DPI; 804 } 805 CASE("em") { 806 m_token = EMS; 807 } 808 CASE("ex") { 809 m_token = EXS; 810 } 811 CASE("fr") { 812 m_token = FR; 813 } 814 CASE("grad") { 815 m_token = GRADS; 816 } 817 CASE("hz") { 818 m_token = HERTZ; 819 } 820 CASE("in") { 821 m_token = INS; 822 } 823 CASE("khz") { 824 m_token = KHERTZ; 825 } 826 CASE("mm") { 827 m_token = MMS; 828 } 829 CASE("ms") { 830 m_token = MSECS; 831 } 832 CASE("px") { 833 m_token = PXS; 834 } 835 CASE("pt") { 836 m_token = PTS; 837 } 838 CASE("pc") { 839 m_token = PCS; 840 } 841 CASE("rad") { 842 m_token = RADS; 843 } 844 CASE("rem") { 845 m_token = REMS; 846 } 847 CASE("s") { 848 m_token = SECS; 849 } 850 CASE("turn") { 851 m_token = TURNS; 852 } 853 CASE("vw") { 854 m_token = VW; 855 } 856 CASE("vh") { 857 m_token = VH; 858 } 859 CASE("vmin") { 860 m_token = VMIN; 861 } 862 CASE("vmax") { 863 m_token = VMAX; 864 } 865 CASE("__qem") { 866 m_token = QEMS; 867 } 868 } 869 } 870 871 template <typename CharacterType> 872 inline void CSSTokenizer::detectDashToken(int length) 873 { 874 CharacterType* name = tokenStart<CharacterType>(); 875 876 // Ignore leading dash. 877 ++name; 878 --length; 879 880 SWITCH(name, length) { 881 CASE("webkit-any") { 882 m_token = ANYFUNCTION; 883 } 884 CASE("webkit-min") { 885 m_token = MINFUNCTION; 886 } 887 CASE("webkit-max") { 888 m_token = MAXFUNCTION; 889 } 890 CASE("webkit-calc") { 891 m_token = CALCFUNCTION; 892 } 893 CASE("webkit-distributed") { 894 m_token = DISTRIBUTEDFUNCTION; 895 } 896 } 897 } 898 899 template <typename CharacterType> 900 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape) 901 { 902 CharacterType* name = tokenStart<CharacterType>(); 903 ASSERT(name[0] == '@' && length >= 2); 904 905 // Ignore leading @. 906 ++name; 907 --length; 908 909 // charset, font-face, import, media, namespace, page, supports, 910 // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by hasEscape. 911 SWITCH(name, length) { 912 CASE("bottom-left") { 913 if (LIKELY(!hasEscape)) 914 m_token = BOTTOMLEFT_SYM; 915 } 916 CASE("bottom-right") { 917 if (LIKELY(!hasEscape)) 918 m_token = BOTTOMRIGHT_SYM; 919 } 920 CASE("bottom-center") { 921 if (LIKELY(!hasEscape)) 922 m_token = BOTTOMCENTER_SYM; 923 } 924 CASE("bottom-left-corner") { 925 if (LIKELY(!hasEscape)) 926 m_token = BOTTOMLEFTCORNER_SYM; 927 } 928 CASE("bottom-right-corner") { 929 if (LIKELY(!hasEscape)) 930 m_token = BOTTOMRIGHTCORNER_SYM; 931 } 932 CASE("charset") { 933 if (name - 1 == dataStart<CharacterType>()) 934 m_token = CHARSET_SYM; 935 } 936 CASE("font-face") { 937 m_token = FONT_FACE_SYM; 938 } 939 CASE("import") { 940 m_parsingMode = MediaQueryMode; 941 m_token = IMPORT_SYM; 942 } 943 CASE("keyframes") { 944 if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled()) 945 m_token = KEYFRAMES_SYM; 946 } 947 CASE("left-top") { 948 if (LIKELY(!hasEscape)) 949 m_token = LEFTTOP_SYM; 950 } 951 CASE("left-middle") { 952 if (LIKELY(!hasEscape)) 953 m_token = LEFTMIDDLE_SYM; 954 } 955 CASE("left-bottom") { 956 if (LIKELY(!hasEscape)) 957 m_token = LEFTBOTTOM_SYM; 958 } 959 CASE("media") { 960 m_parsingMode = MediaQueryMode; 961 m_token = MEDIA_SYM; 962 } 963 CASE("namespace") { 964 m_token = NAMESPACE_SYM; 965 } 966 CASE("page") { 967 m_token = PAGE_SYM; 968 } 969 CASE("right-top") { 970 if (LIKELY(!hasEscape)) 971 m_token = RIGHTTOP_SYM; 972 } 973 CASE("right-middle") { 974 if (LIKELY(!hasEscape)) 975 m_token = RIGHTMIDDLE_SYM; 976 } 977 CASE("right-bottom") { 978 if (LIKELY(!hasEscape)) 979 m_token = RIGHTBOTTOM_SYM; 980 } 981 CASE("supports") { 982 m_parsingMode = SupportsMode; 983 m_token = SUPPORTS_SYM; 984 } 985 CASE("top-left") { 986 if (LIKELY(!hasEscape)) 987 m_token = TOPLEFT_SYM; 988 } 989 CASE("top-right") { 990 if (LIKELY(!hasEscape)) 991 m_token = TOPRIGHT_SYM; 992 } 993 CASE("top-center") { 994 if (LIKELY(!hasEscape)) 995 m_token = TOPCENTER_SYM; 996 } 997 CASE("top-left-corner") { 998 if (LIKELY(!hasEscape)) 999 m_token = TOPLEFTCORNER_SYM; 1000 } 1001 CASE("top-right-corner") { 1002 if (LIKELY(!hasEscape)) 1003 m_token = TOPRIGHTCORNER_SYM; 1004 } 1005 CASE("viewport") { 1006 m_token = VIEWPORT_RULE_SYM; 1007 } 1008 CASE("-internal-rule") { 1009 if (LIKELY(!hasEscape && m_internal)) 1010 m_token = INTERNAL_RULE_SYM; 1011 } 1012 CASE("-webkit-region") { 1013 if (LIKELY(!hasEscape)) 1014 m_token = WEBKIT_REGION_RULE_SYM; 1015 } 1016 CASE("-webkit-filter") { 1017 if (LIKELY(!hasEscape)) 1018 m_token = WEBKIT_FILTER_RULE_SYM; 1019 } 1020 CASE("-internal-decls") { 1021 if (LIKELY(!hasEscape && m_internal)) 1022 m_token = INTERNAL_DECLS_SYM; 1023 } 1024 CASE("-internal-value") { 1025 if (LIKELY(!hasEscape && m_internal)) 1026 m_token = INTERNAL_VALUE_SYM; 1027 } 1028 CASE("-webkit-keyframes") { 1029 m_token = WEBKIT_KEYFRAMES_SYM; 1030 } 1031 CASE("-internal-selector") { 1032 if (LIKELY(!hasEscape && m_internal)) 1033 m_token = INTERNAL_SELECTOR_SYM; 1034 } 1035 CASE("-internal-medialist") { 1036 if (!m_internal) 1037 return; 1038 m_parsingMode = MediaQueryMode; 1039 m_token = INTERNAL_MEDIALIST_SYM; 1040 } 1041 CASE("-internal-keyframe-rule") { 1042 if (LIKELY(!hasEscape && m_internal)) 1043 m_token = INTERNAL_KEYFRAME_RULE_SYM; 1044 } 1045 CASE("-internal-keyframe-key-list") { 1046 if (!m_internal) 1047 return; 1048 m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM; 1049 } 1050 CASE("-internal-supports-condition") { 1051 if (!m_internal) 1052 return; 1053 m_parsingMode = SupportsMode; 1054 m_token = INTERNAL_SUPPORTS_CONDITION_SYM; 1055 } 1056 } 1057 } 1058 1059 template <typename CharacterType> 1060 inline void CSSTokenizer::detectSupportsToken(int length) 1061 { 1062 ASSERT(m_parsingMode == SupportsMode); 1063 CharacterType* name = tokenStart<CharacterType>(); 1064 1065 SWITCH(name, length) { 1066 CASE("or") { 1067 m_token = SUPPORTS_OR; 1068 } 1069 CASE("and") { 1070 m_token = SUPPORTS_AND; 1071 } 1072 CASE("not") { 1073 m_token = SUPPORTS_NOT; 1074 } 1075 } 1076 } 1077 1078 template <typename CharacterType> 1079 inline void CSSTokenizer::detectCSSVariableDefinitionToken(int length) 1080 { 1081 static const int prefixLength = static_cast<int>(sizeof("var-") - 1); 1082 if (length <= prefixLength) 1083 return; 1084 CharacterType* name = tokenStart<CharacterType>(); 1085 COMPILE_ASSERT(prefixLength > 0, CSS_variable_prefix_must_be_nonempty); 1086 if (name[prefixLength - 1] == '-' && isIdentifierStartAfterDash(name + prefixLength) && isEqualToCSSCaseSensitiveIdentifier(name, "var")) 1087 m_token = VAR_DEFINITION; 1088 } 1089 1090 template <typename SrcCharacterType> 1091 int CSSTokenizer::realLex(void* yylvalWithoutType) 1092 { 1093 YYSTYPE* yylval = static_cast<YYSTYPE*>(yylvalWithoutType); 1094 // Write pointer for the next character. 1095 SrcCharacterType* result; 1096 CSSParserString resultString; 1097 bool hasEscape; 1098 1099 // The input buffer is terminated by a \0 character, so 1100 // it is safe to read one character ahead of a known non-null. 1101 #ifndef NDEBUG 1102 // In debug we check with an ASSERT that the length is > 0 for string types. 1103 yylval->string.clear(); 1104 #endif 1105 1106 restartAfterComment: 1107 result = currentCharacter<SrcCharacterType>(); 1108 setTokenStart(result); 1109 m_tokenStartLineNumber = m_lineNumber; 1110 m_token = *currentCharacter<SrcCharacterType>(); 1111 ++currentCharacter<SrcCharacterType>(); 1112 1113 switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) { 1114 case CharacterCaselessU: 1115 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) { 1116 if (parseUnicodeRange<SrcCharacterType>()) { 1117 m_token = UNICODERANGE; 1118 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1119 break; 1120 } 1121 } 1122 // Fall through to CharacterIdentifierStart. 1123 1124 case CharacterIdentifierStart: 1125 --currentCharacter<SrcCharacterType>(); 1126 parseIdentifier(result, yylval->string, hasEscape); 1127 m_token = IDENT; 1128 1129 if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) { 1130 if (m_parsingMode == SupportsMode && !hasEscape) { 1131 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1132 if (m_token != IDENT) 1133 break; 1134 } 1135 1136 m_token = FUNCTION; 1137 if (!hasEscape) 1138 detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1139 1140 // Skip parenthesis 1141 ++currentCharacter<SrcCharacterType>(); 1142 ++result; 1143 ++yylval->string.m_length; 1144 1145 if (m_token == URI) { 1146 m_token = FUNCTION; 1147 // Check whether it is really an URI. 1148 if (yylval->string.is8Bit()) 1149 parseURI<LChar>(yylval->string); 1150 else 1151 parseURI<UChar>(yylval->string); 1152 } 1153 } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) { 1154 if (m_parsingMode == MediaQueryMode) { 1155 detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1156 } else if (m_parsingMode == SupportsMode) { 1157 detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1158 } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) { 1159 if (result - tokenStart<SrcCharacterType>() == 1) { 1160 // String "n" is IDENT but "n+1" is NTH. 1161 if (parseNthChildExtra<SrcCharacterType>()) { 1162 m_token = NTH; 1163 yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>(); 1164 } 1165 } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') { 1166 // String "n-" is IDENT but "n-1" is NTH. 1167 // Set currentCharacter to '-' to continue parsing. 1168 SrcCharacterType* nextCharacter = result; 1169 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1; 1170 if (parseNthChildExtra<SrcCharacterType>()) { 1171 m_token = NTH; 1172 yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1173 } else { 1174 // Revert the change to currentCharacter if unsuccessful. 1175 currentCharacter<SrcCharacterType>() = nextCharacter; 1176 } 1177 } 1178 } 1179 } else if (UNLIKELY(RuntimeEnabledFeatures::cssVariablesEnabled())) { 1180 detectCSSVariableDefinitionToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1181 } 1182 break; 1183 1184 case CharacterDot: 1185 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) 1186 break; 1187 // Fall through to CharacterNumber. 1188 1189 case CharacterNumber: { 1190 bool dotSeen = (m_token == '.'); 1191 1192 while (true) { 1193 if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) { 1194 // Only one dot is allowed for a number, 1195 // and it must be followed by a digit. 1196 if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1])) 1197 break; 1198 dotSeen = true; 1199 } 1200 ++currentCharacter<SrcCharacterType>(); 1201 } 1202 1203 if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) { 1204 // "[0-9]+n" is always an NthChild. 1205 ++currentCharacter<SrcCharacterType>(); 1206 parseNthChildExtra<SrcCharacterType>(); 1207 m_token = NTH; 1208 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1209 break; 1210 } 1211 1212 // Use SVG parser for numbers on SVG presentation attributes. 1213 if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) { 1214 // We need to take care of units like 'em' or 'ex'. 1215 SrcCharacterType* character = currentCharacter<SrcCharacterType>(); 1216 if (isASCIIAlphaCaselessEqual(*character, 'e')) { 1217 ASSERT(character - tokenStart<SrcCharacterType>() > 0); 1218 ++character; 1219 if (*character == '-' || *character == '+' || isASCIIDigit(*character)) { 1220 ++character; 1221 while (isASCIIDigit(*character)) 1222 ++character; 1223 // Use FLOATTOKEN if the string contains exponents. 1224 dotSeen = true; 1225 currentCharacter<SrcCharacterType>() = character; 1226 } 1227 } 1228 if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number)) 1229 break; 1230 } else { 1231 yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1232 } 1233 1234 // Type of the function. 1235 if (isIdentifierStart<SrcCharacterType>()) { 1236 SrcCharacterType* type = currentCharacter<SrcCharacterType>(); 1237 result = currentCharacter<SrcCharacterType>(); 1238 1239 parseIdentifier(result, resultString, hasEscape); 1240 1241 m_token = DIMEN; 1242 if (!hasEscape) 1243 detectNumberToken(type, currentCharacter<SrcCharacterType>() - type); 1244 1245 if (m_token == DIMEN) { 1246 // The decoded number is overwritten, but this is intentional. 1247 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1248 } 1249 } else if (*currentCharacter<SrcCharacterType>() == '%') { 1250 // Although the CSS grammar says {num}% we follow 1251 // webkit at the moment which uses {num}%+. 1252 do { 1253 ++currentCharacter<SrcCharacterType>(); 1254 } while (*currentCharacter<SrcCharacterType>() == '%'); 1255 m_token = PERCENTAGE; 1256 } else { 1257 m_token = dotSeen ? FLOATTOKEN : INTEGER; 1258 } 1259 break; 1260 } 1261 1262 case CharacterDash: 1263 if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) { 1264 --currentCharacter<SrcCharacterType>(); 1265 parseIdentifier(result, resultString, hasEscape); 1266 m_token = IDENT; 1267 1268 if (*currentCharacter<SrcCharacterType>() == '(') { 1269 m_token = FUNCTION; 1270 if (!hasEscape) 1271 detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>()); 1272 ++currentCharacter<SrcCharacterType>(); 1273 ++result; 1274 } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) { 1275 if (result - tokenStart<SrcCharacterType>() == 2) { 1276 // String "-n" is IDENT but "-n+1" is NTH. 1277 if (parseNthChildExtra<SrcCharacterType>()) { 1278 m_token = NTH; 1279 result = currentCharacter<SrcCharacterType>(); 1280 } 1281 } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') { 1282 // String "-n-" is IDENT but "-n-1" is NTH. 1283 // Set currentCharacter to second '-' of '-n-' to continue parsing. 1284 SrcCharacterType* nextCharacter = result; 1285 currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2; 1286 if (parseNthChildExtra<SrcCharacterType>()) { 1287 m_token = NTH; 1288 result = currentCharacter<SrcCharacterType>(); 1289 } else { 1290 // Revert the change to currentCharacter if unsuccessful. 1291 currentCharacter<SrcCharacterType>() = nextCharacter; 1292 } 1293 } 1294 } 1295 resultString.setLength(result - tokenStart<SrcCharacterType>()); 1296 yylval->string = resultString; 1297 } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') { 1298 currentCharacter<SrcCharacterType>() += 2; 1299 m_token = SGML_CD; 1300 } else if (UNLIKELY(m_parsingMode == NthChildMode)) { 1301 // "-[0-9]+n" is always an NthChild. 1302 if (parseNthChild<SrcCharacterType>()) { 1303 parseNthChildExtra<SrcCharacterType>(); 1304 m_token = NTH; 1305 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1306 } 1307 } 1308 break; 1309 1310 case CharacterOther: 1311 // m_token is simply the current character. 1312 break; 1313 1314 case CharacterNull: 1315 // Do not advance pointer at the end of input. 1316 --currentCharacter<SrcCharacterType>(); 1317 break; 1318 1319 case CharacterWhiteSpace: 1320 m_token = WHITESPACE; 1321 // Might start with a '\n'. 1322 --currentCharacter<SrcCharacterType>(); 1323 do { 1324 if (*currentCharacter<SrcCharacterType>() == '\n') 1325 ++m_lineNumber; 1326 ++currentCharacter<SrcCharacterType>(); 1327 } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace)); 1328 break; 1329 1330 case CharacterEndMediaQueryOrSupports: 1331 if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode) 1332 m_parsingMode = NormalMode; 1333 break; 1334 1335 case CharacterEndNthChild: 1336 if (m_parsingMode == NthChildMode) 1337 m_parsingMode = NormalMode; 1338 break; 1339 1340 case CharacterQuote: 1341 if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token)) { 1342 ++result; 1343 parseString<SrcCharacterType>(result, yylval->string, m_token); 1344 m_token = STRING; 1345 } 1346 break; 1347 1348 case CharacterExclamationMark: { 1349 SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>()); 1350 if (isEqualToCSSIdentifier(start, "important")) { 1351 m_token = IMPORTANT_SYM; 1352 currentCharacter<SrcCharacterType>() = start + 9; 1353 } 1354 break; 1355 } 1356 1357 case CharacterHashmark: { 1358 SrcCharacterType* start = currentCharacter<SrcCharacterType>(); 1359 result = currentCharacter<SrcCharacterType>(); 1360 1361 if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) { 1362 // This must be a valid hex number token. 1363 do { 1364 ++currentCharacter<SrcCharacterType>(); 1365 } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>())); 1366 m_token = HEX; 1367 yylval->string.init(start, currentCharacter<SrcCharacterType>() - start); 1368 } else if (isIdentifierStart<SrcCharacterType>()) { 1369 m_token = IDSEL; 1370 parseIdentifier(result, yylval->string, hasEscape); 1371 if (!hasEscape) { 1372 // Check whether the identifier is also a valid hex number. 1373 SrcCharacterType* current = start; 1374 m_token = HEX; 1375 do { 1376 if (!isASCIIHexDigit(*current)) { 1377 m_token = IDSEL; 1378 break; 1379 } 1380 ++current; 1381 } while (current < result); 1382 } 1383 } 1384 break; 1385 } 1386 1387 case CharacterSlash: 1388 // Ignore comments. They are not even considered as white spaces. 1389 if (*currentCharacter<SrcCharacterType>() == '*') { 1390 const CSSParserLocation startLocation = currentLocation(); 1391 if (m_parser.m_sourceDataHandler) { 1392 unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash. 1393 m_parser.m_sourceDataHandler->startComment(startOffset - m_parsedTextPrefixLength); 1394 } 1395 ++currentCharacter<SrcCharacterType>(); 1396 while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') { 1397 if (*currentCharacter<SrcCharacterType>() == '\n') 1398 ++m_lineNumber; 1399 if (*currentCharacter<SrcCharacterType>() == '\0') { 1400 // Unterminated comments are simply ignored. 1401 currentCharacter<SrcCharacterType>() -= 2; 1402 m_parser.reportError(startLocation, CSSParser::UnterminatedCommentError); 1403 break; 1404 } 1405 ++currentCharacter<SrcCharacterType>(); 1406 } 1407 currentCharacter<SrcCharacterType>() += 2; 1408 if (m_parser.m_sourceDataHandler) { 1409 unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>(); 1410 unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength); 1411 m_parser.m_sourceDataHandler->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength); 1412 } 1413 goto restartAfterComment; 1414 } 1415 break; 1416 1417 case CharacterDollar: 1418 if (*currentCharacter<SrcCharacterType>() == '=') { 1419 ++currentCharacter<SrcCharacterType>(); 1420 m_token = ENDSWITH; 1421 } 1422 break; 1423 1424 case CharacterAsterisk: 1425 if (*currentCharacter<SrcCharacterType>() == '=') { 1426 ++currentCharacter<SrcCharacterType>(); 1427 m_token = CONTAINS; 1428 } 1429 break; 1430 1431 case CharacterPlus: 1432 if (UNLIKELY(m_parsingMode == NthChildMode)) { 1433 // Simplest case. "+[0-9]*n" is always NthChild. 1434 if (parseNthChild<SrcCharacterType>()) { 1435 parseNthChildExtra<SrcCharacterType>(); 1436 m_token = NTH; 1437 yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>()); 1438 } 1439 } 1440 break; 1441 1442 case CharacterLess: 1443 if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') { 1444 currentCharacter<SrcCharacterType>() += 3; 1445 m_token = SGML_CD; 1446 } 1447 break; 1448 1449 case CharacterAt: 1450 if (isIdentifierStart<SrcCharacterType>()) { 1451 m_token = ATKEYWORD; 1452 ++result; 1453 parseIdentifier(result, resultString, hasEscape); 1454 detectAtToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>(), hasEscape); 1455 } 1456 break; 1457 1458 case CharacterBackSlash: 1459 if (isCSSEscape(*currentCharacter<SrcCharacterType>())) { 1460 --currentCharacter<SrcCharacterType>(); 1461 parseIdentifier(result, yylval->string, hasEscape); 1462 m_token = IDENT; 1463 } 1464 break; 1465 1466 case CharacterXor: 1467 if (*currentCharacter<SrcCharacterType>() == '=') { 1468 ++currentCharacter<SrcCharacterType>(); 1469 m_token = BEGINSWITH; 1470 } 1471 break; 1472 1473 case CharacterVerticalBar: 1474 if (*currentCharacter<SrcCharacterType>() == '=') { 1475 ++currentCharacter<SrcCharacterType>(); 1476 m_token = DASHMATCH; 1477 } 1478 break; 1479 1480 case CharacterTilde: 1481 if (*currentCharacter<SrcCharacterType>() == '=') { 1482 ++currentCharacter<SrcCharacterType>(); 1483 m_token = INCLUDES; 1484 } 1485 break; 1486 1487 default: 1488 ASSERT_NOT_REACHED(); 1489 break; 1490 } 1491 1492 return m_token; 1493 } 1494 1495 template <> 1496 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart) 1497 { 1498 m_tokenStart.ptr8 = tokenStart; 1499 } 1500 1501 template <> 1502 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart) 1503 { 1504 m_tokenStart.ptr16 = tokenStart; 1505 } 1506 1507 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength) 1508 { 1509 m_parsedTextPrefixLength = prefixLength; 1510 m_parsedTextSuffixLength = suffixLength; 1511 unsigned stringLength = string.length(); 1512 unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1; 1513 m_length = length; 1514 1515 if (!stringLength || string.is8Bit()) { 1516 m_dataStart8 = adoptArrayPtr(new LChar[length]); 1517 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) 1518 m_dataStart8[i] = prefix[i]; 1519 1520 if (stringLength) 1521 memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar)); 1522 1523 unsigned start = m_parsedTextPrefixLength + stringLength; 1524 unsigned end = start + suffixLength; 1525 for (unsigned i = start; i < end; i++) 1526 m_dataStart8[i] = suffix[i - start]; 1527 1528 m_dataStart8[length - 1] = 0; 1529 1530 m_is8BitSource = true; 1531 m_currentCharacter8 = m_dataStart8.get(); 1532 m_currentCharacter16 = 0; 1533 setTokenStart<LChar>(m_currentCharacter8); 1534 m_lexFunc = &CSSTokenizer::realLex<LChar>; 1535 return; 1536 } 1537 1538 m_dataStart16 = adoptArrayPtr(new UChar[length]); 1539 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) 1540 m_dataStart16[i] = prefix[i]; 1541 1542 ASSERT(stringLength); 1543 memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar)); 1544 1545 unsigned start = m_parsedTextPrefixLength + stringLength; 1546 unsigned end = start + suffixLength; 1547 for (unsigned i = start; i < end; i++) 1548 m_dataStart16[i] = suffix[i - start]; 1549 1550 m_dataStart16[length - 1] = 0; 1551 1552 m_is8BitSource = false; 1553 m_currentCharacter8 = 0; 1554 m_currentCharacter16 = m_dataStart16.get(); 1555 setTokenStart<UChar>(m_currentCharacter16); 1556 m_lexFunc = &CSSTokenizer::realLex<UChar>; 1557 } 1558 1559 } // namespace WebCore 1560