1 /* 2 * Copyright (C) 2003 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2005 Allan Sandfeld Jensen (kde (at) carewolf.com) 4 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012 Apple Inc. All rights reserved. 5 * Copyright (C) 2007 Nicholas Shanks <webkit (at) nickshanks.com> 6 * Copyright (C) 2008 Eric Seidel <eric (at) webkit.org> 7 * Copyright (C) 2009 Torch Mobile Inc. All rights reserved. (http://www.torchmobile.com/) 8 * Copyright (C) 2012 Adobe Systems Incorporated. All rights reserved. 9 * Copyright (C) 2012 Intel Corporation. All rights reserved. 10 * 11 * This library is free software; you can redistribute it and/or 12 * modify it under the terms of the GNU Library General Public 13 * License as published by the Free Software Foundation; either 14 * version 2 of the License, or (at your option) any later version. 15 * 16 * This library is distributed in the hope that it will be useful, 17 * but WITHOUT ANY WARRANTY; without even the implied warranty of 18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 * Library General Public License for more details. 20 * 21 * You should have received a copy of the GNU Library General Public License 22 * along with this library; see the file COPYING.LIB. If not, write to 23 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 24 * Boston, MA 02110-1301, USA. 25 */ 26 27 #include "config.h" 28 #include "core/css/CSSTokenizer.h" 29 30 #include "core/css/CSSKeyframeRule.h" 31 #include "core/css/parser/BisonCSSParser.h" 32 #include "core/css/CSSParserValues.h" 33 #include "core/css/MediaQuery.h" 34 #include "core/css/StyleRule.h" 35 #include "core/html/parser/HTMLParserIdioms.h" 36 #include "core/svg/SVGParserUtilities.h" 37 38 namespace WebCore { 39 40 #include "core/CSSGrammar.h" 41 42 enum CharacterType { 43 // Types for the main switch. 44 45 // The first 4 types must be grouped together, as they 46 // represent the allowed chars in an identifier. 
47 CharacterCaselessU, 48 CharacterIdentifierStart, 49 CharacterNumber, 50 CharacterDash, 51 52 CharacterOther, 53 CharacterNull, 54 CharacterWhiteSpace, 55 CharacterEndMediaQueryOrSupports, 56 CharacterEndNthChild, 57 CharacterQuote, 58 CharacterExclamationMark, 59 CharacterHashmark, 60 CharacterDollar, 61 CharacterAsterisk, 62 CharacterPlus, 63 CharacterDot, 64 CharacterSlash, 65 CharacterLess, 66 CharacterAt, 67 CharacterBackSlash, 68 CharacterXor, 69 CharacterVerticalBar, 70 CharacterTilde, 71 }; 72 73 // 128 ASCII codes 74 static const CharacterType typesOfASCIICharacters[128] = { 75 /* 0 - Null */ CharacterNull, 76 /* 1 - Start of Heading */ CharacterOther, 77 /* 2 - Start of Text */ CharacterOther, 78 /* 3 - End of Text */ CharacterOther, 79 /* 4 - End of Transm. */ CharacterOther, 80 /* 5 - Enquiry */ CharacterOther, 81 /* 6 - Acknowledgment */ CharacterOther, 82 /* 7 - Bell */ CharacterOther, 83 /* 8 - Back Space */ CharacterOther, 84 /* 9 - Horizontal Tab */ CharacterWhiteSpace, 85 /* 10 - Line Feed */ CharacterWhiteSpace, 86 /* 11 - Vertical Tab */ CharacterOther, 87 /* 12 - Form Feed */ CharacterWhiteSpace, 88 /* 13 - Carriage Return */ CharacterWhiteSpace, 89 /* 14 - Shift Out */ CharacterOther, 90 /* 15 - Shift In */ CharacterOther, 91 /* 16 - Data Line Escape */ CharacterOther, 92 /* 17 - Device Control 1 */ CharacterOther, 93 /* 18 - Device Control 2 */ CharacterOther, 94 /* 19 - Device Control 3 */ CharacterOther, 95 /* 20 - Device Control 4 */ CharacterOther, 96 /* 21 - Negative Ack. 
*/ CharacterOther, 97 /* 22 - Synchronous Idle */ CharacterOther, 98 /* 23 - End of Transmit */ CharacterOther, 99 /* 24 - Cancel */ CharacterOther, 100 /* 25 - End of Medium */ CharacterOther, 101 /* 26 - Substitute */ CharacterOther, 102 /* 27 - Escape */ CharacterOther, 103 /* 28 - File Separator */ CharacterOther, 104 /* 29 - Group Separator */ CharacterOther, 105 /* 30 - Record Separator */ CharacterOther, 106 /* 31 - Unit Separator */ CharacterOther, 107 /* 32 - Space */ CharacterWhiteSpace, 108 /* 33 - ! */ CharacterExclamationMark, 109 /* 34 - " */ CharacterQuote, 110 /* 35 - # */ CharacterHashmark, 111 /* 36 - $ */ CharacterDollar, 112 /* 37 - % */ CharacterOther, 113 /* 38 - & */ CharacterOther, 114 /* 39 - ' */ CharacterQuote, 115 /* 40 - ( */ CharacterOther, 116 /* 41 - ) */ CharacterEndNthChild, 117 /* 42 - * */ CharacterAsterisk, 118 /* 43 - + */ CharacterPlus, 119 /* 44 - , */ CharacterOther, 120 /* 45 - - */ CharacterDash, 121 /* 46 - . */ CharacterDot, 122 /* 47 - / */ CharacterSlash, 123 /* 48 - 0 */ CharacterNumber, 124 /* 49 - 1 */ CharacterNumber, 125 /* 50 - 2 */ CharacterNumber, 126 /* 51 - 3 */ CharacterNumber, 127 /* 52 - 4 */ CharacterNumber, 128 /* 53 - 5 */ CharacterNumber, 129 /* 54 - 6 */ CharacterNumber, 130 /* 55 - 7 */ CharacterNumber, 131 /* 56 - 8 */ CharacterNumber, 132 /* 57 - 9 */ CharacterNumber, 133 /* 58 - : */ CharacterOther, 134 /* 59 - ; */ CharacterEndMediaQueryOrSupports, 135 /* 60 - < */ CharacterLess, 136 /* 61 - = */ CharacterOther, 137 /* 62 - > */ CharacterOther, 138 /* 63 - ? 
*/ CharacterOther, 139 /* 64 - @ */ CharacterAt, 140 /* 65 - A */ CharacterIdentifierStart, 141 /* 66 - B */ CharacterIdentifierStart, 142 /* 67 - C */ CharacterIdentifierStart, 143 /* 68 - D */ CharacterIdentifierStart, 144 /* 69 - E */ CharacterIdentifierStart, 145 /* 70 - F */ CharacterIdentifierStart, 146 /* 71 - G */ CharacterIdentifierStart, 147 /* 72 - H */ CharacterIdentifierStart, 148 /* 73 - I */ CharacterIdentifierStart, 149 /* 74 - J */ CharacterIdentifierStart, 150 /* 75 - K */ CharacterIdentifierStart, 151 /* 76 - L */ CharacterIdentifierStart, 152 /* 77 - M */ CharacterIdentifierStart, 153 /* 78 - N */ CharacterIdentifierStart, 154 /* 79 - O */ CharacterIdentifierStart, 155 /* 80 - P */ CharacterIdentifierStart, 156 /* 81 - Q */ CharacterIdentifierStart, 157 /* 82 - R */ CharacterIdentifierStart, 158 /* 83 - S */ CharacterIdentifierStart, 159 /* 84 - T */ CharacterIdentifierStart, 160 /* 85 - U */ CharacterCaselessU, 161 /* 86 - V */ CharacterIdentifierStart, 162 /* 87 - W */ CharacterIdentifierStart, 163 /* 88 - X */ CharacterIdentifierStart, 164 /* 89 - Y */ CharacterIdentifierStart, 165 /* 90 - Z */ CharacterIdentifierStart, 166 /* 91 - [ */ CharacterOther, 167 /* 92 - \ */ CharacterBackSlash, 168 /* 93 - ] */ CharacterOther, 169 /* 94 - ^ */ CharacterXor, 170 /* 95 - _ */ CharacterIdentifierStart, 171 /* 96 - ` */ CharacterOther, 172 /* 97 - a */ CharacterIdentifierStart, 173 /* 98 - b */ CharacterIdentifierStart, 174 /* 99 - c */ CharacterIdentifierStart, 175 /* 100 - d */ CharacterIdentifierStart, 176 /* 101 - e */ CharacterIdentifierStart, 177 /* 102 - f */ CharacterIdentifierStart, 178 /* 103 - g */ CharacterIdentifierStart, 179 /* 104 - h */ CharacterIdentifierStart, 180 /* 105 - i */ CharacterIdentifierStart, 181 /* 106 - j */ CharacterIdentifierStart, 182 /* 107 - k */ CharacterIdentifierStart, 183 /* 108 - l */ CharacterIdentifierStart, 184 /* 109 - m */ CharacterIdentifierStart, 185 /* 110 - n */ CharacterIdentifierStart, 186 /* 111 - o 
*/ CharacterIdentifierStart, 187 /* 112 - p */ CharacterIdentifierStart, 188 /* 113 - q */ CharacterIdentifierStart, 189 /* 114 - r */ CharacterIdentifierStart, 190 /* 115 - s */ CharacterIdentifierStart, 191 /* 116 - t */ CharacterIdentifierStart, 192 /* 117 - u */ CharacterCaselessU, 193 /* 118 - v */ CharacterIdentifierStart, 194 /* 119 - w */ CharacterIdentifierStart, 195 /* 120 - x */ CharacterIdentifierStart, 196 /* 121 - y */ CharacterIdentifierStart, 197 /* 122 - z */ CharacterIdentifierStart, 198 /* 123 - { */ CharacterEndMediaQueryOrSupports, 199 /* 124 - | */ CharacterVerticalBar, 200 /* 125 - } */ CharacterOther, 201 /* 126 - ~ */ CharacterTilde, 202 /* 127 - Delete */ CharacterOther, 203 }; 204 205 // Utility functions for the CSS tokenizer. 206 207 template <typename CharacterType> 208 static inline bool isCSSLetter(CharacterType character) 209 { 210 return character >= 128 || typesOfASCIICharacters[character] <= CharacterDash; 211 } 212 213 template <typename CharacterType> 214 static inline bool isCSSEscape(CharacterType character) 215 { 216 return character >= ' ' && character != 127; 217 } 218 219 template <typename CharacterType> 220 static inline bool isURILetter(CharacterType character) 221 { 222 return (character >= '*' && character != 127) || (character >= '#' && character <= '&') || character == '!'; 223 } 224 225 template <typename CharacterType> 226 static inline bool isIdentifierStartAfterDash(CharacterType* currentCharacter) 227 { 228 return isASCIIAlpha(currentCharacter[0]) || currentCharacter[0] == '_' || currentCharacter[0] >= 128 229 || (currentCharacter[0] == '\\' && isCSSEscape(currentCharacter[1])); 230 } 231 232 template <typename CharacterType> 233 static inline bool isEqualToCSSIdentifier(CharacterType* cssString, const char* constantString) 234 { 235 // Compare an character memory data with a zero terminated string. 236 do { 237 // The input must be part of an identifier if constantChar or constString 238 // contains '-'. 
Otherwise toASCIILowerUnchecked('\r') would be equal to '-'. 239 ASSERT((*constantString >= 'a' && *constantString <= 'z') || *constantString == '-'); 240 ASSERT(*constantString != '-' || isCSSLetter(*cssString)); 241 if (toASCIILowerUnchecked(*cssString++) != (*constantString++)) 242 return false; 243 } while (*constantString); 244 return true; 245 } 246 247 template <typename CharacterType> 248 static inline bool isEqualToCSSCaseSensitiveIdentifier(CharacterType* string, const char* constantString) 249 { 250 ASSERT(*constantString); 251 252 do { 253 if (*string++ != *constantString++) 254 return false; 255 } while (*constantString); 256 return true; 257 } 258 259 template <typename CharacterType> 260 static CharacterType* checkAndSkipEscape(CharacterType* currentCharacter) 261 { 262 // Returns with 0, if escape check is failed. Otherwise 263 // it returns with the following character. 264 ASSERT(*currentCharacter == '\\'); 265 266 ++currentCharacter; 267 if (!isCSSEscape(*currentCharacter)) 268 return 0; 269 270 if (isASCIIHexDigit(*currentCharacter)) { 271 int length = 6; 272 273 do { 274 ++currentCharacter; 275 } while (isASCIIHexDigit(*currentCharacter) && --length); 276 277 // Optional space after the escape sequence. 278 if (isHTMLSpace<CharacterType>(*currentCharacter)) 279 ++currentCharacter; 280 return currentCharacter; 281 } 282 return currentCharacter + 1; 283 } 284 285 template <typename CharacterType> 286 static inline CharacterType* skipWhiteSpace(CharacterType* currentCharacter) 287 { 288 while (isHTMLSpace<CharacterType>(*currentCharacter)) 289 ++currentCharacter; 290 return currentCharacter; 291 } 292 293 // Main CSS tokenizer functions. 
294 295 template <> 296 inline LChar*& CSSTokenizer::currentCharacter<LChar>() 297 { 298 return m_currentCharacter8; 299 } 300 301 template <> 302 inline UChar*& CSSTokenizer::currentCharacter<UChar>() 303 { 304 return m_currentCharacter16; 305 } 306 307 UChar* CSSTokenizer::allocateStringBuffer16(size_t len) 308 { 309 // Allocates and returns a CSSTokenizer owned buffer for storing 310 // UTF-16 data. Used to get a suitable life span for UTF-16 311 // strings, identifiers and URIs created by the tokenizer. 312 OwnPtr<UChar[]> buffer = adoptArrayPtr(new UChar[len]); 313 314 UChar* bufferPtr = buffer.get(); 315 316 m_cssStrings16.append(buffer.release()); 317 return bufferPtr; 318 } 319 320 template <> 321 inline LChar* CSSTokenizer::dataStart<LChar>() 322 { 323 return m_dataStart8.get(); 324 } 325 326 template <> 327 inline UChar* CSSTokenizer::dataStart<UChar>() 328 { 329 return m_dataStart16.get(); 330 } 331 332 template <typename CharacterType> 333 inline CSSParserLocation CSSTokenizer::tokenLocation() 334 { 335 CSSParserLocation location; 336 location.token.init(tokenStart<CharacterType>(), currentCharacter<CharacterType>() - tokenStart<CharacterType>()); 337 location.lineNumber = m_tokenStartLineNumber; 338 location.offset = tokenStart<CharacterType>() - dataStart<CharacterType>(); 339 return location; 340 } 341 342 CSSParserLocation CSSTokenizer::currentLocation() 343 { 344 if (is8BitSource()) 345 return tokenLocation<LChar>(); 346 return tokenLocation<UChar>(); 347 } 348 349 template <typename CharacterType> 350 inline bool CSSTokenizer::isIdentifierStart() 351 { 352 // Check whether an identifier is started. 353 return isIdentifierStartAfterDash((*currentCharacter<CharacterType>() != '-') ? 
currentCharacter<CharacterType>() : currentCharacter<CharacterType>() + 1); 354 } 355 356 enum CheckStringValidationMode { 357 AbortIfInvalid, 358 SkipInvalid 359 }; 360 361 template <typename CharacterType> 362 static inline CharacterType* checkAndSkipString(CharacterType* currentCharacter, int quote, CheckStringValidationMode mode) 363 { 364 // If mode is AbortIfInvalid and the string check fails it returns 365 // with 0. Otherwise it returns with a pointer to the first 366 // character after the string. 367 while (true) { 368 if (UNLIKELY(*currentCharacter == quote)) { 369 // String parsing is successful. 370 return currentCharacter + 1; 371 } 372 if (UNLIKELY(!*currentCharacter)) { 373 // String parsing is successful up to end of input. 374 return currentCharacter; 375 } 376 if (mode == AbortIfInvalid && UNLIKELY(*currentCharacter <= '\r' && (*currentCharacter == '\n' || (*currentCharacter | 0x1) == '\r'))) { 377 // String parsing is failed for character '\n', '\f' or '\r'. 378 return 0; 379 } 380 381 if (LIKELY(currentCharacter[0] != '\\')) { 382 ++currentCharacter; 383 } else if (currentCharacter[1] == '\n' || currentCharacter[1] == '\f') { 384 currentCharacter += 2; 385 } else if (currentCharacter[1] == '\r') { 386 currentCharacter += currentCharacter[2] == '\n' ? 3 : 2; 387 } else { 388 CharacterType* next = checkAndSkipEscape(currentCharacter); 389 if (!next) { 390 if (mode == AbortIfInvalid) 391 return 0; 392 next = currentCharacter + 1; 393 } 394 currentCharacter = next; 395 } 396 } 397 } 398 399 template <typename CharacterType> 400 unsigned CSSTokenizer::parseEscape(CharacterType*& src) 401 { 402 ASSERT(*src == '\\' && isCSSEscape(src[1])); 403 404 unsigned unicode = 0; 405 406 ++src; 407 if (isASCIIHexDigit(*src)) { 408 409 int length = 6; 410 411 do { 412 unicode = (unicode << 4) + toASCIIHexValue(*src++); 413 } while (--length && isASCIIHexDigit(*src)); 414 415 // Characters above 0x10ffff are not handled. 
416 if (unicode > 0x10ffff) 417 unicode = 0xfffd; 418 419 // Optional space after the escape sequence. 420 if (isHTMLSpace<CharacterType>(*src)) 421 ++src; 422 423 return unicode; 424 } 425 426 return *src++; 427 } 428 429 template <> 430 inline void CSSTokenizer::UnicodeToChars<LChar>(LChar*& result, unsigned unicode) 431 { 432 ASSERT(unicode <= 0xff); 433 *result = unicode; 434 435 ++result; 436 } 437 438 template <> 439 inline void CSSTokenizer::UnicodeToChars<UChar>(UChar*& result, unsigned unicode) 440 { 441 // Replace unicode with a surrogate pairs when it is bigger than 0xffff 442 if (U16_LENGTH(unicode) == 2) { 443 *result++ = U16_LEAD(unicode); 444 *result = U16_TRAIL(unicode); 445 } else { 446 *result = unicode; 447 } 448 449 ++result; 450 } 451 452 template <typename SrcCharacterType> 453 size_t CSSTokenizer::peekMaxIdentifierLen(SrcCharacterType* src) 454 { 455 // The decoded form of an identifier (after resolving escape 456 // sequences) will not contain more characters (ASCII or UTF-16 457 // codepoints) than the input. This code can therefore ignore 458 // escape sequences completely. 
459 SrcCharacterType* start = src; 460 do { 461 if (LIKELY(*src != '\\')) 462 src++; 463 else 464 parseEscape<SrcCharacterType>(src); 465 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); 466 467 return src - start; 468 } 469 470 template <typename SrcCharacterType, typename DestCharacterType> 471 inline bool CSSTokenizer::parseIdentifierInternal(SrcCharacterType*& src, DestCharacterType*& result, bool& hasEscape) 472 { 473 hasEscape = false; 474 do { 475 if (LIKELY(*src != '\\')) { 476 *result++ = *src++; 477 } else { 478 hasEscape = true; 479 SrcCharacterType* savedEscapeStart = src; 480 unsigned unicode = parseEscape<SrcCharacterType>(src); 481 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { 482 src = savedEscapeStart; 483 return false; 484 } 485 UnicodeToChars(result, unicode); 486 } 487 } while (isCSSLetter(src[0]) || (src[0] == '\\' && isCSSEscape(src[1]))); 488 489 return true; 490 } 491 492 template <typename CharacterType> 493 inline void CSSTokenizer::parseIdentifier(CharacterType*& result, CSSParserString& resultString, bool& hasEscape) 494 { 495 // If a valid identifier start is found, we can safely 496 // parse the identifier until the next invalid character. 
497 ASSERT(isIdentifierStart<CharacterType>()); 498 499 CharacterType* start = currentCharacter<CharacterType>(); 500 if (UNLIKELY(!parseIdentifierInternal(currentCharacter<CharacterType>(), result, hasEscape))) { 501 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 502 ASSERT(is8BitSource()); 503 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxIdentifierLen(currentCharacter<CharacterType>())); 504 UChar* start16 = result16; 505 int i = 0; 506 for (; i < result - start; i++) 507 result16[i] = start[i]; 508 509 result16 += i; 510 511 parseIdentifierInternal(currentCharacter<CharacterType>(), result16, hasEscape); 512 513 resultString.init(start16, result16 - start16); 514 515 return; 516 } 517 518 resultString.init(start, result - start); 519 } 520 521 template <typename SrcCharacterType> 522 size_t CSSTokenizer::peekMaxStringLen(SrcCharacterType* src, UChar quote) 523 { 524 // The decoded form of a CSS string (after resolving escape 525 // sequences) will not contain more characters (ASCII or UTF-16 526 // codepoints) than the input. This code can therefore ignore 527 // escape sequences completely and just return the length of the 528 // input string (possibly including terminating quote if any). 529 SrcCharacterType* end = checkAndSkipString(src, quote, SkipInvalid); 530 return end ? end - src : 0; 531 } 532 533 template <typename SrcCharacterType, typename DestCharacterType> 534 inline bool CSSTokenizer::parseStringInternal(SrcCharacterType*& src, DestCharacterType*& result, UChar quote) 535 { 536 while (true) { 537 if (UNLIKELY(*src == quote)) { 538 // String parsing is done. 539 ++src; 540 return true; 541 } 542 if (UNLIKELY(!*src)) { 543 // String parsing is done, but don't advance pointer if at the end of input. 
544 return true; 545 } 546 if (LIKELY(src[0] != '\\')) { 547 *result++ = *src++; 548 } else if (src[1] == '\n' || src[1] == '\f') { 549 src += 2; 550 } else if (src[1] == '\r') { 551 src += src[2] == '\n' ? 3 : 2; 552 } else { 553 SrcCharacterType* savedEscapeStart = src; 554 unsigned unicode = parseEscape<SrcCharacterType>(src); 555 if (unicode > 0xff && sizeof(DestCharacterType) == 1) { 556 src = savedEscapeStart; 557 return false; 558 } 559 UnicodeToChars(result, unicode); 560 } 561 } 562 563 return true; 564 } 565 566 template <typename CharacterType> 567 inline void CSSTokenizer::parseString(CharacterType*& result, CSSParserString& resultString, UChar quote) 568 { 569 CharacterType* start = currentCharacter<CharacterType>(); 570 571 if (UNLIKELY(!parseStringInternal(currentCharacter<CharacterType>(), result, quote))) { 572 // Found an escape we couldn't handle with 8 bits, copy what has been recognized and continue 573 ASSERT(is8BitSource()); 574 UChar* result16 = allocateStringBuffer16((result - start) + peekMaxStringLen(currentCharacter<CharacterType>(), quote)); 575 UChar* start16 = result16; 576 int i = 0; 577 for (; i < result - start; i++) 578 result16[i] = start[i]; 579 580 result16 += i; 581 582 parseStringInternal(currentCharacter<CharacterType>(), result16, quote); 583 584 resultString.init(start16, result16 - start16); 585 return; 586 } 587 588 resultString.init(start, result - start); 589 } 590 591 template <typename CharacterType> 592 inline bool CSSTokenizer::findURI(CharacterType*& start, CharacterType*& end, UChar& quote) 593 { 594 start = skipWhiteSpace(currentCharacter<CharacterType>()); 595 596 if (*start == '"' || *start == '\'') { 597 quote = *start++; 598 end = checkAndSkipString(start, quote, AbortIfInvalid); 599 if (!end) 600 return false; 601 } else { 602 quote = 0; 603 end = start; 604 while (isURILetter(*end)) { 605 if (LIKELY(*end != '\\')) { 606 ++end; 607 } else { 608 end = checkAndSkipEscape(end); 609 if (!end) 610 return false; 
611 } 612 } 613 } 614 615 end = skipWhiteSpace(end); 616 if (*end != ')') 617 return false; 618 619 return true; 620 } 621 622 template <typename SrcCharacterType> 623 inline size_t CSSTokenizer::peekMaxURILen(SrcCharacterType* src, UChar quote) 624 { 625 // The decoded form of a URI (after resolving escape sequences) 626 // will not contain more characters (ASCII or UTF-16 codepoints) 627 // than the input. This code can therefore ignore escape sequences 628 // completely. 629 SrcCharacterType* start = src; 630 if (quote) { 631 ASSERT(quote == '"' || quote == '\''); 632 return peekMaxStringLen(src, quote); 633 } 634 635 while (isURILetter(*src)) { 636 if (LIKELY(*src != '\\')) 637 src++; 638 else 639 parseEscape<SrcCharacterType>(src); 640 } 641 642 return src - start; 643 } 644 645 template <typename SrcCharacterType, typename DestCharacterType> 646 inline bool CSSTokenizer::parseURIInternal(SrcCharacterType*& src, DestCharacterType*& dest, UChar quote) 647 { 648 if (quote) { 649 ASSERT(quote == '"' || quote == '\''); 650 return parseStringInternal(src, dest, quote); 651 } 652 653 while (isURILetter(*src)) { 654 if (LIKELY(*src != '\\')) { 655 *dest++ = *src++; 656 } else { 657 unsigned unicode = parseEscape<SrcCharacterType>(src); 658 if (unicode > 0xff && sizeof(DestCharacterType) == 1) 659 return false; 660 UnicodeToChars(dest, unicode); 661 } 662 } 663 664 return true; 665 } 666 667 template <typename CharacterType> 668 inline void CSSTokenizer::parseURI(CSSParserString& string) 669 { 670 CharacterType* uriStart; 671 CharacterType* uriEnd; 672 UChar quote; 673 if (!findURI(uriStart, uriEnd, quote)) 674 return; 675 676 CharacterType* dest = currentCharacter<CharacterType>() = uriStart; 677 if (LIKELY(parseURIInternal(currentCharacter<CharacterType>(), dest, quote))) { 678 string.init(uriStart, dest - uriStart); 679 } else { 680 // An escape sequence was encountered that can't be stored in 8 bits. 
681 // Reset the current character to the start of the URI and re-parse with 682 // a 16-bit destination. 683 ASSERT(is8BitSource()); 684 currentCharacter<CharacterType>() = uriStart; 685 UChar* result16 = allocateStringBuffer16(peekMaxURILen(currentCharacter<CharacterType>(), quote)); 686 UChar* uriStart16 = result16; 687 bool result = parseURIInternal(currentCharacter<CharacterType>(), result16, quote); 688 ASSERT_UNUSED(result, result); 689 string.init(uriStart16, result16 - uriStart16); 690 } 691 692 currentCharacter<CharacterType>() = uriEnd + 1; 693 m_token = URI; 694 } 695 696 template <typename CharacterType> 697 inline bool CSSTokenizer::parseUnicodeRange() 698 { 699 CharacterType* character = currentCharacter<CharacterType>() + 1; 700 int length = 6; 701 ASSERT(*currentCharacter<CharacterType>() == '+'); 702 703 while (isASCIIHexDigit(*character) && length) { 704 ++character; 705 --length; 706 } 707 708 if (length && *character == '?') { 709 // At most 5 hex digit followed by a question mark. 710 do { 711 ++character; 712 --length; 713 } while (*character == '?' && length); 714 currentCharacter<CharacterType>() = character; 715 return true; 716 } 717 718 if (length < 6) { 719 // At least one hex digit. 720 if (character[0] == '-' && isASCIIHexDigit(character[1])) { 721 // Followed by a dash and a hex digit. 
722 ++character; 723 length = 6; 724 do { 725 ++character; 726 } while (--length && isASCIIHexDigit(*character)); 727 } 728 currentCharacter<CharacterType>() = character; 729 return true; 730 } 731 return false; 732 } 733 734 template <typename CharacterType> 735 bool CSSTokenizer::parseNthChild() 736 { 737 CharacterType* character = currentCharacter<CharacterType>(); 738 739 while (isASCIIDigit(*character)) 740 ++character; 741 if (isASCIIAlphaCaselessEqual(*character, 'n')) { 742 currentCharacter<CharacterType>() = character + 1; 743 return true; 744 } 745 return false; 746 } 747 748 template <typename CharacterType> 749 bool CSSTokenizer::parseNthChildExtra() 750 { 751 CharacterType* character = skipWhiteSpace(currentCharacter<CharacterType>()); 752 if (*character != '+' && *character != '-') 753 return false; 754 755 character = skipWhiteSpace(character + 1); 756 if (!isASCIIDigit(*character)) 757 return false; 758 759 do { 760 ++character; 761 } while (isASCIIDigit(*character)); 762 763 currentCharacter<CharacterType>() = character; 764 return true; 765 } 766 767 template <typename CharacterType> 768 inline bool CSSTokenizer::detectFunctionTypeToken(int length) 769 { 770 ASSERT(length > 0); 771 CharacterType* name = tokenStart<CharacterType>(); 772 SWITCH(name, length) { 773 CASE("not") { 774 m_token = NOTFUNCTION; 775 return true; 776 } 777 CASE("url") { 778 m_token = URI; 779 return true; 780 } 781 CASE("cue") { 782 m_token = CUEFUNCTION; 783 return true; 784 } 785 CASE("calc") { 786 m_token = CALCFUNCTION; 787 return true; 788 } 789 CASE("host") { 790 m_token = HOSTFUNCTION; 791 return true; 792 } 793 CASE("host-context") { 794 m_token = HOSTCONTEXTFUNCTION; 795 return true; 796 } 797 CASE("nth-child") { 798 m_parsingMode = NthChildMode; 799 return true; 800 } 801 CASE("nth-of-type") { 802 m_parsingMode = NthChildMode; 803 return true; 804 } 805 CASE("nth-last-child") { 806 m_parsingMode = NthChildMode; 807 return true; 808 } 809 CASE("nth-last-of-type") { 
810 m_parsingMode = NthChildMode; 811 return true; 812 } 813 } 814 return false; 815 } 816 817 template <typename CharacterType> 818 inline void CSSTokenizer::detectMediaQueryToken(int length) 819 { 820 ASSERT(m_parsingMode == MediaQueryMode); 821 CharacterType* name = tokenStart<CharacterType>(); 822 823 SWITCH(name, length) { 824 CASE("and") { 825 m_token = MEDIA_AND; 826 } 827 CASE("not") { 828 m_token = MEDIA_NOT; 829 } 830 CASE("only") { 831 m_token = MEDIA_ONLY; 832 } 833 CASE("or") { 834 m_token = MEDIA_OR; 835 } 836 } 837 } 838 839 template <typename CharacterType> 840 inline void CSSTokenizer::detectNumberToken(CharacterType* type, int length) 841 { 842 ASSERT(length > 0); 843 844 SWITCH(type, length) { 845 CASE("cm") { 846 m_token = CMS; 847 } 848 CASE("ch") { 849 m_token = CHS; 850 } 851 CASE("deg") { 852 m_token = DEGS; 853 } 854 CASE("dppx") { 855 // There is a discussion about the name of this unit on www-style. 856 // Keep this compile time guard in place until that is resolved. 
857 // http://lists.w3.org/Archives/Public/www-style/2012May/0915.html 858 m_token = DPPX; 859 } 860 CASE("dpcm") { 861 m_token = DPCM; 862 } 863 CASE("dpi") { 864 m_token = DPI; 865 } 866 CASE("em") { 867 m_token = EMS; 868 } 869 CASE("ex") { 870 m_token = EXS; 871 } 872 CASE("fr") { 873 m_token = FR; 874 } 875 CASE("grad") { 876 m_token = GRADS; 877 } 878 CASE("hz") { 879 m_token = HERTZ; 880 } 881 CASE("in") { 882 m_token = INS; 883 } 884 CASE("khz") { 885 m_token = KHERTZ; 886 } 887 CASE("mm") { 888 m_token = MMS; 889 } 890 CASE("ms") { 891 m_token = MSECS; 892 } 893 CASE("px") { 894 m_token = PXS; 895 } 896 CASE("pt") { 897 m_token = PTS; 898 } 899 CASE("pc") { 900 m_token = PCS; 901 } 902 CASE("rad") { 903 m_token = RADS; 904 } 905 CASE("rem") { 906 m_token = REMS; 907 } 908 CASE("s") { 909 m_token = SECS; 910 } 911 CASE("turn") { 912 m_token = TURNS; 913 } 914 CASE("vw") { 915 m_token = VW; 916 } 917 CASE("vh") { 918 m_token = VH; 919 } 920 CASE("vmin") { 921 m_token = VMIN; 922 } 923 CASE("vmax") { 924 m_token = VMAX; 925 } 926 CASE("__qem") { 927 m_token = QEMS; 928 } 929 } 930 } 931 932 template <typename CharacterType> 933 inline void CSSTokenizer::detectDashToken(int length) 934 { 935 CharacterType* name = tokenStart<CharacterType>(); 936 937 // Ignore leading dash. 938 ++name; 939 --length; 940 941 SWITCH(name, length) { 942 CASE("webkit-any") { 943 m_token = ANYFUNCTION; 944 } 945 CASE("webkit-calc") { 946 m_token = CALCFUNCTION; 947 } 948 } 949 } 950 951 template <typename CharacterType> 952 inline void CSSTokenizer::detectAtToken(int length, bool hasEscape) 953 { 954 CharacterType* name = tokenStart<CharacterType>(); 955 ASSERT(name[0] == '@' && length >= 2); 956 957 // Ignore leading @. 958 ++name; 959 --length; 960 961 // charset, font-face, import, media, namespace, page, supports, 962 // -webkit-keyframes, keyframes, and -webkit-mediaquery are not affected by hasEscape. 
963 SWITCH(name, length) { 964 CASE("bottom-left") { 965 if (LIKELY(!hasEscape)) 966 m_token = BOTTOMLEFT_SYM; 967 } 968 CASE("bottom-right") { 969 if (LIKELY(!hasEscape)) 970 m_token = BOTTOMRIGHT_SYM; 971 } 972 CASE("bottom-center") { 973 if (LIKELY(!hasEscape)) 974 m_token = BOTTOMCENTER_SYM; 975 } 976 CASE("bottom-left-corner") { 977 if (LIKELY(!hasEscape)) 978 m_token = BOTTOMLEFTCORNER_SYM; 979 } 980 CASE("bottom-right-corner") { 981 if (LIKELY(!hasEscape)) 982 m_token = BOTTOMRIGHTCORNER_SYM; 983 } 984 CASE("charset") { 985 if (name - 1 == dataStart<CharacterType>()) 986 m_token = CHARSET_SYM; 987 } 988 CASE("font-face") { 989 m_token = FONT_FACE_SYM; 990 } 991 CASE("import") { 992 m_parsingMode = MediaQueryMode; 993 m_token = IMPORT_SYM; 994 } 995 CASE("keyframes") { 996 if (RuntimeEnabledFeatures::cssAnimationUnprefixedEnabled()) 997 m_token = KEYFRAMES_SYM; 998 } 999 CASE("left-top") { 1000 if (LIKELY(!hasEscape)) 1001 m_token = LEFTTOP_SYM; 1002 } 1003 CASE("left-middle") { 1004 if (LIKELY(!hasEscape)) 1005 m_token = LEFTMIDDLE_SYM; 1006 } 1007 CASE("left-bottom") { 1008 if (LIKELY(!hasEscape)) 1009 m_token = LEFTBOTTOM_SYM; 1010 } 1011 CASE("media") { 1012 m_parsingMode = MediaQueryMode; 1013 m_token = MEDIA_SYM; 1014 } 1015 CASE("namespace") { 1016 m_token = NAMESPACE_SYM; 1017 } 1018 CASE("page") { 1019 m_token = PAGE_SYM; 1020 } 1021 CASE("right-top") { 1022 if (LIKELY(!hasEscape)) 1023 m_token = RIGHTTOP_SYM; 1024 } 1025 CASE("right-middle") { 1026 if (LIKELY(!hasEscape)) 1027 m_token = RIGHTMIDDLE_SYM; 1028 } 1029 CASE("right-bottom") { 1030 if (LIKELY(!hasEscape)) 1031 m_token = RIGHTBOTTOM_SYM; 1032 } 1033 CASE("supports") { 1034 m_parsingMode = SupportsMode; 1035 m_token = SUPPORTS_SYM; 1036 } 1037 CASE("top-left") { 1038 if (LIKELY(!hasEscape)) 1039 m_token = TOPLEFT_SYM; 1040 } 1041 CASE("top-right") { 1042 if (LIKELY(!hasEscape)) 1043 m_token = TOPRIGHT_SYM; 1044 } 1045 CASE("top-center") { 1046 if (LIKELY(!hasEscape)) 1047 m_token = 
TOPCENTER_SYM; 1048 } 1049 CASE("top-left-corner") { 1050 if (LIKELY(!hasEscape)) 1051 m_token = TOPLEFTCORNER_SYM; 1052 } 1053 CASE("top-right-corner") { 1054 if (LIKELY(!hasEscape)) 1055 m_token = TOPRIGHTCORNER_SYM; 1056 } 1057 CASE("viewport") { 1058 m_token = VIEWPORT_RULE_SYM; 1059 } 1060 CASE("-internal-rule") { 1061 if (LIKELY(!hasEscape && m_internal)) 1062 m_token = INTERNAL_RULE_SYM; 1063 } 1064 CASE("-internal-decls") { 1065 if (LIKELY(!hasEscape && m_internal)) 1066 m_token = INTERNAL_DECLS_SYM; 1067 } 1068 CASE("-internal-value") { 1069 if (LIKELY(!hasEscape && m_internal)) 1070 m_token = INTERNAL_VALUE_SYM; 1071 } 1072 CASE("-webkit-keyframes") { 1073 m_token = WEBKIT_KEYFRAMES_SYM; 1074 } 1075 CASE("-internal-selector") { 1076 if (LIKELY(!hasEscape && m_internal)) 1077 m_token = INTERNAL_SELECTOR_SYM; 1078 } 1079 CASE("-internal-medialist") { 1080 if (!m_internal) 1081 return; 1082 m_parsingMode = MediaQueryMode; 1083 m_token = INTERNAL_MEDIALIST_SYM; 1084 } 1085 CASE("-internal-keyframe-rule") { 1086 if (LIKELY(!hasEscape && m_internal)) 1087 m_token = INTERNAL_KEYFRAME_RULE_SYM; 1088 } 1089 CASE("-internal-keyframe-key-list") { 1090 if (!m_internal) 1091 return; 1092 m_token = INTERNAL_KEYFRAME_KEY_LIST_SYM; 1093 } 1094 CASE("-internal-supports-condition") { 1095 if (!m_internal) 1096 return; 1097 m_parsingMode = SupportsMode; 1098 m_token = INTERNAL_SUPPORTS_CONDITION_SYM; 1099 } 1100 } 1101 } 1102 1103 template <typename CharacterType> 1104 inline void CSSTokenizer::detectSupportsToken(int length) 1105 { 1106 ASSERT(m_parsingMode == SupportsMode); 1107 CharacterType* name = tokenStart<CharacterType>(); 1108 1109 SWITCH(name, length) { 1110 CASE("or") { 1111 m_token = SUPPORTS_OR; 1112 } 1113 CASE("and") { 1114 m_token = SUPPORTS_AND; 1115 } 1116 CASE("not") { 1117 m_token = SUPPORTS_NOT; 1118 } 1119 } 1120 } 1121 1122 template <typename SrcCharacterType> 1123 int CSSTokenizer::realLex(void* yylvalWithoutType) 1124 { 1125 YYSTYPE* yylval = 
static_cast<YYSTYPE*>(yylvalWithoutType);
    // Write pointer for the next character.
    SrcCharacterType* result;
    CSSParserString resultString;
    bool hasEscape;

    // The input buffer is terminated by a \0 character, so
    // it is safe to read one character ahead of a known non-null.
#ifndef NDEBUG
    // In debug we check with an ASSERT that the length is > 0 for string types.
    yylval->string.clear();
#endif

restartAfterComment:
    // Remember where the token starts (position and line), then consume its
    // first character. Until a case below assigns a token type, m_token holds
    // that raw character.
    result = currentCharacter<SrcCharacterType>();
    setTokenStart(result);
    m_tokenStartLineNumber = m_lineNumber;
    m_token = *currentCharacter<SrcCharacterType>();
    ++currentCharacter<SrcCharacterType>();

    // Dispatch on the class of the first character. Anything above 127 is
    // outside the ASCII table and is treated as the start of an identifier.
    switch ((m_token <= 127) ? typesOfASCIICharacters[m_token] : CharacterIdentifierStart) {
    case CharacterCaselessU:
        // A 'u'/'U' directly followed by '+' may start a unicode range;
        // otherwise it is an ordinary identifier character.
        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '+')) {
            if (parseUnicodeRange<SrcCharacterType>()) {
                m_token = UNICODERANGE;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
                break;
            }
        }
        // Fall through to CharacterIdentifierStart.

    case CharacterIdentifierStart:
        // Undo the advance made before the switch, then parse the identifier.
        --currentCharacter<SrcCharacterType>();
        parseIdentifier(result, yylval->string, hasEscape);
        m_token = IDENT;

        if (UNLIKELY(*currentCharacter<SrcCharacterType>() == '(')) {
            // In SupportsMode a keyword directly followed by '(' stays a
            // keyword token instead of becoming a FUNCTION.
            if (m_parsingMode == SupportsMode && !hasEscape) {
                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                if (m_token != IDENT)
                    break;
            }

            m_token = FUNCTION;
            if (!hasEscape)
                detectFunctionTypeToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());

            // Skip parenthesis
            // (the token string grows by one so that it includes the '(').
            ++currentCharacter<SrcCharacterType>();
            ++result;
            ++yylval->string.m_length;

            if (m_token == URI) {
                // Reset to FUNCTION before calling parseURI.
                // NOTE(review): presumably parseURI sets m_token back to URI
                // when the contents form a valid URI - confirm in parseURI.
                m_token = FUNCTION;
                // Check whether it is really an URI.
                if (yylval->string.is8Bit())
                    parseURI<LChar>(yylval->string);
                else
                    parseURI<UChar>(yylval->string);
            }
        } else if (UNLIKELY(m_parsingMode != NormalMode) && !hasEscape) {
            // Mode-specific keywords; identifiers containing escapes are never
            // reclassified here.
            if (m_parsingMode == MediaQueryMode) {
                detectMediaQueryToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
            } else if (m_parsingMode == SupportsMode) {
                detectSupportsToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
            } else if (m_parsingMode == NthChildMode && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[0], 'n')) {
                if (result - tokenStart<SrcCharacterType>() == 1) {
                    // String "n" is IDENT but "n+1" is NTH.
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        yylval->string.m_length = currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>();
                    }
                } else if (result - tokenStart<SrcCharacterType>() >= 2 && tokenStart<SrcCharacterType>()[1] == '-') {
                    // String "n-" is IDENT but "n-1" is NTH.
                    // Set currentCharacter to '-' to continue parsing.
                    SrcCharacterType* nextCharacter = result;
                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 1;
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        yylval->string.setLength(currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
                    } else {
                        // Revert the change to currentCharacter if unsuccessful.
                        currentCharacter<SrcCharacterType>() = nextCharacter;
                    }
                }
            }
        }
        break;

    case CharacterDot:
        // A lone '.' is returned as the raw character; ".5" is a number.
        if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0]))
            break;
        // Fall through to CharacterNumber.

    case CharacterNumber: {
        // True when we got here by falling through from CharacterDot.
        bool dotSeen = (m_token == '.');

        while (true) {
            if (!isASCIIDigit(currentCharacter<SrcCharacterType>()[0])) {
                // Only one dot is allowed for a number,
                // and it must be followed by a digit.
                if (currentCharacter<SrcCharacterType>()[0] != '.' || dotSeen || !isASCIIDigit(currentCharacter<SrcCharacterType>()[1]))
                    break;
                dotSeen = true;
            }
            ++currentCharacter<SrcCharacterType>();
        }

        if (UNLIKELY(m_parsingMode == NthChildMode) && !dotSeen && isASCIIAlphaCaselessEqual(*currentCharacter<SrcCharacterType>(), 'n')) {
            // "[0-9]+n" is always an NthChild.
            ++currentCharacter<SrcCharacterType>();
            parseNthChildExtra<SrcCharacterType>();
            m_token = NTH;
            yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            break;
        }

        // Use SVG parser for numbers on SVG presentation attributes.
        if (isSVGNumberParsingEnabledForMode(m_parser.m_context.mode())) {
            // We need to take care of units like 'em' or 'ex'.
            SrcCharacterType* character = currentCharacter<SrcCharacterType>();
            if (isASCIIAlphaCaselessEqual(*character, 'e')) {
                ASSERT(character - tokenStart<SrcCharacterType>() > 0);
                ++character;
                // Only a sign or a digit after the 'e' makes it an exponent;
                // currentCharacter is moved past it only in that case.
                if (*character == '-' || *character == '+' || isASCIIDigit(*character)) {
                    ++character;
                    while (isASCIIDigit(*character))
                        ++character;
                    // Use FLOATTOKEN if the string contains exponents.
                    dotSeen = true;
                    currentCharacter<SrcCharacterType>() = character;
                }
            }
            // NOTE(review): |character| has stepped past the 'e' even when no
            // exponent follows (e.g. "1em"), so the length passed below then
            // includes that 'e'. Presumably parseSVGNumber copes with a
            // trailing unit letter - confirm.
            // On failure m_token is left as the first character of the input.
            if (!parseSVGNumber(tokenStart<SrcCharacterType>(), character - tokenStart<SrcCharacterType>(), yylval->number))
                break;
        } else {
            yylval->number = charactersToDouble(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
        }

        // Classify what follows the digits: an identifier (a unit, giving
        // DIMEN or whatever detectNumberToken selects), one or more '%', or
        // nothing (a plain INTEGER / FLOATTOKEN).
        if (isIdentifierStart<SrcCharacterType>()) {
            SrcCharacterType* type = currentCharacter<SrcCharacterType>();
            result = currentCharacter<SrcCharacterType>();

            parseIdentifier(result, resultString, hasEscape);

            m_token = DIMEN;
            if (!hasEscape)
                detectNumberToken(type, currentCharacter<SrcCharacterType>() - type);

            if (m_token == DIMEN) {
                // The decoded number is overwritten, but this is intentional.
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        } else if (*currentCharacter<SrcCharacterType>() == '%') {
            // Although the CSS grammar says {num}% we follow
            // webkit at the moment which uses {num}%+.
            do {
                ++currentCharacter<SrcCharacterType>();
            } while (*currentCharacter<SrcCharacterType>() == '%');
            m_token = PERCENTAGE;
        } else {
            m_token = dotSeen ? FLOATTOKEN : INTEGER;
        }
        break;
    }

    case CharacterDash:
        if (isIdentifierStartAfterDash(currentCharacter<SrcCharacterType>())) {
            // Identifier with a leading '-': undo the advance and parse it in
            // full. |result| tracks the end of the token so that the string
            // length can be fixed up once at the bottom of this branch.
            --currentCharacter<SrcCharacterType>();
            parseIdentifier(result, resultString, hasEscape);
            m_token = IDENT;

            if (*currentCharacter<SrcCharacterType>() == '(') {
                m_token = FUNCTION;
                if (!hasEscape)
                    detectDashToken<SrcCharacterType>(result - tokenStart<SrcCharacterType>());
                // Consume the '(' and make it part of the token string.
                ++currentCharacter<SrcCharacterType>();
                ++result;
            } else if (UNLIKELY(m_parsingMode == NthChildMode) && !hasEscape && isASCIIAlphaCaselessEqual(tokenStart<SrcCharacterType>()[1], 'n')) {
                if (result - tokenStart<SrcCharacterType>() == 2) {
                    // String "-n" is IDENT but "-n+1" is NTH.
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        result = currentCharacter<SrcCharacterType>();
                    }
                } else if (result - tokenStart<SrcCharacterType>() >= 3 && tokenStart<SrcCharacterType>()[2] == '-') {
                    // String "-n-" is IDENT but "-n-1" is NTH.
                    // Set currentCharacter to second '-' of '-n-' to continue parsing.
                    SrcCharacterType* nextCharacter = result;
                    currentCharacter<SrcCharacterType>() = tokenStart<SrcCharacterType>() + 2;
                    if (parseNthChildExtra<SrcCharacterType>()) {
                        m_token = NTH;
                        result = currentCharacter<SrcCharacterType>();
                    } else {
                        // Revert the change to currentCharacter if unsuccessful.
                        currentCharacter<SrcCharacterType>() = nextCharacter;
                    }
                }
            }
            resultString.setLength(result - tokenStart<SrcCharacterType>());
            yylval->string = resultString;
        } else if (currentCharacter<SrcCharacterType>()[0] == '-' && currentCharacter<SrcCharacterType>()[1] == '>') {
            // "-->" (the first '-' has already been consumed).
            currentCharacter<SrcCharacterType>() += 2;
            m_token = SGML_CD;
        } else if (UNLIKELY(m_parsingMode == NthChildMode)) {
            // "-[0-9]+n" is always an NthChild.
            if (parseNthChild<SrcCharacterType>()) {
                parseNthChildExtra<SrcCharacterType>();
                m_token = NTH;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        }
        break;

    case CharacterOther:
        // m_token is simply the current character.
        break;

    case CharacterNull:
        // Do not advance pointer at the end of input.
        --currentCharacter<SrcCharacterType>();
        break;

    case CharacterWhiteSpace:
        m_token = WHITESPACE;
        // Might start with a '\n'.
        // Step back so the first character also goes through the line
        // counting below; the whole whitespace run becomes one token.
        --currentCharacter<SrcCharacterType>();
        do {
            if (*currentCharacter<SrcCharacterType>() == '\n')
                ++m_lineNumber;
            ++currentCharacter<SrcCharacterType>();
        } while (*currentCharacter<SrcCharacterType>() <= ' ' && (typesOfASCIICharacters[*currentCharacter<SrcCharacterType>()] == CharacterWhiteSpace));
        break;

    case CharacterEndMediaQueryOrSupports:
        // The character is returned as-is; it only ends the special mode.
        if (m_parsingMode == MediaQueryMode || m_parsingMode == SupportsMode)
            m_parsingMode = NormalMode;
        break;

    case CharacterEndNthChild:
        if (m_parsingMode == NthChildMode)
            m_parsingMode = NormalMode;
        break;

    case CharacterQuote:
        // m_token still holds the opening quote character here and is passed
        // on as such. When the check fails, the quote character itself is
        // returned as the token.
        if (checkAndSkipString(currentCharacter<SrcCharacterType>(), m_token, AbortIfInvalid)) {
            ++result;
            parseString<SrcCharacterType>(result, yylval->string, m_token);
            m_token = STRING;
        }
        break;

    case CharacterExclamationMark: {
        SrcCharacterType* start = skipWhiteSpace(currentCharacter<SrcCharacterType>());
        if (isEqualToCSSIdentifier(start, "important")) {
            m_token = IMPORTANT_SYM;
            // 9 is the length of "important".
            currentCharacter<SrcCharacterType>() = start + 9;
        }
        break;
    }

    case CharacterHashmark: {
        SrcCharacterType* start = currentCharacter<SrcCharacterType>();
        result = currentCharacter<SrcCharacterType>();

        if (isASCIIDigit(*currentCharacter<SrcCharacterType>())) {
            // This must be a valid hex number token.
            do {
                ++currentCharacter<SrcCharacterType>();
            } while (isASCIIHexDigit(*currentCharacter<SrcCharacterType>()));
            m_token = HEX;
            yylval->string.init(start, currentCharacter<SrcCharacterType>() - start);
        } else if (isIdentifierStart<SrcCharacterType>()) {
            m_token = IDSEL;
            parseIdentifier(result, yylval->string, hasEscape);
            if (!hasEscape) {
                // Check whether the identifier is also a valid hex number.
                // Start from HEX and drop back to IDSEL at the first
                // character in [start, result) that is not a hex digit.
                SrcCharacterType* current = start;
                m_token = HEX;
                do {
                    if (!isASCIIHexDigit(*current)) {
                        m_token = IDSEL;
                        break;
                    }
                    ++current;
                } while (current < result);
            }
        }
        break;
    }

    case CharacterSlash:
        // Ignore comments. They are not even considered as white spaces.
        if (*currentCharacter<SrcCharacterType>() == '*') {
            const CSSParserLocation startLocation = currentLocation();
            if (m_parser.m_observer) {
                unsigned startOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>() - 1; // Start with a slash.
                m_parser.m_observer->startComment(startOffset - m_parsedTextPrefixLength);
            }
            ++currentCharacter<SrcCharacterType>();
            while (currentCharacter<SrcCharacterType>()[0] != '*' || currentCharacter<SrcCharacterType>()[1] != '/') {
                if (*currentCharacter<SrcCharacterType>() == '\n')
                    ++m_lineNumber;
                if (*currentCharacter<SrcCharacterType>() == '\0') {
                    // Unterminated comments are simply ignored.
                    // Back up by two so that the "+= 2" after the loop leaves
                    // currentCharacter on the terminating '\0'.
                    currentCharacter<SrcCharacterType>() -= 2;
                    m_parser.reportError(startLocation, UnterminatedCommentCSSError);
                    break;
                }
                ++currentCharacter<SrcCharacterType>();
            }
            // Step over the closing "*/".
            currentCharacter<SrcCharacterType>() += 2;
            if (m_parser.m_observer) {
                // Offsets are reported relative to the end of the prefix, and
                // the end offset is capped at m_length - 1 - suffix length.
                unsigned endOffset = currentCharacter<SrcCharacterType>() - dataStart<SrcCharacterType>();
                unsigned userTextEndOffset = static_cast<unsigned>(m_length - 1 - m_parsedTextSuffixLength);
                m_parser.m_observer->endComment(std::min(endOffset, userTextEndOffset) - m_parsedTextPrefixLength);
            }
            // No token is produced for a comment: scan the next one instead.
            goto restartAfterComment;
        }
        break;

    case CharacterDollar:
        // "$="; a lone '$' is returned as the raw character.
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = ENDSWITH;
        }
        break;

    case CharacterAsterisk:
        // "*="; a lone '*' is returned as the raw character.
        if (*currentCharacter<SrcCharacterType>() == '=') {
            ++currentCharacter<SrcCharacterType>();
            m_token = CONTAINS;
        }
        break;

    case CharacterPlus:
        if (UNLIKELY(m_parsingMode == NthChildMode)) {
            // Simplest case. "+[0-9]*n" is always NthChild.
            if (parseNthChild<SrcCharacterType>()) {
                parseNthChildExtra<SrcCharacterType>();
                m_token = NTH;
                yylval->string.init(tokenStart<SrcCharacterType>(), currentCharacter<SrcCharacterType>() - tokenStart<SrcCharacterType>());
            }
        }
        break;

    case CharacterLess:
        // "<!--" (the '<' has already been consumed). Note that it yields the
        // same SGML_CD token as "-->" in the CharacterDash case.
        if (currentCharacter<SrcCharacterType>()[0] == '!' && currentCharacter<SrcCharacterType>()[1] == '-' && currentCharacter<SrcCharacterType>()[2] == '-') {
            currentCharacter<SrcCharacterType>() += 3;
            m_token = SGML_CD;
        }
        break;

    case CharacterAt:
        if (isIdentifierStart<SrcCharacterType>()) {
            m_token = ATKEYWORD;
            // Move the write pointer past the '@' before parsing the name.
            ++result;
            parseIdentifier(result, resultString, hasEscape);
            // The standard enables unicode escapes in at-rules.
In this case only the resultString will contain the 1485 // correct identifier, hence we have to use it to determine its length instead of the usual pointer arithmetic. 1486 detectAtToken<SrcCharacterType>(resultString.length() + 1, hasEscape); 1487 } 1488 break; 1489 1490 case CharacterBackSlash: 1491 if (isCSSEscape(*currentCharacter<SrcCharacterType>())) { 1492 --currentCharacter<SrcCharacterType>(); 1493 parseIdentifier(result, yylval->string, hasEscape); 1494 m_token = IDENT; 1495 } 1496 break; 1497 1498 case CharacterXor: 1499 if (*currentCharacter<SrcCharacterType>() == '=') { 1500 ++currentCharacter<SrcCharacterType>(); 1501 m_token = BEGINSWITH; 1502 } 1503 break; 1504 1505 case CharacterVerticalBar: 1506 if (*currentCharacter<SrcCharacterType>() == '=') { 1507 ++currentCharacter<SrcCharacterType>(); 1508 m_token = DASHMATCH; 1509 } 1510 break; 1511 1512 case CharacterTilde: 1513 if (*currentCharacter<SrcCharacterType>() == '=') { 1514 ++currentCharacter<SrcCharacterType>(); 1515 m_token = INCLUDES; 1516 } 1517 break; 1518 1519 default: 1520 ASSERT_NOT_REACHED(); 1521 break; 1522 } 1523 1524 return m_token; 1525 } 1526 1527 template <> 1528 inline void CSSTokenizer::setTokenStart<LChar>(LChar* tokenStart) 1529 { 1530 m_tokenStart.ptr8 = tokenStart; 1531 } 1532 1533 template <> 1534 inline void CSSTokenizer::setTokenStart<UChar>(UChar* tokenStart) 1535 { 1536 m_tokenStart.ptr16 = tokenStart; 1537 } 1538 1539 void CSSTokenizer::setupTokenizer(const char* prefix, unsigned prefixLength, const String& string, const char* suffix, unsigned suffixLength) 1540 { 1541 m_parsedTextPrefixLength = prefixLength; 1542 m_parsedTextSuffixLength = suffixLength; 1543 unsigned stringLength = string.length(); 1544 unsigned length = stringLength + m_parsedTextPrefixLength + m_parsedTextSuffixLength + 1; 1545 m_length = length; 1546 1547 if (!stringLength || string.is8Bit()) { 1548 m_dataStart8 = adoptArrayPtr(new LChar[length]); 1549 for (unsigned i = 0; i < 
m_parsedTextPrefixLength; i++) 1550 m_dataStart8[i] = prefix[i]; 1551 1552 if (stringLength) 1553 memcpy(m_dataStart8.get() + m_parsedTextPrefixLength, string.characters8(), stringLength * sizeof(LChar)); 1554 1555 unsigned start = m_parsedTextPrefixLength + stringLength; 1556 unsigned end = start + suffixLength; 1557 for (unsigned i = start; i < end; i++) 1558 m_dataStart8[i] = suffix[i - start]; 1559 1560 m_dataStart8[length - 1] = 0; 1561 1562 m_is8BitSource = true; 1563 m_currentCharacter8 = m_dataStart8.get(); 1564 m_currentCharacter16 = 0; 1565 setTokenStart<LChar>(m_currentCharacter8); 1566 m_lexFunc = &CSSTokenizer::realLex<LChar>; 1567 return; 1568 } 1569 1570 m_dataStart16 = adoptArrayPtr(new UChar[length]); 1571 for (unsigned i = 0; i < m_parsedTextPrefixLength; i++) 1572 m_dataStart16[i] = prefix[i]; 1573 1574 ASSERT(stringLength); 1575 memcpy(m_dataStart16.get() + m_parsedTextPrefixLength, string.characters16(), stringLength * sizeof(UChar)); 1576 1577 unsigned start = m_parsedTextPrefixLength + stringLength; 1578 unsigned end = start + suffixLength; 1579 for (unsigned i = start; i < end; i++) 1580 m_dataStart16[i] = suffix[i - start]; 1581 1582 m_dataStart16[length - 1] = 0; 1583 1584 m_is8BitSource = false; 1585 m_currentCharacter8 = 0; 1586 m_currentCharacter16 = m_dataStart16.get(); 1587 setTokenStart<UChar>(m_currentCharacter16); 1588 m_lexFunc = &CSSTokenizer::realLex<UChar>; 1589 } 1590 1591 } // namespace WebCore 1592