1 /* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 #include "config.h" 28 #include "PreloadScanner.h" 29 30 #include "AtomicString.h" 31 #include "CachedCSSStyleSheet.h" 32 #include "CachedImage.h" 33 #include "CachedResource.h" 34 #include "CachedResourceClient.h" 35 #include "CachedScript.h" 36 #include "CSSHelper.h" 37 #include "CString.h" 38 #include "DocLoader.h" 39 #include "Document.h" 40 #include "Frame.h" 41 #include "FrameLoader.h" 42 #include "HTMLLinkElement.h" 43 #include "HTMLNames.h" 44 #include <wtf/CurrentTime.h> 45 #include <wtf/unicode/Unicode.h> 46 47 // Use __GNUC__ instead of PLATFORM(GCC) to stay consistent with the gperf generated c file 48 #ifdef __GNUC__ 49 // The main tokenizer includes this too so we are getting two copies of the data. However, this way the code gets inlined. 50 #include "HTMLEntityNames.c" 51 #else 52 // Not inlined for non-GCC compilers 53 struct Entity { 54 const char* name; 55 int code; 56 }; 57 const struct Entity* findEntity(register const char* str, register unsigned int len); 58 #endif 59 60 #define PRELOAD_DEBUG 0 61 62 using namespace WTF; 63 64 namespace WebCore { 65 66 using namespace HTMLNames; 67 68 PreloadScanner::PreloadScanner(Document* doc) 69 : m_inProgress(false) 70 , m_timeUsed(0) 71 , m_bodySeen(false) 72 , m_document(doc) 73 { 74 #if PRELOAD_DEBUG 75 printf("CREATING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data()); 76 #endif 77 } 78 79 PreloadScanner::~PreloadScanner() 80 { 81 #if PRELOAD_DEBUG 82 printf("DELETING PRELOAD SCANNER FOR %s\n", m_document->url().string().latin1().data()); 83 printf("TOTAL TIME USED %.4fs\n", m_timeUsed); 84 #endif 85 } 86 87 void PreloadScanner::begin() 88 { 89 ASSERT(!m_inProgress); 90 reset(); 91 m_inProgress = true; 92 } 93 94 void PreloadScanner::end() 95 { 96 ASSERT(m_inProgress); 97 m_inProgress = false; 98 } 99 100 void PreloadScanner::reset() 101 { 102 m_source.clear(); 103 104 m_state = Data; 105 m_escape = false; 106 m_contentModel = PCDATA; 107 m_commentPos = 0; 108 109 m_closeTag = false; 110 m_tagName.clear(); 111 m_attributeName.clear(); 112 m_attributeValue.clear(); 113 m_lastStartTag = AtomicString(); 114 115 m_urlToLoad = String(); 116 m_charset = String(); 117 m_linkIsStyleSheet = false; 118 m_lastCharacterIndex = 0; 119 clearLastCharacters(); 120 121 m_cssState = CSSInitial; 122 m_cssRule.clear(); 123 m_cssRuleValue.clear(); 124 } 125 126 bool PreloadScanner::scanningBody() const 127 { 128 return m_document->body() || m_bodySeen; 129 } 130 131 void PreloadScanner::write(const SegmentedString& source) 132 { 133 #if PRELOAD_DEBUG 134 double startTime = currentTime(); 135 #endif 136 tokenize(source); 137 #if PRELOAD_DEBUG 138 m_timeUsed += currentTime() - startTime; 139 #endif 140 } 141 142 static inline bool isWhitespace(UChar c) 143 { 144 return c == ' ' || c == '\n' || c == '\r' || c == '\t'; 145 } 146 147 inline void PreloadScanner::clearLastCharacters() 148 { 149 memset(m_lastCharacters, 0, lastCharactersBufferSize * sizeof(UChar)); 150 } 151 152 inline void PreloadScanner::rememberCharacter(UChar c) 153 { 154 m_lastCharacterIndex = (m_lastCharacterIndex + 1) % lastCharactersBufferSize; 155 m_lastCharacters[m_lastCharacterIndex] = c; 156 } 157 158 inline bool PreloadScanner::lastCharactersMatch(const char* chars, unsigned count) const 159 { 160 unsigned pos = m_lastCharacterIndex; 161 while (count) { 162 if (chars[count - 1] != m_lastCharacters[pos]) 163 return false; 164 --count; 165 if (!pos) 166 pos = lastCharactersBufferSize; 167 --pos; 168 } 169 return true; 170 } 171 172 static inline unsigned legalEntityFor(unsigned value) 173 { 174 // FIXME There is a table for more exceptions in the HTML5 specification. 175 if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF)) 176 return 0xFFFD; 177 return value; 178 } 179 180 unsigned PreloadScanner::consumeEntity(SegmentedString& source, bool& notEnoughCharacters) 181 { 182 enum EntityState { 183 Initial, 184 NumberType, 185 MaybeHex, 186 Hex, 187 Decimal, 188 Named 189 }; 190 EntityState entityState = Initial; 191 unsigned result = 0; 192 Vector<UChar, 10> seenChars; 193 Vector<char, 10> entityName; 194 195 while (!source.isEmpty()) { 196 UChar cc = *source; 197 seenChars.append(cc); 198 switch (entityState) { 199 case Initial: 200 if (isWhitespace(cc) || cc == '<' || cc == '&') 201 return 0; 202 else if (cc == '#') 203 entityState = NumberType; 204 else if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) { 205 entityName.append(cc); 206 entityState = Named; 207 } else 208 return 0; 209 break; 210 case NumberType: 211 if (cc == 'x' || cc == 'X') 212 entityState = MaybeHex; 213 else if (cc >= '0' && cc <= '9') { 214 entityState = Decimal; 215 result = cc - '0'; 216 } else { 217 source.push('#'); 218 return 0; 219 } 220 break; 221 case MaybeHex: 222 if (cc >= '0' && cc <= '9') 223 result = cc - '0'; 224 else if (cc >= 'a' && cc <= 'f') 225 result = 10 + cc - 'a'; 226 else if (cc >= 'A' && cc <= 'F') 227 result = 10 + cc - 'A'; 228 else { 229 source.push('#'); 230 source.push(seenChars[1]); 231 return 0; 232 } 233 entityState = Hex; 234 break; 235 case Hex: 236 if (cc >= '0' && cc <= '9') 237 result = result * 16 + cc - '0'; 238 else if (cc >= 'a' && cc <= 'f') 239 result = result * 16 + 10 + cc - 'a'; 240 else if (cc >= 'A' && cc <= 'F') 241 result = result * 16 + 10 + cc - 'A'; 242 else if (cc == ';') { 243 source.advance(); 244 return legalEntityFor(result); 245 } else 246 return legalEntityFor(result); 247 break; 248 case Decimal: 249 if (cc >= '0' && cc <= '9') 250 result = result * 10 + cc - '0'; 251 else if (cc == ';') { 252 source.advance(); 253 return legalEntityFor(result); 254 } else 255 return legalEntityFor(result); 256 break; 257 case Named: 258 // This is the attribute only version, generic version matches somewhat differently 259 while (entityName.size() <= 8) { 260 if (cc == ';') { 261 const Entity* entity = findEntity(entityName.data(), entityName.size()); 262 if (entity) { 263 source.advance(); 264 return entity->code; 265 } 266 break; 267 } 268 if (!(cc >= 'a' && cc <= 'z') && !(cc >= 'A' && cc <= 'Z') && !(cc >= '0' && cc <= '9')) { 269 const Entity* entity = findEntity(entityName.data(), entityName.size()); 270 if (entity) 271 return entity->code; 272 break; 273 } 274 entityName.append(cc); 275 source.advance(); 276 if (source.isEmpty()) 277 goto outOfCharacters; 278 cc = *source; 279 seenChars.append(cc); 280 } 281 if (seenChars.size() == 2) 282 source.push(seenChars[0]); 283 else if (seenChars.size() == 3) { 284 source.push(seenChars[0]); 285 source.push(seenChars[1]); 286 } else 287 source.prepend(SegmentedString(String(seenChars.data(), seenChars.size() - 1))); 288 return 0; 289 } 290 source.advance(); 291 } 292 outOfCharacters: 293 notEnoughCharacters = true; 294 source.prepend(SegmentedString(String(seenChars.data(), seenChars.size()))); 295 return 0; 296 } 297 298 void PreloadScanner::tokenize(const SegmentedString& source) 299 { 300 ASSERT(m_inProgress); 301 302 m_source.append(source); 303 304 // This is a simplified HTML5 Tokenizer 305 // http://www.whatwg.org/specs/web-apps/current-work/#tokenisation0 306 while (!m_source.isEmpty()) { 307 UChar cc = *m_source; 308 switch (m_state) { 309 case Data: 310 while (1) { 311 rememberCharacter(cc); 312 if (cc == '&') { 313 if (m_contentModel == PCDATA || m_contentModel == RCDATA) { 314 m_state = EntityData; 315 break; 316 } 317 } else if (cc == '-') { 318 if ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape) { 319 if (lastCharactersMatch("<!--", 4)) 320 m_escape = true; 321 } 322 } else if (cc == '<') { 323 if (m_contentModel == PCDATA || ((m_contentModel == RCDATA || m_contentModel == CDATA) && !m_escape)) { 324 m_state = TagOpen; 325 break; 326 } 327 } else if (cc == '>') { 328 if ((m_contentModel == RCDATA || m_contentModel == CDATA) && m_escape) { 329 if (lastCharactersMatch("-->", 3)) 330 m_escape = false; 331 } 332 } 333 emitCharacter(cc); 334 m_source.advance(); 335 if (m_source.isEmpty()) 336 return; 337 cc = *m_source; 338 } 339 break; 340 case EntityData: 341 // should try to consume the entity but we only care about entities in attributes 342 m_state = Data; 343 break; 344 case TagOpen: 345 if (m_contentModel == RCDATA || m_contentModel == CDATA) { 346 if (cc == '/') 347 m_state = CloseTagOpen; 348 else { 349 m_state = Data; 350 continue; 351 } 352 } else if (m_contentModel == PCDATA) { 353 if (cc == '!') 354 m_state = MarkupDeclarationOpen; 355 else if (cc == '/') 356 m_state = CloseTagOpen; 357 else if (cc >= 'A' && cc <= 'Z') { 358 m_tagName.clear(); 359 m_charset = String(); 360 m_tagName.append(cc + 0x20); 361 m_closeTag = false; 362 m_state = TagName; 363 } else if (cc >= 'a' && cc <= 'z') { 364 m_tagName.clear(); 365 m_charset = String(); 366 m_tagName.append(cc); 367 m_closeTag = false; 368 m_state = TagName; 369 } else if (cc == '>') { 370 m_state = Data; 371 } else if (cc == '?') { 372 m_state = BogusComment; 373 } else { 374 m_state = Data; 375 continue; 376 } 377 } 378 break; 379 case CloseTagOpen: 380 if (m_contentModel == RCDATA || m_contentModel == CDATA) { 381 if (!m_lastStartTag.length()) { 382 m_state = Data; 383 continue; 384 } 385 if (m_source.length() < m_lastStartTag.length() + 1) 386 return; 387 Vector<UChar> tmpString; 388 UChar tmpChar = 0; 389 bool match = true; 390 for (unsigned n = 0; n < m_lastStartTag.length() + 1; n++) { 391 tmpChar = Unicode::toLower(*m_source); 392 if (n < m_lastStartTag.length() && tmpChar != m_lastStartTag[n]) 393 match = false; 394 tmpString.append(tmpChar); 395 m_source.advance(); 396 } 397 m_source.prepend(SegmentedString(String(tmpString.data(), tmpString.size()))); 398 if (!match || (!isWhitespace(tmpChar) && tmpChar != '>' && tmpChar != '/')) { 399 m_state = Data; 400 continue; 401 } 402 } 403 if (cc >= 'A' && cc <= 'Z') { 404 m_tagName.clear(); 405 m_charset = String(); 406 m_tagName.append(cc + 0x20); 407 m_closeTag = true; 408 m_state = TagName; 409 } else if (cc >= 'a' && cc <= 'z') { 410 m_tagName.clear(); 411 m_charset = String(); 412 m_tagName.append(cc); 413 m_closeTag = true; 414 m_state = TagName; 415 } else if (cc == '>') { 416 m_state = Data; 417 } else 418 m_state = BogusComment; 419 break; 420 case TagName: 421 while (1) { 422 if (isWhitespace(cc)) { 423 m_state = BeforeAttributeName; 424 break; 425 } 426 if (cc == '>') { 427 emitTag(); 428 m_state = Data; 429 break; 430 } 431 if (cc == '/') { 432 m_state = BeforeAttributeName; 433 break; 434 } 435 if (cc >= 'A' && cc <= 'Z') 436 m_tagName.append(cc + 0x20); 437 else 438 m_tagName.append(cc); 439 m_source.advance(); 440 if (m_source.isEmpty()) 441 return; 442 cc = *m_source; 443 } 444 break; 445 case BeforeAttributeName: 446 if (isWhitespace(cc)) 447 ; 448 else if (cc == '>') { 449 emitTag(); 450 m_state = Data; 451 } else if (cc >= 'A' && cc <= 'Z') { 452 m_attributeName.clear(); 453 m_attributeValue.clear(); 454 m_attributeName.append(cc + 0x20); 455 m_state = AttributeName; 456 } else if (cc == '/') 457 ; 458 else { 459 m_attributeName.clear(); 460 m_attributeValue.clear(); 461 m_attributeName.append(cc); 462 m_state = AttributeName; 463 } 464 break; 465 case AttributeName: 466 while (1) { 467 if (isWhitespace(cc)) { 468 m_state = AfterAttributeName; 469 break; 470 } 471 if (cc == '=') { 472 m_state = BeforeAttributeValue; 473 break; 474 } 475 if (cc == '>') { 476 emitTag(); 477 m_state = Data; 478 break; 479 } 480 if (cc == '/') { 481 m_state = BeforeAttributeName; 482 break; 483 } 484 if (cc >= 'A' && cc <= 'Z') 485 m_attributeName.append(cc + 0x20); 486 else 487 m_attributeName.append(cc); 488 m_source.advance(); 489 if (m_source.isEmpty()) 490 return; 491 cc = *m_source; 492 } 493 break; 494 case AfterAttributeName: 495 if (isWhitespace(cc)) 496 ; 497 else if (cc == '=') 498 m_state = BeforeAttributeValue; 499 else if (cc == '>') { 500 emitTag(); 501 m_state = Data; 502 } else if (cc >= 'A' && cc <= 'Z') { 503 m_attributeName.clear(); 504 m_attributeValue.clear(); 505 m_attributeName.append(cc + 0x20); 506 m_state = AttributeName; 507 } else if (cc == '/') 508 m_state = BeforeAttributeName; 509 else { 510 m_attributeName.clear(); 511 m_attributeValue.clear(); 512 m_attributeName.append(cc); 513 m_state = AttributeName; 514 } 515 break; 516 case BeforeAttributeValue: 517 if (isWhitespace(cc)) 518 ; 519 else if (cc == '"') 520 m_state = AttributeValueDoubleQuoted; 521 else if (cc == '&') { 522 m_state = AttributeValueUnquoted; 523 continue; 524 } else if (cc == '\'') 525 m_state = AttributeValueSingleQuoted; 526 else if (cc == '>') { 527 emitTag(); 528 m_state = Data; 529 } else { 530 m_attributeValue.append(cc); 531 m_state = AttributeValueUnquoted; 532 } 533 break; 534 case AttributeValueDoubleQuoted: 535 while (1) { 536 if (cc == '"') { 537 processAttribute(); 538 m_state = BeforeAttributeName; 539 break; 540 } 541 if (cc == '&') { 542 m_stateBeforeEntityInAttributeValue = m_state; 543 m_state = EntityInAttributeValue; 544 break; 545 } 546 m_attributeValue.append(cc); 547 m_source.advance(); 548 if (m_source.isEmpty()) 549 return; 550 cc = *m_source; 551 } 552 break; 553 case AttributeValueSingleQuoted: 554 while (1) { 555 if (cc == '\'') { 556 processAttribute(); 557 m_state = BeforeAttributeName; 558 break; 559 } 560 if (cc == '&') { 561 m_stateBeforeEntityInAttributeValue = m_state; 562 m_state = EntityInAttributeValue; 563 break; 564 } 565 m_attributeValue.append(cc); 566 m_source.advance(); 567 if (m_source.isEmpty()) 568 return; 569 cc = *m_source; 570 } 571 break; 572 case AttributeValueUnquoted: 573 while (1) { 574 if (isWhitespace(cc)) { 575 processAttribute(); 576 m_state = BeforeAttributeName; 577 break; 578 } 579 if (cc == '&') { 580 m_stateBeforeEntityInAttributeValue = m_state; 581 m_state = EntityInAttributeValue; 582 break; 583 } 584 if (cc == '>') { 585 processAttribute(); 586 emitTag(); 587 m_state = Data; 588 break; 589 } 590 m_attributeValue.append(cc); 591 m_source.advance(); 592 if (m_source.isEmpty()) 593 return; 594 cc = *m_source; 595 } 596 break; 597 case EntityInAttributeValue: 598 { 599 bool notEnoughCharacters = false; 600 unsigned entity = consumeEntity(m_source, notEnoughCharacters); 601 if (notEnoughCharacters) 602 return; 603 if (entity > 0xFFFF) { 604 m_attributeValue.append(U16_LEAD(entity)); 605 m_attributeValue.append(U16_TRAIL(entity)); 606 } else if (entity) 607 m_attributeValue.append(entity); 608 else 609 m_attributeValue.append('&'); 610 } 611 m_state = m_stateBeforeEntityInAttributeValue; 612 continue; 613 case BogusComment: 614 while (1) { 615 if (cc == '>') { 616 m_state = Data; 617 break; 618 } 619 m_source.advance(); 620 if (m_source.isEmpty()) 621 return; 622 cc = *m_source; 623 } 624 break; 625 case MarkupDeclarationOpen: { 626 if (cc == '-') { 627 if (m_source.length() < 2) 628 return; 629 m_source.advance(); 630 cc = *m_source; 631 if (cc == '-') 632 m_state = CommentStart; 633 else { 634 m_state = BogusComment; 635 continue; 636 } 637 // If we cared about the DOCTYPE we would test to enter those states here 638 } else { 639 m_state = BogusComment; 640 continue; 641 } 642 break; 643 } 644 case CommentStart: 645 if (cc == '-') 646 m_state = CommentStartDash; 647 else if (cc == '>') 648 m_state = Data; 649 else 650 m_state = Comment; 651 break; 652 case CommentStartDash: 653 if (cc == '-') 654 m_state = CommentEnd; 655 else if (cc == '>') 656 m_state = Data; 657 else 658 m_state = Comment; 659 break; 660 case Comment: 661 while (1) { 662 if (cc == '-') { 663 m_state = CommentEndDash; 664 break; 665 } 666 m_source.advance(); 667 if (m_source.isEmpty()) 668 return; 669 cc = *m_source; 670 } 671 break; 672 case CommentEndDash: 673 if (cc == '-') 674 m_state = CommentEnd; 675 else 676 m_state = Comment; 677 break; 678 case CommentEnd: 679 if (cc == '>') 680 m_state = Data; 681 else if (cc == '-') 682 ; 683 else 684 m_state = Comment; 685 break; 686 } 687 m_source.advance(); 688 } 689 } 690 691 void PreloadScanner::processAttribute() 692 { 693 AtomicString tag = AtomicString(m_tagName.data(), m_tagName.size()); 694 AtomicString attribute = AtomicString(m_attributeName.data(), m_attributeName.size()); 695 696 String value(m_attributeValue.data(), m_attributeValue.size()); 697 if (tag == scriptTag || tag == imgTag) { 698 if (attribute == srcAttr && m_urlToLoad.isEmpty()) 699 m_urlToLoad = deprecatedParseURL(value); 700 else if (attribute == charsetAttr) 701 m_charset = value; 702 } else if (tag == linkTag) { 703 if (attribute == hrefAttr && m_urlToLoad.isEmpty()) 704 m_urlToLoad = deprecatedParseURL(value); 705 else if (attribute == relAttr) { 706 bool styleSheet = false; 707 bool alternate = false; 708 bool icon = false; 709 bool dnsPrefetch = false; 710 #ifdef ANDROID_APPLE_TOUCH_ICON 711 bool touchIcon = false; 712 bool precomposedTouchIcon = false; 713 HTMLLinkElement::tokenizeRelAttribute(value, styleSheet, alternate, icon, touchIcon, precomposedTouchIcon, dnsPrefetch); 714 m_linkIsStyleSheet = styleSheet && !alternate && !icon && !touchIcon && !precomposedTouchIcon && !dnsPrefetch; 715 #else 716 HTMLLinkElement::tokenizeRelAttribute(value, styleSheet, alternate, icon, dnsPrefetch); 717 m_linkIsStyleSheet = styleSheet && !alternate && !icon && !dnsPrefetch; 718 #endif 719 } else if (attribute == charsetAttr) 720 m_charset = value; 721 } 722 } 723 724 inline void PreloadScanner::emitCharacter(UChar c) 725 { 726 if (m_contentModel == CDATA && m_lastStartTag == styleTag) 727 tokenizeCSS(c); 728 } 729 730 inline void PreloadScanner::tokenizeCSS(UChar c) 731 { 732 // We are just interested in @import rules, no need for real tokenization here 733 // Searching for other types of resources is probably low payoff 734 switch (m_cssState) { 735 case CSSInitial: 736 if (c == '@') 737 m_cssState = CSSRuleStart; 738 else if (c == '/') 739 m_cssState = CSSMaybeComment; 740 break; 741 case CSSMaybeComment: 742 if (c == '*') 743 m_cssState = CSSComment; 744 else 745 m_cssState = CSSInitial; 746 break; 747 case CSSComment: 748 if (c == '*') 749 m_cssState = CSSMaybeCommentEnd; 750 break; 751 case CSSMaybeCommentEnd: 752 if (c == '/') 753 m_cssState = CSSInitial; 754 else if (c == '*') 755 ; 756 else 757 m_cssState = CSSComment; 758 break; 759 case CSSRuleStart: 760 if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')) { 761 m_cssRule.clear(); 762 m_cssRuleValue.clear(); 763 m_cssRule.append(c); 764 m_cssState = CSSRule; 765 } else 766 m_cssState = CSSInitial; 767 break; 768 case CSSRule: 769 if (isWhitespace(c)) 770 m_cssState = CSSAfterRule; 771 else if (c == ';') 772 m_cssState = CSSInitial; 773 else 774 m_cssRule.append(c); 775 break; 776 case CSSAfterRule: 777 if (isWhitespace(c)) 778 ; 779 else if (c == ';') 780 m_cssState = CSSInitial; 781 else { 782 m_cssState = CSSRuleValue; 783 m_cssRuleValue.append(c); 784 } 785 break; 786 case CSSRuleValue: 787 if (isWhitespace(c)) 788 m_cssState = CSSAfterRuleValue; 789 else if (c == ';') { 790 emitCSSRule(); 791 m_cssState = CSSInitial; 792 } else 793 m_cssRuleValue.append(c); 794 break; 795 case CSSAfterRuleValue: 796 if (isWhitespace(c)) 797 ; 798 else if (c == ';') { 799 emitCSSRule(); 800 m_cssState = CSSInitial; 801 } else { 802 // FIXME media rules 803 m_cssState = CSSInitial; 804 } 805 break; 806 } 807 } 808 809 void PreloadScanner::emitTag() 810 { 811 if (m_closeTag) { 812 m_contentModel = PCDATA; 813 m_cssState = CSSInitial; 814 clearLastCharacters(); 815 return; 816 } 817 818 AtomicString tag(m_tagName.data(), m_tagName.size()); 819 m_lastStartTag = tag; 820 821 if (tag == textareaTag || tag == titleTag) 822 m_contentModel = RCDATA; 823 else if (tag == styleTag || tag == xmpTag || tag == scriptTag || tag == iframeTag || tag == noembedTag || tag == noframesTag) 824 m_contentModel = CDATA; 825 else if (tag == noscriptTag) 826 // we wouldn't be here if scripts were disabled 827 m_contentModel = CDATA; 828 else if (tag == plaintextTag) 829 m_contentModel = PLAINTEXT; 830 else 831 m_contentModel = PCDATA; 832 833 if (tag == bodyTag) 834 m_bodySeen = true; 835 836 if (m_urlToLoad.isEmpty()) { 837 m_linkIsStyleSheet = false; 838 return; 839 } 840 841 if (tag == scriptTag) 842 m_document->docLoader()->preload(CachedResource::Script, m_urlToLoad, m_charset, scanningBody()); 843 else if (tag == imgTag) 844 m_document->docLoader()->preload(CachedResource::ImageResource, m_urlToLoad, String(), scanningBody()); 845 else if (tag == linkTag && m_linkIsStyleSheet) 846 m_document->docLoader()->preload(CachedResource::CSSStyleSheet, m_urlToLoad, m_charset, scanningBody()); 847 848 m_urlToLoad = String(); 849 m_charset = String(); 850 m_linkIsStyleSheet = false; 851 } 852 853 void PreloadScanner::emitCSSRule() 854 { 855 String rule(m_cssRule.data(), m_cssRule.size()); 856 if (equalIgnoringCase(rule, "import") && !m_cssRuleValue.isEmpty()) { 857 String value(m_cssRuleValue.data(), m_cssRuleValue.size()); 858 String url = deprecatedParseURL(value); 859 if (!url.isEmpty()) 860 m_document->docLoader()->preload(CachedResource::CSSStyleSheet, url, String(), scanningBody()); 861 } 862 m_cssRule.clear(); 863 m_cssRuleValue.clear(); 864 } 865 866 } 867