1 /* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 * Copyright (C) 2010 Google Inc. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "core/html/parser/HTMLPreloadScanner.h" 30 31 #include "core/HTMLNames.h" 32 #include "core/InputTypeNames.h" 33 #include "core/css/MediaList.h" 34 #include "core/css/MediaQueryEvaluator.h" 35 #include "core/css/MediaValues.h" 36 #include "core/css/parser/SizesAttributeParser.h" 37 #include "core/html/LinkRelAttribute.h" 38 #include "core/html/parser/HTMLParserIdioms.h" 39 #include "core/html/parser/HTMLSrcsetParser.h" 40 #include "core/html/parser/HTMLTokenizer.h" 41 #include "platform/RuntimeEnabledFeatures.h" 42 #include "platform/TraceEvent.h" 43 #include "wtf/MainThread.h" 44 45 namespace WebCore { 46 47 using namespace HTMLNames; 48 49 static bool match(const StringImpl* impl, const QualifiedName& qName) 50 { 51 return impl == qName.localName().impl(); 52 } 53 54 static bool match(const AtomicString& name, const QualifiedName& qName) 55 { 56 ASSERT(isMainThread()); 57 return qName.localName() == name; 58 } 59 60 static bool match(const String& name, const QualifiedName& qName) 61 { 62 return threadSafeMatch(name, qName); 63 } 64 65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data) 66 { 67 AtomicString tagName(data); 68 const StringImpl* result = tagName.impl(); 69 if (result->isStatic()) 70 return result; 71 return 0; 72 } 73 74 static const StringImpl* tagImplFor(const String& tagName) 75 { 76 const StringImpl* result = tagName.impl(); 77 if (result->isStatic()) 78 return result; 79 return 0; 80 } 81 82 static String initiatorFor(const StringImpl* tagImpl) 83 { 84 ASSERT(tagImpl); 85 if (match(tagImpl, imgTag)) 86 return imgTag.localName(); 87 if (match(tagImpl, inputTag)) 88 return inputTag.localName(); 89 if (match(tagImpl, linkTag)) 90 return linkTag.localName(); 91 if (match(tagImpl, scriptTag)) 92 return scriptTag.localName(); 93 ASSERT_NOT_REACHED(); 94 return emptyString(); 95 } 96 97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue) 98 { 99 RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue); 100 MediaQueryEvaluator mediaQueryEvaluator("screen", mediaValues); 101 return mediaQueryEvaluator.eval(mediaQueries.get()); 102 } 103 104 class TokenPreloadScanner::StartTagScanner { 105 public: 106 StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues) 107 : m_tagImpl(tagImpl) 108 , m_linkIsStyleSheet(false) 109 , m_matchedMediaAttribute(true) 110 , m_inputIsImage(false) 111 , m_sourceSize(0) 112 , m_sourceSizeSet(false) 113 , m_isCORSEnabled(false) 114 , m_allowCredentials(DoNotAllowStoredCredentials) 115 , m_mediaValues(mediaValues) 116 { 117 if (match(m_tagImpl, imgTag) 118 || match(m_tagImpl, sourceTag)) { 119 if (RuntimeEnabledFeatures::pictureSizesEnabled()) 120 m_sourceSize = SizesAttributeParser::findEffectiveSize(String(), m_mediaValues); 121 return; 122 } 123 if ( !match(m_tagImpl, inputTag) 124 && !match(m_tagImpl, linkTag) 125 && !match(m_tagImpl, scriptTag)) 126 m_tagImpl = 0; 127 } 128 129 enum URLReplacement { 130 AllowURLReplacement, 131 DisallowURLReplacement 132 }; 133 134 void processAttributes(const HTMLToken::AttributeList& attributes) 135 { 136 ASSERT(isMainThread()); 137 if (!m_tagImpl) 138 return; 139 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { 140 AtomicString attributeName(iter->name); 141 String attributeValue = StringImpl::create8BitIfPossible(iter->value); 142 processAttribute(attributeName, attributeValue); 143 } 144 } 145 146 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes) 147 { 148 if (!m_tagImpl) 149 return; 150 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) 151 processAttribute(iter->name, iter->value); 152 } 153 154 void handlePictureSourceURL(String& sourceURL) 155 { 156 if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty()) 157 sourceURL = m_srcsetImageCandidate.toString(); 158 else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty()) 159 setUrlToLoad(sourceURL, AllowURLReplacement); 160 } 161 162 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source) 163 { 164 if (!shouldPreload() || !m_matchedMediaAttribute) 165 return nullptr; 166 167 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii()); 168 TextPosition position = TextPosition(source.currentLine(), source.currentColumn()); 169 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType()); 170 if (isCORSEnabled()) 171 request->setCrossOriginEnabled(allowStoredCredentials()); 172 request->setCharset(charset()); 173 return request.release(); 174 } 175 176 private: 177 template<typename NameType> 178 void processScriptAttribute(const NameType& attributeName, const String& attributeValue) 179 { 180 // FIXME - Don't set crossorigin multiple times. 181 if (match(attributeName, srcAttr)) 182 setUrlToLoad(attributeValue, DisallowURLReplacement); 183 else if (match(attributeName, crossoriginAttr)) 184 setCrossOriginAllowed(attributeValue); 185 } 186 187 template<typename NameType> 188 void processImgAttribute(const NameType& attributeName, const String& attributeValue) 189 { 190 if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) { 191 m_imgSrcUrl = attributeValue; 192 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement); 193 } else if (match(attributeName, crossoriginAttr)) { 194 setCrossOriginAllowed(attributeValue); 195 } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { 196 m_srcsetAttributeValue = attributeValue; 197 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); 198 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); 199 } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) { 200 m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues); 201 m_sourceSizeSet = true; 202 if (!m_srcsetImageCandidate.isEmpty()) { 203 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); 204 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement); 205 } 206 } 207 } 208 209 template<typename NameType> 210 void processLinkAttribute(const NameType& attributeName, const String& attributeValue) 211 { 212 // FIXME - Don't set rel/media/crossorigin multiple times. 213 if (match(attributeName, hrefAttr)) 214 setUrlToLoad(attributeValue, DisallowURLReplacement); 215 else if (match(attributeName, relAttr)) 216 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); 217 else if (match(attributeName, mediaAttr)) 218 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); 219 else if (match(attributeName, crossoriginAttr)) 220 setCrossOriginAllowed(attributeValue); 221 } 222 223 template<typename NameType> 224 void processInputAttribute(const NameType& attributeName, const String& attributeValue) 225 { 226 // FIXME - Don't set type multiple times. 227 if (match(attributeName, srcAttr)) 228 setUrlToLoad(attributeValue, DisallowURLReplacement); 229 else if (match(attributeName, typeAttr)) 230 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image); 231 } 232 233 template<typename NameType> 234 void processSourceAttribute(const NameType& attributeName, const String& attributeValue) 235 { 236 if (!RuntimeEnabledFeatures::pictureEnabled()) 237 return; 238 if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) { 239 m_srcsetAttributeValue = attributeValue; 240 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue); 241 } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) { 242 m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues); 243 m_sourceSizeSet = true; 244 if (!m_srcsetImageCandidate.isEmpty()) { 245 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue); 246 } 247 } else if (match(attributeName, mediaAttr)) { 248 // FIXME - Don't match media multiple times. 249 m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue); 250 } 251 252 } 253 254 template<typename NameType> 255 void processAttribute(const NameType& attributeName, const String& attributeValue) 256 { 257 if (match(attributeName, charsetAttr)) 258 m_charset = attributeValue; 259 260 if (match(m_tagImpl, scriptTag)) 261 processScriptAttribute(attributeName, attributeValue); 262 else if (match(m_tagImpl, imgTag)) 263 processImgAttribute(attributeName, attributeValue); 264 else if (match(m_tagImpl, linkTag)) 265 processLinkAttribute(attributeName, attributeValue); 266 else if (match(m_tagImpl, inputTag)) 267 processInputAttribute(attributeName, attributeValue); 268 else if (match(m_tagImpl, sourceTag)) 269 processSourceAttribute(attributeName, attributeValue); 270 } 271 272 static bool relAttributeIsStyleSheet(const String& attributeValue) 273 { 274 LinkRelAttribute rel(attributeValue); 275 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch(); 276 } 277 278 void setUrlToLoad(const String& value, URLReplacement replacement) 279 { 280 // We only respect the first src/href, per HTML5: 281 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state 282 if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty()) 283 return; 284 String url = stripLeadingAndTrailingHTMLSpaces(value); 285 if (url.isEmpty()) 286 return; 287 m_urlToLoad = url; 288 } 289 290 const String& charset() const 291 { 292 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway. 293 if (match(m_tagImpl, imgTag)) 294 return emptyString(); 295 return m_charset; 296 } 297 298 Resource::Type resourceType() const 299 { 300 if (match(m_tagImpl, scriptTag)) 301 return Resource::Script; 302 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage)) 303 return Resource::Image; 304 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet) 305 return Resource::CSSStyleSheet; 306 ASSERT_NOT_REACHED(); 307 return Resource::Raw; 308 } 309 310 bool shouldPreload() const 311 { 312 if (m_urlToLoad.isEmpty()) 313 return false; 314 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet) 315 return false; 316 if (match(m_tagImpl, inputTag) && !m_inputIsImage) 317 return false; 318 return true; 319 } 320 321 bool isCORSEnabled() const 322 { 323 return m_isCORSEnabled; 324 } 325 326 StoredCredentials allowStoredCredentials() const 327 { 328 return m_allowCredentials; 329 } 330 331 void setCrossOriginAllowed(const String& corsSetting) 332 { 333 m_isCORSEnabled = true; 334 if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials")) 335 m_allowCredentials = AllowStoredCredentials; 336 else 337 m_allowCredentials = DoNotAllowStoredCredentials; 338 } 339 340 const StringImpl* m_tagImpl; 341 String m_urlToLoad; 342 ImageCandidate m_srcsetImageCandidate; 343 String m_charset; 344 bool m_linkIsStyleSheet; 345 bool m_matchedMediaAttribute; 346 bool m_inputIsImage; 347 String m_imgSrcUrl; 348 String m_srcsetAttributeValue; 349 unsigned m_sourceSize; 350 bool m_sourceSizeSet; 351 bool m_isCORSEnabled; 352 StoredCredentials m_allowCredentials; 353 RefPtr<MediaValues> m_mediaValues; 354 }; 355 356 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) 357 : m_documentURL(documentURL) 358 , m_inStyle(false) 359 , m_inPicture(false) 360 , m_templateCount(0) 361 , m_mediaValues(mediaValues) 362 { 363 } 364 365 TokenPreloadScanner::~TokenPreloadScanner() 366 { 367 } 368 369 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint() 370 { 371 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size(); 372 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount)); 373 return checkpoint; 374 } 375 376 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex) 377 { 378 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid. 379 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex]; 380 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL; 381 m_inStyle = checkpoint.inStyle; 382 m_templateCount = checkpoint.templateCount; 383 m_cssScanner.reset(); 384 m_checkpoints.clear(); 385 } 386 387 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 388 { 389 scanCommon(token, source, requests); 390 } 391 392 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 393 { 394 scanCommon(token, source, requests); 395 } 396 397 template<typename Token> 398 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests) 399 { 400 switch (token.type()) { 401 case HTMLToken::Character: { 402 if (!m_inStyle) 403 return; 404 m_cssScanner.scan(token.data(), source, requests); 405 return; 406 } 407 case HTMLToken::EndTag: { 408 const StringImpl* tagImpl = tagImplFor(token.data()); 409 if (match(tagImpl, templateTag)) { 410 if (m_templateCount) 411 --m_templateCount; 412 return; 413 } 414 if (match(tagImpl, styleTag)) { 415 if (m_inStyle) 416 m_cssScanner.reset(); 417 m_inStyle = false; 418 return; 419 } 420 if (match(tagImpl, pictureTag)) 421 m_inPicture = false; 422 return; 423 } 424 case HTMLToken::StartTag: { 425 if (m_templateCount) 426 return; 427 const StringImpl* tagImpl = tagImplFor(token.data()); 428 if (match(tagImpl, templateTag)) { 429 ++m_templateCount; 430 return; 431 } 432 if (match(tagImpl, styleTag)) { 433 m_inStyle = true; 434 return; 435 } 436 if (match(tagImpl, baseTag)) { 437 // The first <base> element is the one that wins. 438 if (!m_predictedBaseElementURL.isEmpty()) 439 return; 440 updatePredictedBaseURL(token); 441 return; 442 } 443 if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) { 444 m_inPicture = true; 445 m_pictureSourceURL = String(); 446 return; 447 } 448 449 StartTagScanner scanner(tagImpl, m_mediaValues); 450 scanner.processAttributes(token.attributes()); 451 if (m_inPicture) 452 scanner.handlePictureSourceURL(m_pictureSourceURL); 453 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source); 454 if (request) 455 requests.append(request.release()); 456 return; 457 } 458 default: { 459 return; 460 } 461 } 462 } 463 464 template<typename Token> 465 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token) 466 { 467 ASSERT(m_predictedBaseElementURL.isEmpty()); 468 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr)) 469 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy(); 470 } 471 472 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues) 473 : m_scanner(documentURL, mediaValues) 474 , m_tokenizer(HTMLTokenizer::create(options)) 475 { 476 } 477 478 HTMLPreloadScanner::~HTMLPreloadScanner() 479 { 480 } 481 482 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source) 483 { 484 m_source.append(source); 485 } 486 487 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL) 488 { 489 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. 490 491 TRACE_EVENT1("webkit", "HTMLPreloadScanner::scan", "source_length", m_source.length()); 492 493 // When we start scanning, our best prediction of the baseElementURL is the real one! 494 if (!startingBaseElementURL.isEmpty()) 495 m_scanner.setPredictedBaseElementURL(startingBaseElementURL); 496 497 PreloadRequestStream requests; 498 499 while (m_tokenizer->nextToken(m_source, m_token)) { 500 if (m_token.type() == HTMLToken::StartTag) 501 m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit)); 502 m_scanner.scan(m_token, m_source, requests); 503 m_token.clear(); 504 } 505 506 preloader->takeAndPreload(requests); 507 } 508 509 } 510