1 /* 2 * Copyright (C) 2008 Apple Inc. All Rights Reserved. 3 * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/ 4 * Copyright (C) 2010 Google Inc. All Rights Reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY 16 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 18 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR 19 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 20 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 22 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 23 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 25 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "config.h" 29 #include "core/html/parser/HTMLPreloadScanner.h" 30 31 #include "HTMLNames.h" 32 #include "core/html/InputTypeNames.h" 33 #include "core/html/LinkRelAttribute.h" 34 #include "core/html/parser/HTMLParserIdioms.h" 35 #include "core/html/parser/HTMLTokenizer.h" 36 #include "core/platform/chromium/TraceEvent.h" 37 #include "wtf/MainThread.h" 38 39 namespace WebCore { 40 41 using namespace HTMLNames; 42 43 static bool match(const StringImpl* impl, const QualifiedName& qName) 44 { 45 return impl == qName.localName().impl(); 46 } 47 48 static bool match(const HTMLIdentifier& name, const QualifiedName& qName) 49 { 50 return match(name.asStringImpl(), qName); 51 } 52 53 static bool match(const AtomicString& name, const QualifiedName& qName) 54 { 55 ASSERT(isMainThread()); 56 return qName.localName() == name; 57 } 58 59 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data) 60 { 61 AtomicString tagName(data); 62 const StringImpl* result = tagName.impl(); 63 if (result->isStatic()) 64 return result; 65 return 0; 66 } 67 68 static const StringImpl* tagImplFor(const HTMLIdentifier& tagName) 69 { 70 const StringImpl* result = tagName.asStringImpl(); 71 if (result->isStatic()) 72 return result; 73 return 0; 74 } 75 76 static String initiatorFor(const StringImpl* tagImpl) 77 { 78 ASSERT(tagImpl); 79 if (match(tagImpl, imgTag)) 80 return imgTag.localName(); 81 if (match(tagImpl, inputTag)) 82 return inputTag.localName(); 83 if (match(tagImpl, linkTag)) 84 return linkTag.localName(); 85 if (match(tagImpl, scriptTag)) 86 return scriptTag.localName(); 87 ASSERT_NOT_REACHED(); 88 return emptyString(); 89 } 90 91 class TokenPreloadScanner::StartTagScanner { 92 public: 93 explicit StartTagScanner(const StringImpl* tagImpl) 94 : m_tagImpl(tagImpl) 95 , m_linkIsStyleSheet(false) 96 , m_inputIsImage(false) 97 { 98 if (!match(m_tagImpl, imgTag) 99 && !match(m_tagImpl, inputTag) 100 && !match(m_tagImpl, linkTag) 101 && !match(m_tagImpl, scriptTag)) 102 m_tagImpl = 0; 103 } 104 105 void processAttributes(const HTMLToken::AttributeList& attributes) 106 { 107 ASSERT(isMainThread()); 108 if (!m_tagImpl) 109 return; 110 for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) { 111 AtomicString attributeName(iter->name); 112 String attributeValue = StringImpl::create8BitIfPossible(iter->value); 113 processAttribute(attributeName, attributeValue); 114 } 115 } 116 117 void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes) 118 { 119 if (!m_tagImpl) 120 return; 121 for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) 122 processAttribute(iter->name, iter->value); 123 } 124 125 PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source) 126 { 127 if (!shouldPreload()) 128 return nullptr; 129 130 TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii()); 131 TextPosition position = TextPosition(source.currentLine(), source.currentColumn()); 132 OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute); 133 request->setCrossOriginModeAllowsCookies(crossOriginModeAllowsCookies()); 134 request->setCharset(charset()); 135 return request.release(); 136 } 137 138 private: 139 template<typename NameType> 140 void processAttribute(const NameType& attributeName, const String& attributeValue) 141 { 142 if (match(attributeName, charsetAttr)) 143 m_charset = attributeValue; 144 145 if (match(m_tagImpl, scriptTag) || match(m_tagImpl, imgTag)) { 146 if (match(attributeName, srcAttr)) 147 setUrlToLoad(attributeValue); 148 else if (match(attributeName, crossoriginAttr) && !attributeValue.isNull()) 149 m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue); 150 } else if (match(m_tagImpl, linkTag)) { 151 if (match(attributeName, hrefAttr)) 152 setUrlToLoad(attributeValue); 153 else if (match(attributeName, relAttr)) 154 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue); 155 else if (match(attributeName, mediaAttr)) 156 m_mediaAttribute = attributeValue; 157 } else if (match(m_tagImpl, inputTag)) { 158 if (match(attributeName, srcAttr)) 159 setUrlToLoad(attributeValue); 160 else if (match(attributeName, typeAttr)) 161 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image()); 162 } 163 } 164 165 static bool relAttributeIsStyleSheet(const String& attributeValue) 166 { 167 LinkRelAttribute rel(attributeValue); 168 return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch(); 169 } 170 171 void setUrlToLoad(const String& attributeValue) 172 { 173 // We only respect the first src/href, per HTML5: 174 // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state 175 if (!m_urlToLoad.isEmpty()) 176 return; 177 m_urlToLoad = stripLeadingAndTrailingHTMLSpaces(attributeValue); 178 } 179 180 const String& charset() const 181 { 182 // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway. 183 if (match(m_tagImpl, imgTag)) 184 return emptyString(); 185 return m_charset; 186 } 187 188 Resource::Type resourceType() const 189 { 190 if (match(m_tagImpl, scriptTag)) 191 return Resource::Script; 192 if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage)) 193 return Resource::Image; 194 if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet) 195 return Resource::CSSStyleSheet; 196 ASSERT_NOT_REACHED(); 197 return Resource::Raw; 198 } 199 200 bool shouldPreload() 201 { 202 if (m_urlToLoad.isEmpty()) 203 return false; 204 if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet) 205 return false; 206 if (match(m_tagImpl, inputTag) && !m_inputIsImage) 207 return false; 208 return true; 209 } 210 211 bool crossOriginModeAllowsCookies() 212 { 213 return m_crossOriginMode.isNull() || equalIgnoringCase(m_crossOriginMode, "use-credentials"); 214 } 215 216 const StringImpl* m_tagImpl; 217 String m_urlToLoad; 218 String m_charset; 219 String m_crossOriginMode; 220 bool m_linkIsStyleSheet; 221 String m_mediaAttribute; 222 bool m_inputIsImage; 223 }; 224 225 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL) 226 : m_documentURL(documentURL) 227 , m_inStyle(false) 228 , m_templateCount(0) 229 { 230 } 231 232 TokenPreloadScanner::~TokenPreloadScanner() 233 { 234 } 235 236 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint() 237 { 238 TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size(); 239 m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount)); 240 return checkpoint; 241 } 242 243 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex) 244 { 245 ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid. 246 const Checkpoint& checkpoint = m_checkpoints[checkpointIndex]; 247 m_predictedBaseElementURL = checkpoint.predictedBaseElementURL; 248 m_inStyle = checkpoint.inStyle; 249 m_templateCount = checkpoint.templateCount; 250 m_cssScanner.reset(); 251 m_checkpoints.clear(); 252 } 253 254 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 255 { 256 scanCommon(token, source, requests); 257 } 258 259 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests) 260 { 261 scanCommon(token, source, requests); 262 } 263 264 template<typename Token> 265 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests) 266 { 267 switch (token.type()) { 268 case HTMLToken::Character: { 269 if (!m_inStyle) 270 return; 271 m_cssScanner.scan(token.data(), source, requests); 272 return; 273 } 274 case HTMLToken::EndTag: { 275 const StringImpl* tagImpl = tagImplFor(token.data()); 276 if (match(tagImpl, templateTag)) { 277 if (m_templateCount) 278 --m_templateCount; 279 return; 280 } 281 if (match(tagImpl, styleTag)) { 282 if (m_inStyle) 283 m_cssScanner.reset(); 284 m_inStyle = false; 285 } 286 return; 287 } 288 case HTMLToken::StartTag: { 289 if (m_templateCount) 290 return; 291 const StringImpl* tagImpl = tagImplFor(token.data()); 292 if (match(tagImpl, templateTag)) { 293 ++m_templateCount; 294 return; 295 } 296 if (match(tagImpl, styleTag)) { 297 m_inStyle = true; 298 return; 299 } 300 if (match(tagImpl, baseTag)) { 301 // The first <base> element is the one that wins. 302 if (!m_predictedBaseElementURL.isEmpty()) 303 return; 304 updatePredictedBaseURL(token); 305 return; 306 } 307 308 StartTagScanner scanner(tagImpl); 309 scanner.processAttributes(token.attributes()); 310 OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source); 311 if (request) 312 requests.append(request.release()); 313 return; 314 } 315 default: { 316 return; 317 } 318 } 319 } 320 321 template<typename Token> 322 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token) 323 { 324 ASSERT(m_predictedBaseElementURL.isEmpty()); 325 if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr)) 326 m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy(); 327 } 328 329 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL) 330 : m_scanner(documentURL) 331 , m_tokenizer(HTMLTokenizer::create(options)) 332 { 333 } 334 335 HTMLPreloadScanner::~HTMLPreloadScanner() 336 { 337 } 338 339 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source) 340 { 341 m_source.append(source); 342 } 343 344 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL) 345 { 346 ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread. 347 348 // When we start scanning, our best prediction of the baseElementURL is the real one! 349 if (!startingBaseElementURL.isEmpty()) 350 m_scanner.setPredictedBaseElementURL(startingBaseElementURL); 351 352 PreloadRequestStream requests; 353 354 while (m_tokenizer->nextToken(m_source, m_token)) { 355 if (m_token.type() == HTMLToken::StartTag) 356 m_tokenizer->updateStateFor(AtomicString(m_token.name())); 357 m_scanner.scan(m_token, m_source, requests); 358 m_token.clear(); 359 } 360 361 preloader->takeAndPreload(requests); 362 } 363 364 } 365