Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLPreloadScanner.h"
     30 
     31 #include "HTMLNames.h"
     32 #include "core/html/InputTypeNames.h"
     33 #include "core/html/LinkRelAttribute.h"
     34 #include "core/html/parser/HTMLParserIdioms.h"
     35 #include "core/html/parser/HTMLTokenizer.h"
     36 #include "core/platform/chromium/TraceEvent.h"
     37 #include "wtf/MainThread.h"
     38 
     39 namespace WebCore {
     40 
     41 using namespace HTMLNames;
     42 
     43 static bool match(const StringImpl* impl, const QualifiedName& qName)
     44 {
     45     return impl == qName.localName().impl();
     46 }
     47 
     48 static bool match(const HTMLIdentifier& name, const QualifiedName& qName)
     49 {
     50     return match(name.asStringImpl(), qName);
     51 }
     52 
     53 static bool match(const AtomicString& name, const QualifiedName& qName)
     54 {
     55     ASSERT(isMainThread());
     56     return qName.localName() == name;
     57 }
     58 
     59 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
     60 {
     61     AtomicString tagName(data);
     62     const StringImpl* result = tagName.impl();
     63     if (result->isStatic())
     64         return result;
     65     return 0;
     66 }
     67 
     68 static const StringImpl* tagImplFor(const HTMLIdentifier& tagName)
     69 {
     70     const StringImpl* result = tagName.asStringImpl();
     71     if (result->isStatic())
     72         return result;
     73     return 0;
     74 }
     75 
     76 static String initiatorFor(const StringImpl* tagImpl)
     77 {
     78     ASSERT(tagImpl);
     79     if (match(tagImpl, imgTag))
     80         return imgTag.localName();
     81     if (match(tagImpl, inputTag))
     82         return inputTag.localName();
     83     if (match(tagImpl, linkTag))
     84         return linkTag.localName();
     85     if (match(tagImpl, scriptTag))
     86         return scriptTag.localName();
     87     ASSERT_NOT_REACHED();
     88     return emptyString();
     89 }
     90 
     91 class TokenPreloadScanner::StartTagScanner {
     92 public:
     93     explicit StartTagScanner(const StringImpl* tagImpl)
     94         : m_tagImpl(tagImpl)
     95         , m_linkIsStyleSheet(false)
     96         , m_inputIsImage(false)
     97     {
     98         if (!match(m_tagImpl, imgTag)
     99             && !match(m_tagImpl, inputTag)
    100             && !match(m_tagImpl, linkTag)
    101             && !match(m_tagImpl, scriptTag))
    102             m_tagImpl = 0;
    103     }
    104 
    105     void processAttributes(const HTMLToken::AttributeList& attributes)
    106     {
    107         ASSERT(isMainThread());
    108         if (!m_tagImpl)
    109             return;
    110         for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
    111             AtomicString attributeName(iter->name);
    112             String attributeValue = StringImpl::create8BitIfPossible(iter->value);
    113             processAttribute(attributeName, attributeValue);
    114         }
    115     }
    116 
    117     void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
    118     {
    119         if (!m_tagImpl)
    120             return;
    121         for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
    122             processAttribute(iter->name, iter->value);
    123     }
    124 
    125     PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
    126     {
    127         if (!shouldPreload())
    128             return nullptr;
    129 
    130         TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
    131         TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
    132         OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType(), m_mediaAttribute);
    133         request->setCrossOriginModeAllowsCookies(crossOriginModeAllowsCookies());
    134         request->setCharset(charset());
    135         return request.release();
    136     }
    137 
    138 private:
    139     template<typename NameType>
    140     void processAttribute(const NameType& attributeName, const String& attributeValue)
    141     {
    142         if (match(attributeName, charsetAttr))
    143             m_charset = attributeValue;
    144 
    145         if (match(m_tagImpl, scriptTag) || match(m_tagImpl, imgTag)) {
    146             if (match(attributeName, srcAttr))
    147                 setUrlToLoad(attributeValue);
    148             else if (match(attributeName, crossoriginAttr) && !attributeValue.isNull())
    149                 m_crossOriginMode = stripLeadingAndTrailingHTMLSpaces(attributeValue);
    150         } else if (match(m_tagImpl, linkTag)) {
    151             if (match(attributeName, hrefAttr))
    152                 setUrlToLoad(attributeValue);
    153             else if (match(attributeName, relAttr))
    154                 m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
    155             else if (match(attributeName, mediaAttr))
    156                 m_mediaAttribute = attributeValue;
    157         } else if (match(m_tagImpl, inputTag)) {
    158             if (match(attributeName, srcAttr))
    159                 setUrlToLoad(attributeValue);
    160             else if (match(attributeName, typeAttr))
    161                 m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image());
    162         }
    163     }
    164 
    165     static bool relAttributeIsStyleSheet(const String& attributeValue)
    166     {
    167         LinkRelAttribute rel(attributeValue);
    168         return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
    169     }
    170 
    171     void setUrlToLoad(const String& attributeValue)
    172     {
    173         // We only respect the first src/href, per HTML5:
    174         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
    175         if (!m_urlToLoad.isEmpty())
    176             return;
    177         m_urlToLoad = stripLeadingAndTrailingHTMLSpaces(attributeValue);
    178     }
    179 
    180     const String& charset() const
    181     {
    182         // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
    183         if (match(m_tagImpl, imgTag))
    184             return emptyString();
    185         return m_charset;
    186     }
    187 
    188     Resource::Type resourceType() const
    189     {
    190         if (match(m_tagImpl, scriptTag))
    191             return Resource::Script;
    192         if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
    193             return Resource::Image;
    194         if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
    195             return Resource::CSSStyleSheet;
    196         ASSERT_NOT_REACHED();
    197         return Resource::Raw;
    198     }
    199 
    200     bool shouldPreload()
    201     {
    202         if (m_urlToLoad.isEmpty())
    203             return false;
    204         if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
    205             return false;
    206         if (match(m_tagImpl, inputTag) && !m_inputIsImage)
    207             return false;
    208         return true;
    209     }
    210 
    211     bool crossOriginModeAllowsCookies()
    212     {
    213         return m_crossOriginMode.isNull() || equalIgnoringCase(m_crossOriginMode, "use-credentials");
    214     }
    215 
    216     const StringImpl* m_tagImpl;
    217     String m_urlToLoad;
    218     String m_charset;
    219     String m_crossOriginMode;
    220     bool m_linkIsStyleSheet;
    221     String m_mediaAttribute;
    222     bool m_inputIsImage;
    223 };
    224 
    225 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL)
    226     : m_documentURL(documentURL)
    227     , m_inStyle(false)
    228     , m_templateCount(0)
    229 {
    230 }
    231 
    232 TokenPreloadScanner::~TokenPreloadScanner()
    233 {
    234 }
    235 
    236 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
    237 {
    238     TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
    239     m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
    240     return checkpoint;
    241 }
    242 
    243 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
    244 {
    245     ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
    246     const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
    247     m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
    248     m_inStyle = checkpoint.inStyle;
    249     m_templateCount = checkpoint.templateCount;
    250     m_cssScanner.reset();
    251     m_checkpoints.clear();
    252 }
    253 
    254 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    255 {
    256     scanCommon(token, source, requests);
    257 }
    258 
    259 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    260 {
    261     scanCommon(token, source, requests);
    262 }
    263 
    264 template<typename Token>
    265 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
    266 {
    267     switch (token.type()) {
    268     case HTMLToken::Character: {
    269         if (!m_inStyle)
    270             return;
    271         m_cssScanner.scan(token.data(), source, requests);
    272         return;
    273     }
    274     case HTMLToken::EndTag: {
    275         const StringImpl* tagImpl = tagImplFor(token.data());
    276         if (match(tagImpl, templateTag)) {
    277             if (m_templateCount)
    278                 --m_templateCount;
    279             return;
    280         }
    281         if (match(tagImpl, styleTag)) {
    282             if (m_inStyle)
    283                 m_cssScanner.reset();
    284             m_inStyle = false;
    285         }
    286         return;
    287     }
    288     case HTMLToken::StartTag: {
    289         if (m_templateCount)
    290             return;
    291         const StringImpl* tagImpl = tagImplFor(token.data());
    292         if (match(tagImpl, templateTag)) {
    293             ++m_templateCount;
    294             return;
    295         }
    296         if (match(tagImpl, styleTag)) {
    297             m_inStyle = true;
    298             return;
    299         }
    300         if (match(tagImpl, baseTag)) {
    301             // The first <base> element is the one that wins.
    302             if (!m_predictedBaseElementURL.isEmpty())
    303                 return;
    304             updatePredictedBaseURL(token);
    305             return;
    306         }
    307 
    308         StartTagScanner scanner(tagImpl);
    309         scanner.processAttributes(token.attributes());
    310         OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
    311         if (request)
    312             requests.append(request.release());
    313         return;
    314     }
    315     default: {
    316         return;
    317     }
    318     }
    319 }
    320 
    321 template<typename Token>
    322 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
    323 {
    324     ASSERT(m_predictedBaseElementURL.isEmpty());
    325     if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
    326         m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
    327 }
    328 
    329 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL)
    330     : m_scanner(documentURL)
    331     , m_tokenizer(HTMLTokenizer::create(options))
    332 {
    333 }
    334 
    335 HTMLPreloadScanner::~HTMLPreloadScanner()
    336 {
    337 }
    338 
    339 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
    340 {
    341     m_source.append(source);
    342 }
    343 
    344 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
    345 {
    346     ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
    347 
    348     // When we start scanning, our best prediction of the baseElementURL is the real one!
    349     if (!startingBaseElementURL.isEmpty())
    350         m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
    351 
    352     PreloadRequestStream requests;
    353 
    354     while (m_tokenizer->nextToken(m_source, m_token)) {
    355         if (m_token.type() == HTMLToken::StartTag)
    356             m_tokenizer->updateStateFor(AtomicString(m_token.name()));
    357         m_scanner.scan(m_token, m_source, requests);
    358         m_token.clear();
    359     }
    360 
    361     preloader->takeAndPreload(requests);
    362 }
    363 
    364 }
    365