Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLPreloadScanner.h"
     30 
     31 #include "core/HTMLNames.h"
     32 #include "core/InputTypeNames.h"
     33 #include "core/css/MediaList.h"
     34 #include "core/css/MediaQueryEvaluator.h"
     35 #include "core/css/MediaValues.h"
     36 #include "core/css/parser/SizesAttributeParser.h"
     37 #include "core/html/LinkRelAttribute.h"
     38 #include "core/html/parser/HTMLParserIdioms.h"
     39 #include "core/html/parser/HTMLSrcsetParser.h"
     40 #include "core/html/parser/HTMLTokenizer.h"
     41 #include "platform/RuntimeEnabledFeatures.h"
     42 #include "platform/TraceEvent.h"
     43 #include "wtf/MainThread.h"
     44 
     45 namespace WebCore {
     46 
     47 using namespace HTMLNames;
     48 
     49 static bool match(const StringImpl* impl, const QualifiedName& qName)
     50 {
     51     return impl == qName.localName().impl();
     52 }
     53 
     54 static bool match(const AtomicString& name, const QualifiedName& qName)
     55 {
     56     ASSERT(isMainThread());
     57     return qName.localName() == name;
     58 }
     59 
     60 static bool match(const String& name, const QualifiedName& qName)
     61 {
     62     return threadSafeMatch(name, qName);
     63 }
     64 
     65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
     66 {
     67     AtomicString tagName(data);
     68     const StringImpl* result = tagName.impl();
     69     if (result->isStatic())
     70         return result;
     71     return 0;
     72 }
     73 
     74 static const StringImpl* tagImplFor(const String& tagName)
     75 {
     76     const StringImpl* result = tagName.impl();
     77     if (result->isStatic())
     78         return result;
     79     return 0;
     80 }
     81 
     82 static String initiatorFor(const StringImpl* tagImpl)
     83 {
     84     ASSERT(tagImpl);
     85     if (match(tagImpl, imgTag))
     86         return imgTag.localName();
     87     if (match(tagImpl, inputTag))
     88         return inputTag.localName();
     89     if (match(tagImpl, linkTag))
     90         return linkTag.localName();
     91     if (match(tagImpl, scriptTag))
     92         return scriptTag.localName();
     93     ASSERT_NOT_REACHED();
     94     return emptyString();
     95 }
     96 
     97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
     98 {
     99     RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
    100     MediaQueryEvaluator mediaQueryEvaluator("screen", mediaValues);
    101     return mediaQueryEvaluator.eval(mediaQueries.get());
    102 }
    103 
    104 class TokenPreloadScanner::StartTagScanner {
    105 public:
    106     StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
    107         : m_tagImpl(tagImpl)
    108         , m_linkIsStyleSheet(false)
    109         , m_matchedMediaAttribute(true)
    110         , m_inputIsImage(false)
    111         , m_sourceSize(0)
    112         , m_sourceSizeSet(false)
    113         , m_isCORSEnabled(false)
    114         , m_allowCredentials(DoNotAllowStoredCredentials)
    115         , m_mediaValues(mediaValues)
    116     {
    117         if (match(m_tagImpl, imgTag)
    118             || match(m_tagImpl, sourceTag)) {
    119             if (RuntimeEnabledFeatures::pictureSizesEnabled())
    120                 m_sourceSize = SizesAttributeParser::findEffectiveSize(String(), m_mediaValues);
    121             return;
    122         }
    123         if ( !match(m_tagImpl, inputTag)
    124             && !match(m_tagImpl, linkTag)
    125             && !match(m_tagImpl, scriptTag))
    126             m_tagImpl = 0;
    127     }
    128 
    129     enum URLReplacement {
    130         AllowURLReplacement,
    131         DisallowURLReplacement
    132     };
    133 
    134     void processAttributes(const HTMLToken::AttributeList& attributes)
    135     {
    136         ASSERT(isMainThread());
    137         if (!m_tagImpl)
    138             return;
    139         for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
    140             AtomicString attributeName(iter->name);
    141             String attributeValue = StringImpl::create8BitIfPossible(iter->value);
    142             processAttribute(attributeName, attributeValue);
    143         }
    144     }
    145 
    146     void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
    147     {
    148         if (!m_tagImpl)
    149             return;
    150         for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
    151             processAttribute(iter->name, iter->value);
    152     }
    153 
    154     void handlePictureSourceURL(String& sourceURL)
    155     {
    156         if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty())
    157             sourceURL = m_srcsetImageCandidate.toString();
    158         else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty())
    159             setUrlToLoad(sourceURL, AllowURLReplacement);
    160     }
    161 
    162     PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
    163     {
    164         if (!shouldPreload() || !m_matchedMediaAttribute)
    165             return nullptr;
    166 
    167         TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
    168         TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
    169         OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
    170         if (isCORSEnabled())
    171             request->setCrossOriginEnabled(allowStoredCredentials());
    172         request->setCharset(charset());
    173         return request.release();
    174     }
    175 
    176 private:
    177     template<typename NameType>
    178     void processScriptAttribute(const NameType& attributeName, const String& attributeValue)
    179     {
    180         // FIXME - Don't set crossorigin multiple times.
    181         if (match(attributeName, srcAttr))
    182             setUrlToLoad(attributeValue, DisallowURLReplacement);
    183         else if (match(attributeName, crossoriginAttr))
    184             setCrossOriginAllowed(attributeValue);
    185     }
    186 
    187     template<typename NameType>
    188     void processImgAttribute(const NameType& attributeName, const String& attributeValue)
    189     {
    190         if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
    191             m_imgSrcUrl = attributeValue;
    192             setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
    193         } else if (match(attributeName, crossoriginAttr)) {
    194             setCrossOriginAllowed(attributeValue);
    195         } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
    196             m_srcsetAttributeValue = attributeValue;
    197             m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
    198             setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
    199         } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
    200             m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues);
    201             m_sourceSizeSet = true;
    202             if (!m_srcsetImageCandidate.isEmpty()) {
    203                 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
    204                 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
    205             }
    206         }
    207     }
    208 
    209     template<typename NameType>
    210     void processLinkAttribute(const NameType& attributeName, const String& attributeValue)
    211     {
    212         // FIXME - Don't set rel/media/crossorigin multiple times.
    213         if (match(attributeName, hrefAttr))
    214             setUrlToLoad(attributeValue, DisallowURLReplacement);
    215         else if (match(attributeName, relAttr))
    216             m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
    217         else if (match(attributeName, mediaAttr))
    218             m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
    219         else if (match(attributeName, crossoriginAttr))
    220             setCrossOriginAllowed(attributeValue);
    221     }
    222 
    223     template<typename NameType>
    224     void processInputAttribute(const NameType& attributeName, const String& attributeValue)
    225     {
    226         // FIXME - Don't set type multiple times.
    227         if (match(attributeName, srcAttr))
    228             setUrlToLoad(attributeValue, DisallowURLReplacement);
    229         else if (match(attributeName, typeAttr))
    230             m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
    231     }
    232 
    233     template<typename NameType>
    234     void processSourceAttribute(const NameType& attributeName, const String& attributeValue)
    235     {
    236         if (!RuntimeEnabledFeatures::pictureEnabled())
    237             return;
    238         if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
    239             m_srcsetAttributeValue = attributeValue;
    240             m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
    241         } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) {
    242             m_sourceSize = SizesAttributeParser::findEffectiveSize(attributeValue, m_mediaValues);
    243             m_sourceSizeSet = true;
    244             if (!m_srcsetImageCandidate.isEmpty()) {
    245                 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
    246             }
    247         } else if (match(attributeName, mediaAttr)) {
    248             // FIXME - Don't match media multiple times.
    249             m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
    250         }
    251 
    252     }
    253 
    254     template<typename NameType>
    255     void processAttribute(const NameType& attributeName, const String& attributeValue)
    256     {
    257         if (match(attributeName, charsetAttr))
    258             m_charset = attributeValue;
    259 
    260         if (match(m_tagImpl, scriptTag))
    261             processScriptAttribute(attributeName, attributeValue);
    262         else if (match(m_tagImpl, imgTag))
    263             processImgAttribute(attributeName, attributeValue);
    264         else if (match(m_tagImpl, linkTag))
    265             processLinkAttribute(attributeName, attributeValue);
    266         else if (match(m_tagImpl, inputTag))
    267             processInputAttribute(attributeName, attributeValue);
    268         else if (match(m_tagImpl, sourceTag))
    269             processSourceAttribute(attributeName, attributeValue);
    270     }
    271 
    272     static bool relAttributeIsStyleSheet(const String& attributeValue)
    273     {
    274         LinkRelAttribute rel(attributeValue);
    275         return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
    276     }
    277 
    278     void setUrlToLoad(const String& value, URLReplacement replacement)
    279     {
    280         // We only respect the first src/href, per HTML5:
    281         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
    282         if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
    283             return;
    284         String url = stripLeadingAndTrailingHTMLSpaces(value);
    285         if (url.isEmpty())
    286             return;
    287         m_urlToLoad = url;
    288     }
    289 
    290     const String& charset() const
    291     {
    292         // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
    293         if (match(m_tagImpl, imgTag))
    294             return emptyString();
    295         return m_charset;
    296     }
    297 
    298     Resource::Type resourceType() const
    299     {
    300         if (match(m_tagImpl, scriptTag))
    301             return Resource::Script;
    302         if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
    303             return Resource::Image;
    304         if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
    305             return Resource::CSSStyleSheet;
    306         ASSERT_NOT_REACHED();
    307         return Resource::Raw;
    308     }
    309 
    310     bool shouldPreload() const
    311     {
    312         if (m_urlToLoad.isEmpty())
    313             return false;
    314         if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
    315             return false;
    316         if (match(m_tagImpl, inputTag) && !m_inputIsImage)
    317             return false;
    318         return true;
    319     }
    320 
    321     bool isCORSEnabled() const
    322     {
    323         return m_isCORSEnabled;
    324     }
    325 
    326     StoredCredentials allowStoredCredentials() const
    327     {
    328         return m_allowCredentials;
    329     }
    330 
    331     void setCrossOriginAllowed(const String& corsSetting)
    332     {
    333         m_isCORSEnabled = true;
    334         if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
    335             m_allowCredentials = AllowStoredCredentials;
    336         else
    337             m_allowCredentials = DoNotAllowStoredCredentials;
    338     }
    339 
    340     const StringImpl* m_tagImpl;
    341     String m_urlToLoad;
    342     ImageCandidate m_srcsetImageCandidate;
    343     String m_charset;
    344     bool m_linkIsStyleSheet;
    345     bool m_matchedMediaAttribute;
    346     bool m_inputIsImage;
    347     String m_imgSrcUrl;
    348     String m_srcsetAttributeValue;
    349     unsigned m_sourceSize;
    350     bool m_sourceSizeSet;
    351     bool m_isCORSEnabled;
    352     StoredCredentials m_allowCredentials;
    353     RefPtr<MediaValues> m_mediaValues;
    354 };
    355 
    356 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    357     : m_documentURL(documentURL)
    358     , m_inStyle(false)
    359     , m_inPicture(false)
    360     , m_templateCount(0)
    361     , m_mediaValues(mediaValues)
    362 {
    363 }
    364 
    365 TokenPreloadScanner::~TokenPreloadScanner()
    366 {
    367 }
    368 
    369 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
    370 {
    371     TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
    372     m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
    373     return checkpoint;
    374 }
    375 
    376 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
    377 {
    378     ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
    379     const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
    380     m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
    381     m_inStyle = checkpoint.inStyle;
    382     m_templateCount = checkpoint.templateCount;
    383     m_cssScanner.reset();
    384     m_checkpoints.clear();
    385 }
    386 
    387 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    388 {
    389     scanCommon(token, source, requests);
    390 }
    391 
    392 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    393 {
    394     scanCommon(token, source, requests);
    395 }
    396 
    397 template<typename Token>
    398 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
    399 {
    400     switch (token.type()) {
    401     case HTMLToken::Character: {
    402         if (!m_inStyle)
    403             return;
    404         m_cssScanner.scan(token.data(), source, requests);
    405         return;
    406     }
    407     case HTMLToken::EndTag: {
    408         const StringImpl* tagImpl = tagImplFor(token.data());
    409         if (match(tagImpl, templateTag)) {
    410             if (m_templateCount)
    411                 --m_templateCount;
    412             return;
    413         }
    414         if (match(tagImpl, styleTag)) {
    415             if (m_inStyle)
    416                 m_cssScanner.reset();
    417             m_inStyle = false;
    418             return;
    419         }
    420         if (match(tagImpl, pictureTag))
    421             m_inPicture = false;
    422         return;
    423     }
    424     case HTMLToken::StartTag: {
    425         if (m_templateCount)
    426             return;
    427         const StringImpl* tagImpl = tagImplFor(token.data());
    428         if (match(tagImpl, templateTag)) {
    429             ++m_templateCount;
    430             return;
    431         }
    432         if (match(tagImpl, styleTag)) {
    433             m_inStyle = true;
    434             return;
    435         }
    436         if (match(tagImpl, baseTag)) {
    437             // The first <base> element is the one that wins.
    438             if (!m_predictedBaseElementURL.isEmpty())
    439                 return;
    440             updatePredictedBaseURL(token);
    441             return;
    442         }
    443         if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) {
    444             m_inPicture = true;
    445             m_pictureSourceURL = String();
    446             return;
    447         }
    448 
    449         StartTagScanner scanner(tagImpl, m_mediaValues);
    450         scanner.processAttributes(token.attributes());
    451         if (m_inPicture)
    452             scanner.handlePictureSourceURL(m_pictureSourceURL);
    453         OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
    454         if (request)
    455             requests.append(request.release());
    456         return;
    457     }
    458     default: {
    459         return;
    460     }
    461     }
    462 }
    463 
    464 template<typename Token>
    465 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
    466 {
    467     ASSERT(m_predictedBaseElementURL.isEmpty());
    468     if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
    469         m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
    470 }
    471 
    472 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    473     : m_scanner(documentURL, mediaValues)
    474     , m_tokenizer(HTMLTokenizer::create(options))
    475 {
    476 }
    477 
    478 HTMLPreloadScanner::~HTMLPreloadScanner()
    479 {
    480 }
    481 
    482 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
    483 {
    484     m_source.append(source);
    485 }
    486 
    487 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
    488 {
    489     ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
    490 
    491     TRACE_EVENT1("webkit", "HTMLPreloadScanner::scan", "source_length", m_source.length());
    492 
    493     // When we start scanning, our best prediction of the baseElementURL is the real one!
    494     if (!startingBaseElementURL.isEmpty())
    495         m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
    496 
    497     PreloadRequestStream requests;
    498 
    499     while (m_tokenizer->nextToken(m_source, m_token)) {
    500         if (m_token.type() == HTMLToken::StartTag)
    501             m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
    502         m_scanner.scan(m_token, m_source, requests);
    503         m_token.clear();
    504     }
    505 
    506     preloader->takeAndPreload(requests);
    507 }
    508 
    509 }
    510