Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
      3  * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
      4  * Copyright (C) 2010 Google Inc. All Rights Reserved.
      5  *
      6  * Redistribution and use in source and binary forms, with or without
      7  * modification, are permitted provided that the following conditions
      8  * are met:
      9  * 1. Redistributions of source code must retain the above copyright
     10  *    notice, this list of conditions and the following disclaimer.
     11  * 2. Redistributions in binary form must reproduce the above copyright
     12  *    notice, this list of conditions and the following disclaimer in the
     13  *    documentation and/or other materials provided with the distribution.
     14  *
     15  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
     16  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     18  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
     19  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     20  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     22  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
     23  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     25  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "config.h"
     29 #include "core/html/parser/HTMLPreloadScanner.h"
     30 
     31 #include "core/HTMLNames.h"
     32 #include "core/InputTypeNames.h"
     33 #include "core/css/MediaList.h"
     34 #include "core/css/MediaQueryEvaluator.h"
     35 #include "core/css/MediaValues.h"
     36 #include "core/css/parser/SizesAttributeParser.h"
     37 #include "core/html/LinkRelAttribute.h"
     38 #include "core/html/parser/HTMLParserIdioms.h"
     39 #include "core/html/parser/HTMLSrcsetParser.h"
     40 #include "core/html/parser/HTMLTokenizer.h"
     41 #include "platform/RuntimeEnabledFeatures.h"
     42 #include "platform/TraceEvent.h"
     43 #include "wtf/MainThread.h"
     44 
     45 namespace blink {
     46 
     47 using namespace HTMLNames;
     48 
     49 static bool match(const StringImpl* impl, const QualifiedName& qName)
     50 {
     51     return impl == qName.localName().impl();
     52 }
     53 
     54 static bool match(const AtomicString& name, const QualifiedName& qName)
     55 {
     56     ASSERT(isMainThread());
     57     return qName.localName() == name;
     58 }
     59 
     60 static bool match(const String& name, const QualifiedName& qName)
     61 {
     62     return threadSafeMatch(name, qName);
     63 }
     64 
     65 static const StringImpl* tagImplFor(const HTMLToken::DataVector& data)
     66 {
     67     AtomicString tagName(data);
     68     const StringImpl* result = tagName.impl();
     69     if (result->isStatic())
     70         return result;
     71     return 0;
     72 }
     73 
     74 static const StringImpl* tagImplFor(const String& tagName)
     75 {
     76     const StringImpl* result = tagName.impl();
     77     if (result->isStatic())
     78         return result;
     79     return 0;
     80 }
     81 
     82 static String initiatorFor(const StringImpl* tagImpl)
     83 {
     84     ASSERT(tagImpl);
     85     if (match(tagImpl, imgTag))
     86         return imgTag.localName();
     87     if (match(tagImpl, inputTag))
     88         return inputTag.localName();
     89     if (match(tagImpl, linkTag))
     90         return linkTag.localName();
     91     if (match(tagImpl, scriptTag))
     92         return scriptTag.localName();
     93     ASSERT_NOT_REACHED();
     94     return emptyString();
     95 }
     96 
     97 static bool mediaAttributeMatches(const MediaValues& mediaValues, const String& attributeValue)
     98 {
     99     RefPtrWillBeRawPtr<MediaQuerySet> mediaQueries = MediaQuerySet::createOffMainThread(attributeValue);
    100     MediaQueryEvaluator mediaQueryEvaluator(mediaValues);
    101     return mediaQueryEvaluator.eval(mediaQueries.get());
    102 }
    103 
    104 class TokenPreloadScanner::StartTagScanner {
    105 public:
    106     StartTagScanner(const StringImpl* tagImpl, PassRefPtr<MediaValues> mediaValues)
    107         : m_tagImpl(tagImpl)
    108         , m_linkIsStyleSheet(false)
    109         , m_matchedMediaAttribute(true)
    110         , m_inputIsImage(false)
    111         , m_sourceSize(0)
    112         , m_sourceSizeSet(false)
    113         , m_isCORSEnabled(false)
    114         , m_defer(FetchRequest::NoDefer)
    115         , m_allowCredentials(DoNotAllowStoredCredentials)
    116         , m_mediaValues(mediaValues)
    117     {
    118         if (match(m_tagImpl, imgTag)
    119             || match(m_tagImpl, sourceTag)) {
    120             if (RuntimeEnabledFeatures::pictureSizesEnabled())
    121                 m_sourceSize = SizesAttributeParser(m_mediaValues, String()).length();
    122             return;
    123         }
    124         if ( !match(m_tagImpl, inputTag)
    125             && !match(m_tagImpl, linkTag)
    126             && !match(m_tagImpl, scriptTag))
    127             m_tagImpl = 0;
    128     }
    129 
    130     enum URLReplacement {
    131         AllowURLReplacement,
    132         DisallowURLReplacement
    133     };
    134 
    135     void processAttributes(const HTMLToken::AttributeList& attributes)
    136     {
    137         ASSERT(isMainThread());
    138         if (!m_tagImpl)
    139             return;
    140         for (HTMLToken::AttributeList::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter) {
    141             AtomicString attributeName(iter->name);
    142             String attributeValue = StringImpl::create8BitIfPossible(iter->value);
    143             processAttribute(attributeName, attributeValue);
    144         }
    145     }
    146 
    147     void processAttributes(const Vector<CompactHTMLToken::Attribute>& attributes)
    148     {
    149         if (!m_tagImpl)
    150             return;
    151         for (Vector<CompactHTMLToken::Attribute>::const_iterator iter = attributes.begin(); iter != attributes.end(); ++iter)
    152             processAttribute(iter->name, iter->value);
    153     }
    154 
    155     void handlePictureSourceURL(String& sourceURL)
    156     {
    157         if (match(m_tagImpl, sourceTag) && m_matchedMediaAttribute && sourceURL.isEmpty())
    158             sourceURL = m_srcsetImageCandidate.toString();
    159         else if (match(m_tagImpl, imgTag) && !sourceURL.isEmpty())
    160             setUrlToLoad(sourceURL, AllowURLReplacement);
    161     }
    162 
    163     PassOwnPtr<PreloadRequest> createPreloadRequest(const KURL& predictedBaseURL, const SegmentedString& source)
    164     {
    165         if (!shouldPreload() || !m_matchedMediaAttribute)
    166             return nullptr;
    167 
    168         TRACE_EVENT_INSTANT1("net", "PreloadRequest", "url", m_urlToLoad.ascii());
    169         TextPosition position = TextPosition(source.currentLine(), source.currentColumn());
    170         OwnPtr<PreloadRequest> request = PreloadRequest::create(initiatorFor(m_tagImpl), position, m_urlToLoad, predictedBaseURL, resourceType());
    171         if (isCORSEnabled())
    172             request->setCrossOriginEnabled(allowStoredCredentials());
    173         request->setCharset(charset());
    174         request->setDefer(m_defer);
    175         return request.release();
    176     }
    177 
    178 private:
    179     template<typename NameType>
    180     void processScriptAttribute(const NameType& attributeName, const String& attributeValue)
    181     {
    182         // FIXME - Don't set crossorigin multiple times.
    183         if (match(attributeName, srcAttr))
    184             setUrlToLoad(attributeValue, DisallowURLReplacement);
    185         else if (match(attributeName, crossoriginAttr))
    186             setCrossOriginAllowed(attributeValue);
    187         else if (match(attributeName, asyncAttr))
    188             setDefer(FetchRequest::LazyLoad);
    189         else if (match(attributeName, deferAttr))
    190             setDefer(FetchRequest::LazyLoad);
    191     }
    192 
    193     template<typename NameType>
    194     void processImgAttribute(const NameType& attributeName, const String& attributeValue)
    195     {
    196         if (match(attributeName, srcAttr) && m_imgSrcUrl.isNull()) {
    197             m_imgSrcUrl = attributeValue;
    198             setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue, m_srcsetImageCandidate), AllowURLReplacement);
    199         } else if (match(attributeName, crossoriginAttr)) {
    200             setCrossOriginAllowed(attributeValue);
    201         } else if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
    202             m_srcsetAttributeValue = attributeValue;
    203             m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
    204             setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
    205         } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && match(attributeName, sizesAttr) && !m_sourceSizeSet) {
    206             m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
    207             m_sourceSizeSet = true;
    208             if (!m_srcsetImageCandidate.isEmpty()) {
    209                 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
    210                 setUrlToLoad(bestFitSourceForImageAttributes(m_mediaValues->devicePixelRatio(), m_sourceSize, m_imgSrcUrl, m_srcsetImageCandidate), AllowURLReplacement);
    211             }
    212         }
    213     }
    214 
    215     template<typename NameType>
    216     void processLinkAttribute(const NameType& attributeName, const String& attributeValue)
    217     {
    218         // FIXME - Don't set rel/media/crossorigin multiple times.
    219         if (match(attributeName, hrefAttr))
    220             setUrlToLoad(attributeValue, DisallowURLReplacement);
    221         else if (match(attributeName, relAttr))
    222             m_linkIsStyleSheet = relAttributeIsStyleSheet(attributeValue);
    223         else if (match(attributeName, mediaAttr))
    224             m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
    225         else if (match(attributeName, crossoriginAttr))
    226             setCrossOriginAllowed(attributeValue);
    227     }
    228 
    229     template<typename NameType>
    230     void processInputAttribute(const NameType& attributeName, const String& attributeValue)
    231     {
    232         // FIXME - Don't set type multiple times.
    233         if (match(attributeName, srcAttr))
    234             setUrlToLoad(attributeValue, DisallowURLReplacement);
    235         else if (match(attributeName, typeAttr))
    236             m_inputIsImage = equalIgnoringCase(attributeValue, InputTypeNames::image);
    237     }
    238 
    239     template<typename NameType>
    240     void processSourceAttribute(const NameType& attributeName, const String& attributeValue)
    241     {
    242         if (!RuntimeEnabledFeatures::pictureEnabled())
    243             return;
    244         if (match(attributeName, srcsetAttr) && m_srcsetImageCandidate.isEmpty()) {
    245             m_srcsetAttributeValue = attributeValue;
    246             m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, attributeValue);
    247         } else if (match(attributeName, sizesAttr) && !m_sourceSizeSet) {
    248             m_sourceSize = SizesAttributeParser(m_mediaValues, attributeValue).length();
    249             m_sourceSizeSet = true;
    250             if (!m_srcsetImageCandidate.isEmpty()) {
    251                 m_srcsetImageCandidate = bestFitSourceForSrcsetAttribute(m_mediaValues->devicePixelRatio(), m_sourceSize, m_srcsetAttributeValue);
    252             }
    253         } else if (match(attributeName, mediaAttr)) {
    254             // FIXME - Don't match media multiple times.
    255             m_matchedMediaAttribute = mediaAttributeMatches(*m_mediaValues, attributeValue);
    256         }
    257 
    258     }
    259 
    260     template<typename NameType>
    261     void processAttribute(const NameType& attributeName, const String& attributeValue)
    262     {
    263         if (match(attributeName, charsetAttr))
    264             m_charset = attributeValue;
    265 
    266         if (match(m_tagImpl, scriptTag))
    267             processScriptAttribute(attributeName, attributeValue);
    268         else if (match(m_tagImpl, imgTag))
    269             processImgAttribute(attributeName, attributeValue);
    270         else if (match(m_tagImpl, linkTag))
    271             processLinkAttribute(attributeName, attributeValue);
    272         else if (match(m_tagImpl, inputTag))
    273             processInputAttribute(attributeName, attributeValue);
    274         else if (match(m_tagImpl, sourceTag))
    275             processSourceAttribute(attributeName, attributeValue);
    276     }
    277 
    278     static bool relAttributeIsStyleSheet(const String& attributeValue)
    279     {
    280         LinkRelAttribute rel(attributeValue);
    281         return rel.isStyleSheet() && !rel.isAlternate() && rel.iconType() == InvalidIcon && !rel.isDNSPrefetch();
    282     }
    283 
    284     void setUrlToLoad(const String& value, URLReplacement replacement)
    285     {
    286         // We only respect the first src/href, per HTML5:
    287         // http://www.whatwg.org/specs/web-apps/current-work/multipage/tokenization.html#attribute-name-state
    288         if (replacement == DisallowURLReplacement && !m_urlToLoad.isEmpty())
    289             return;
    290         String url = stripLeadingAndTrailingHTMLSpaces(value);
    291         if (url.isEmpty())
    292             return;
    293         m_urlToLoad = url;
    294     }
    295 
    296     const String& charset() const
    297     {
    298         // FIXME: Its not clear that this if is needed, the loader probably ignores charset for image requests anyway.
    299         if (match(m_tagImpl, imgTag))
    300             return emptyString();
    301         return m_charset;
    302     }
    303 
    304     Resource::Type resourceType() const
    305     {
    306         if (match(m_tagImpl, scriptTag))
    307             return Resource::Script;
    308         if (match(m_tagImpl, imgTag) || (match(m_tagImpl, inputTag) && m_inputIsImage))
    309             return Resource::Image;
    310         if (match(m_tagImpl, linkTag) && m_linkIsStyleSheet)
    311             return Resource::CSSStyleSheet;
    312         ASSERT_NOT_REACHED();
    313         return Resource::Raw;
    314     }
    315 
    316     bool shouldPreload() const
    317     {
    318         if (m_urlToLoad.isEmpty())
    319             return false;
    320         if (match(m_tagImpl, linkTag) && !m_linkIsStyleSheet)
    321             return false;
    322         if (match(m_tagImpl, inputTag) && !m_inputIsImage)
    323             return false;
    324         return true;
    325     }
    326 
    327     bool isCORSEnabled() const
    328     {
    329         return m_isCORSEnabled;
    330     }
    331 
    332     StoredCredentials allowStoredCredentials() const
    333     {
    334         return m_allowCredentials;
    335     }
    336 
    337     void setCrossOriginAllowed(const String& corsSetting)
    338     {
    339         m_isCORSEnabled = true;
    340         if (!corsSetting.isNull() && equalIgnoringCase(stripLeadingAndTrailingHTMLSpaces(corsSetting), "use-credentials"))
    341             m_allowCredentials = AllowStoredCredentials;
    342         else
    343             m_allowCredentials = DoNotAllowStoredCredentials;
    344     }
    345 
    346     void setDefer(FetchRequest::DeferOption defer)
    347     {
    348         m_defer = defer;
    349     }
    350 
    351     bool defer() const
    352     {
    353         return m_defer;
    354     }
    355 
    356     const StringImpl* m_tagImpl;
    357     String m_urlToLoad;
    358     ImageCandidate m_srcsetImageCandidate;
    359     String m_charset;
    360     bool m_linkIsStyleSheet;
    361     bool m_matchedMediaAttribute;
    362     bool m_inputIsImage;
    363     String m_imgSrcUrl;
    364     String m_srcsetAttributeValue;
    365     unsigned m_sourceSize;
    366     bool m_sourceSizeSet;
    367     bool m_isCORSEnabled;
    368     FetchRequest::DeferOption m_defer;
    369     StoredCredentials m_allowCredentials;
    370     RefPtr<MediaValues> m_mediaValues;
    371 };
    372 
    373 TokenPreloadScanner::TokenPreloadScanner(const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    374     : m_documentURL(documentURL)
    375     , m_inStyle(false)
    376     , m_inPicture(false)
    377     , m_templateCount(0)
    378     , m_mediaValues(mediaValues)
    379 {
    380 }
    381 
    382 TokenPreloadScanner::~TokenPreloadScanner()
    383 {
    384 }
    385 
    386 TokenPreloadScannerCheckpoint TokenPreloadScanner::createCheckpoint()
    387 {
    388     TokenPreloadScannerCheckpoint checkpoint = m_checkpoints.size();
    389     m_checkpoints.append(Checkpoint(m_predictedBaseElementURL, m_inStyle, m_templateCount));
    390     return checkpoint;
    391 }
    392 
    393 void TokenPreloadScanner::rewindTo(TokenPreloadScannerCheckpoint checkpointIndex)
    394 {
    395     ASSERT(checkpointIndex < m_checkpoints.size()); // If this ASSERT fires, checkpointIndex is invalid.
    396     const Checkpoint& checkpoint = m_checkpoints[checkpointIndex];
    397     m_predictedBaseElementURL = checkpoint.predictedBaseElementURL;
    398     m_inStyle = checkpoint.inStyle;
    399     m_templateCount = checkpoint.templateCount;
    400     m_cssScanner.reset();
    401     m_checkpoints.clear();
    402 }
    403 
    404 void TokenPreloadScanner::scan(const HTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    405 {
    406     scanCommon(token, source, requests);
    407 }
    408 
    409 void TokenPreloadScanner::scan(const CompactHTMLToken& token, const SegmentedString& source, PreloadRequestStream& requests)
    410 {
    411     scanCommon(token, source, requests);
    412 }
    413 
    414 template<typename Token>
    415 void TokenPreloadScanner::scanCommon(const Token& token, const SegmentedString& source, PreloadRequestStream& requests)
    416 {
    417     switch (token.type()) {
    418     case HTMLToken::Character: {
    419         if (!m_inStyle)
    420             return;
    421         m_cssScanner.scan(token.data(), source, requests);
    422         return;
    423     }
    424     case HTMLToken::EndTag: {
    425         const StringImpl* tagImpl = tagImplFor(token.data());
    426         if (match(tagImpl, templateTag)) {
    427             if (m_templateCount)
    428                 --m_templateCount;
    429             return;
    430         }
    431         if (match(tagImpl, styleTag)) {
    432             if (m_inStyle)
    433                 m_cssScanner.reset();
    434             m_inStyle = false;
    435             return;
    436         }
    437         if (match(tagImpl, pictureTag))
    438             m_inPicture = false;
    439         return;
    440     }
    441     case HTMLToken::StartTag: {
    442         if (m_templateCount)
    443             return;
    444         const StringImpl* tagImpl = tagImplFor(token.data());
    445         if (match(tagImpl, templateTag)) {
    446             ++m_templateCount;
    447             return;
    448         }
    449         if (match(tagImpl, styleTag)) {
    450             m_inStyle = true;
    451             return;
    452         }
    453         if (match(tagImpl, baseTag)) {
    454             // The first <base> element is the one that wins.
    455             if (!m_predictedBaseElementURL.isEmpty())
    456                 return;
    457             updatePredictedBaseURL(token);
    458             return;
    459         }
    460         if (RuntimeEnabledFeatures::pictureEnabled() && (match(tagImpl, pictureTag))) {
    461             m_inPicture = true;
    462             m_pictureSourceURL = String();
    463             return;
    464         }
    465 
    466         StartTagScanner scanner(tagImpl, m_mediaValues);
    467         scanner.processAttributes(token.attributes());
    468         if (m_inPicture)
    469             scanner.handlePictureSourceURL(m_pictureSourceURL);
    470         OwnPtr<PreloadRequest> request = scanner.createPreloadRequest(m_predictedBaseElementURL, source);
    471         if (request)
    472             requests.append(request.release());
    473         return;
    474     }
    475     default: {
    476         return;
    477     }
    478     }
    479 }
    480 
    481 template<typename Token>
    482 void TokenPreloadScanner::updatePredictedBaseURL(const Token& token)
    483 {
    484     ASSERT(m_predictedBaseElementURL.isEmpty());
    485     if (const typename Token::Attribute* hrefAttribute = token.getAttributeItem(hrefAttr))
    486         m_predictedBaseElementURL = KURL(m_documentURL, stripLeadingAndTrailingHTMLSpaces(hrefAttribute->value)).copy();
    487 }
    488 
    489 HTMLPreloadScanner::HTMLPreloadScanner(const HTMLParserOptions& options, const KURL& documentURL, PassRefPtr<MediaValues> mediaValues)
    490     : m_scanner(documentURL, mediaValues)
    491     , m_tokenizer(HTMLTokenizer::create(options))
    492 {
    493 }
    494 
    495 HTMLPreloadScanner::~HTMLPreloadScanner()
    496 {
    497 }
    498 
    499 void HTMLPreloadScanner::appendToEnd(const SegmentedString& source)
    500 {
    501     m_source.append(source);
    502 }
    503 
    504 void HTMLPreloadScanner::scan(HTMLResourcePreloader* preloader, const KURL& startingBaseElementURL)
    505 {
    506     ASSERT(isMainThread()); // HTMLTokenizer::updateStateFor only works on the main thread.
    507 
    508     TRACE_EVENT1("blink", "HTMLPreloadScanner::scan", "source_length", m_source.length());
    509 
    510     // When we start scanning, our best prediction of the baseElementURL is the real one!
    511     if (!startingBaseElementURL.isEmpty())
    512         m_scanner.setPredictedBaseElementURL(startingBaseElementURL);
    513 
    514     PreloadRequestStream requests;
    515 
    516     while (m_tokenizer->nextToken(m_source, m_token)) {
    517         if (m_token.type() == HTMLToken::StartTag)
    518             m_tokenizer->updateStateFor(attemptStaticStringCreation(m_token.name(), Likely8Bit));
    519         m_scanner.scan(m_token, m_source, requests);
    520         m_token.clear();
    521     }
    522 
    523     preloader->takeAndPreload(requests);
    524 }
    525 
    526 }
    527