Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2013 Apple Inc. All rights reserved.
      3  * Copyright (C) 2013 Google Inc. All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions are
      7  * met:
      8  *
      9  *     * Redistributions of source code must retain the above copyright
     10  * notice, this list of conditions and the following disclaimer.
     11  *     * Redistributions in binary form must reproduce the above
     12  * copyright notice, this list of conditions and the following disclaimer
     13  * in the documentation and/or other materials provided with the
     14  * distribution.
     15  *     * Neither the name of Google Inc. nor the names of its
     16  * contributors may be used to endorse or promote products derived from
     17  * this software without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     20  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     21  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     22  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     23  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     24  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     25  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     26  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     27  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     29  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 #include "config.h"
     33 #include "core/html/parser/HTMLSrcsetParser.h"
     34 
     35 #include "core/html/parser/HTMLParserIdioms.h"
     36 #include "platform/ParsingUtilities.h"
     37 #include "platform/RuntimeEnabledFeatures.h"
     38 
     39 namespace WebCore {
     40 
     41 static bool compareByDensity(const ImageCandidate& first, const ImageCandidate& second)
     42 {
     43     return first.density() < second.density();
     44 }
     45 
// States of the descriptor tokenizer implemented by tokenizeDescriptors().
enum DescriptorTokenizerState {
    // Initial state; also re-entered after a ')' closes a parenthesized run
    // and when a non-space character is found in AfterToken.
    Start,
    // Entered from Start when '(' is seen; left again when ')' is seen.
    InParenthesis,
    // Entered from Start when an HTML space is seen; stays here while
    // further HTML spaces are consumed.
    AfterToken,
};
     51 
     52 struct DescriptorToken {
     53     unsigned start;
     54     unsigned length;
     55 
     56     DescriptorToken(unsigned start, unsigned length)
     57         : start(start)
     58         , length(length)
     59     {
     60     }
     61 
     62     unsigned lastIndex()
     63     {
     64         return start + length - 1;
     65     }
     66 
     67     template<typename CharType>
     68     int toInt(const CharType* attribute, bool& isValid)
     69     {
     70         return charactersToInt(attribute + start, length - 1, &isValid);
     71     }
     72 
     73     template<typename CharType>
     74     float toFloat(const CharType* attribute, bool& isValid)
     75     {
     76         return charactersToFloat(attribute + start, length - 1, &isValid);
     77     }
     78 };
     79 
     80 template<typename CharType>
     81 static void appendDescriptorAndReset(const CharType* attributeStart, const CharType*& descriptorStart, const CharType* position, Vector<DescriptorToken>& descriptors)
     82 {
     83     if (position > descriptorStart)
     84         descriptors.append(DescriptorToken(descriptorStart - attributeStart, position - descriptorStart));
     85     descriptorStart = 0;
     86 }
     87 
     88 // The following is called appendCharacter to match the spec's terminology.
     89 template<typename CharType>
     90 static void appendCharacter(const CharType* descriptorStart, const CharType* position)
     91 {
     92     // Since we don't copy the tokens, this just set the point where the descriptor tokens start.
     93     if (!descriptorStart)
     94         descriptorStart = position;
     95 }
     96 
     97 template<typename CharType>
     98 static bool isEOF(const CharType* position, const CharType* end)
     99 {
    100     return position >= end;
    101 }
    102 
// Splits the descriptor part of one image candidate into tokens.
//
// attributeStart: start of the whole attribute buffer; token offsets are
//                 stored relative to it.
// position:       in/out cursor. On entry it points at the first character to
//                 tokenize. On return it is either just past the comma that
//                 ended the candidate, or at the point where end of input was
//                 detected.
// attributeEnd:   end of the attribute buffer.
// descriptors:    receives one DescriptorToken per non-empty token found.
//
// Outside parentheses, HTML spaces separate tokens and a comma ends the
// candidate. Inside parentheses only ')' and end of input are special.
template<typename CharType>
static void tokenizeDescriptors(const CharType* attributeStart,
    const CharType*& position,
    const CharType* attributeEnd,
    Vector<DescriptorToken>& descriptors)
{
    DescriptorTokenizerState state = Start;
    const CharType* descriptorsStart = position;
    const CharType* currentDescriptorStart = descriptorsStart;
    while (true) {
        switch (state) {
        case Start:
            // End of input: flush whatever token is pending and stop.
            if (isEOF(position, attributeEnd)) {
                appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors);
                return;
            }
            // A comma ends this candidate: flush the pending token and leave
            // the cursor just past the comma for the caller.
            if (isComma(*position)) {
                appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors);
                ++position;
                return;
            }
            if (isHTMLSpace(*position)) {
                // Whitespace ends the current token. The next token can start
                // no earlier than the following character.
                appendDescriptorAndReset(attributeStart, currentDescriptorStart, position, descriptors);
                currentDescriptorStart = position + 1;
                state = AfterToken;
            } else if (*position == '(') {
                appendCharacter(currentDescriptorStart, position);
                state = InParenthesis;
            } else {
                appendCharacter(currentDescriptorStart, position);
            }
            break;
        case InParenthesis:
            // End of input inside parentheses: flush the pending token and stop.
            if (isEOF(position, attributeEnd)) {
                appendDescriptorAndReset(attributeStart, currentDescriptorStart, attributeEnd, descriptors);
                return;
            }
            if (*position == ')') {
                appendCharacter(currentDescriptorStart, position);
                state = Start;
            } else {
                // Every other character, including spaces and commas, stays
                // part of the current token.
                appendCharacter(currentDescriptorStart, position);
            }
            break;
        case AfterToken:
            // Only whitespace followed the last token; nothing left to flush.
            if (isEOF(position, attributeEnd))
                return;
            if (!isHTMLSpace(*position)) {
                state = Start;
                currentDescriptorStart = position;
                // Step back so that the increment at the bottom of the loop
                // makes the Start state re-examine this same character.
                --position;
            }
            break;
        }
        ++position;
    }
}
    160 
    161 template<typename CharType>
    162 static bool parseDescriptors(const CharType* attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result)
    163 {
    164     for (Vector<DescriptorToken>::iterator it = descriptors.begin(); it != descriptors.end(); ++it) {
    165         if (it->length == 0)
    166             continue;
    167         CharType c = attribute[it->lastIndex()];
    168         bool isValid = false;
    169         if (RuntimeEnabledFeatures::pictureSizesEnabled() && c == 'w') {
    170             if (result.hasDensity() || result.hasWidth())
    171                 return false;
    172             int resourceWidth = it->toInt(attribute, isValid);
    173             if (!isValid || resourceWidth <= 0)
    174                 return false;
    175             result.setResourceWidth(resourceWidth);
    176         } else if (RuntimeEnabledFeatures::pictureSizesEnabled() && c == 'h') {
    177             // This is here only for future compat purposes.
    178             // The value of the 'h' descriptor is not used.
    179             if (result.hasDensity() || result.hasHeight())
    180                 return false;
    181             int resourceHeight = it->toInt(attribute, isValid);
    182             if (!isValid || resourceHeight <= 0)
    183                 return false;
    184             result.setResourceHeight(resourceHeight);
    185         } else if (c == 'x') {
    186             if (result.hasDensity() || result.hasHeight() || result.hasWidth())
    187                 return false;
    188             float density = it->toFloat(attribute, isValid);
    189             if (!isValid || density < 0)
    190                 return false;
    191             result.setDensity(density);
    192         }
    193     }
    194     return true;
    195 }
    196 
    197 static bool parseDescriptors(const String& attribute, Vector<DescriptorToken>& descriptors, DescriptorParsingResult& result)
    198 {
    199     // FIXME: See if StringView can't be extended to replace DescriptorToken here.
    200     if (attribute.is8Bit()) {
    201         return parseDescriptors(attribute.characters8(), descriptors, result);
    202     }
    203     return parseDescriptors(attribute.characters16(), descriptors, result);
    204 }
    205 
    206 // http://picture.responsiveimages.org/#parse-srcset-attr
    207 template<typename CharType>
    208 static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, const CharType* attributeStart, unsigned length, Vector<ImageCandidate>& imageCandidates)
    209 {
    210     const CharType* position = attributeStart;
    211     const CharType* attributeEnd = position + length;
    212 
    213     while (position < attributeEnd) {
    214         // 4. Splitting loop: Collect a sequence of characters that are space characters or U+002C COMMA characters.
    215         skipWhile<CharType, isHTMLSpaceOrComma<CharType> >(position, attributeEnd);
    216         if (position == attributeEnd) {
    217             // Contrary to spec language - descriptor parsing happens on each candidate, so when we reach the attributeEnd, we can exit.
    218             break;
    219         }
    220         const CharType* imageURLStart = position;
    221         // 6. Collect a sequence of characters that are not space characters, and let that be url.
    222 
    223         skipUntil<CharType, isHTMLSpace<CharType> >(position, attributeEnd);
    224         const CharType* imageURLEnd = position;
    225 
    226         DescriptorParsingResult result;
    227 
    228         // 8. If url ends with a U+002C COMMA character (,)
    229         if (isComma(*(position - 1))) {
    230             // Remove all trailing U+002C COMMA characters from url.
    231             imageURLEnd = position - 1;
    232             reverseSkipWhile<CharType, isComma>(imageURLEnd, imageURLStart);
    233             ++imageURLEnd;
    234             // If url is empty, then jump to the step labeled splitting loop.
    235             if (imageURLStart == imageURLEnd)
    236                 continue;
    237         } else {
    238             // Advancing position here (contrary to spec) to avoid an useless extra state machine step.
    239             // Filed a spec bug: https://github.com/ResponsiveImagesCG/picture-element/issues/189
    240             ++position;
    241             Vector<DescriptorToken> descriptorTokens;
    242             tokenizeDescriptors(attributeStart, position, attributeEnd, descriptorTokens);
    243             // Contrary to spec language - descriptor parsing happens on each candidate.
    244             // This is a black-box equivalent, to avoid storing descriptor lists for each candidate.
    245             if (!parseDescriptors(attribute, descriptorTokens, result))
    246                 continue;
    247         }
    248 
    249         ASSERT(imageURLEnd > attributeStart);
    250         unsigned imageURLStartingPosition = imageURLStart - attributeStart;
    251         ASSERT(imageURLEnd > imageURLStart);
    252         unsigned imageURLLength = imageURLEnd - imageURLStart;
    253         imageCandidates.append(ImageCandidate(attribute, imageURLStartingPosition, imageURLLength, result, ImageCandidate::SrcsetOrigin));
    254         // 11. Return to the step labeled splitting loop.
    255     }
    256 }
    257 
    258 static void parseImageCandidatesFromSrcsetAttribute(const String& attribute, Vector<ImageCandidate>& imageCandidates)
    259 {
    260     if (attribute.isNull())
    261         return;
    262 
    263     if (attribute.is8Bit())
    264         parseImageCandidatesFromSrcsetAttribute<LChar>(attribute, attribute.characters8(), attribute.length(), imageCandidates);
    265     else
    266         parseImageCandidatesFromSrcsetAttribute<UChar>(attribute, attribute.characters16(), attribute.length(), imageCandidates);
    267 }
    268 
    269 static ImageCandidate pickBestImageCandidate(float deviceScaleFactor, unsigned sourceSize, Vector<ImageCandidate>& imageCandidates)
    270 {
    271     const float defaultDensityValue = 1.0;
    272     bool ignoreSrc = false;
    273     if (imageCandidates.isEmpty())
    274         return ImageCandidate();
    275 
    276     // http://picture.responsiveimages.org/#normalize-source-densities
    277     for (Vector<ImageCandidate>::iterator it = imageCandidates.begin(); it != imageCandidates.end(); ++it) {
    278         if (it->resourceWidth() > 0) {
    279             it->setDensity((float)it->resourceWidth() / (float)sourceSize);
    280             ignoreSrc = true;
    281         } else if (it->density() < 0) {
    282             it->setDensity(defaultDensityValue);
    283         }
    284     }
    285 
    286     std::stable_sort(imageCandidates.begin(), imageCandidates.end(), compareByDensity);
    287 
    288     unsigned i;
    289     for (i = 0; i < imageCandidates.size() - 1; ++i) {
    290         if ((imageCandidates[i].density() >= deviceScaleFactor) && (!ignoreSrc || !imageCandidates[i].srcOrigin()))
    291             break;
    292     }
    293 
    294     if (imageCandidates[i].srcOrigin() && ignoreSrc) {
    295         ASSERT(i > 0);
    296         --i;
    297     }
    298     float winningDensity = imageCandidates[i].density();
    299 
    300     unsigned winner = i;
    301     // 16. If an entry b in candidates has the same associated ... pixel density as an earlier entry a in candidates,
    302     // then remove entry b
    303     while ((i > 0) && (imageCandidates[--i].density() == winningDensity))
    304         winner = i;
    305 
    306     return imageCandidates[winner];
    307 }
    308 
    309 ImageCandidate bestFitSourceForSrcsetAttribute(float deviceScaleFactor, unsigned sourceSize, const String& srcsetAttribute)
    310 {
    311     Vector<ImageCandidate> imageCandidates;
    312 
    313     parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates);
    314 
    315     return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates);
    316 }
    317 
    318 ImageCandidate bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, const String& srcsetAttribute)
    319 {
    320     if (srcsetAttribute.isNull()) {
    321         if (srcAttribute.isNull())
    322             return ImageCandidate();
    323         return ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin);
    324     }
    325 
    326     Vector<ImageCandidate> imageCandidates;
    327 
    328     parseImageCandidatesFromSrcsetAttribute(srcsetAttribute, imageCandidates);
    329 
    330     if (!srcAttribute.isEmpty())
    331         imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin));
    332 
    333     return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates);
    334 }
    335 
    336 String bestFitSourceForImageAttributes(float deviceScaleFactor, unsigned sourceSize, const String& srcAttribute, ImageCandidate& srcsetImageCandidate)
    337 {
    338     if (srcsetImageCandidate.isEmpty())
    339         return srcAttribute;
    340 
    341     Vector<ImageCandidate> imageCandidates;
    342     imageCandidates.append(srcsetImageCandidate);
    343 
    344     if (!srcAttribute.isEmpty())
    345         imageCandidates.append(ImageCandidate(srcAttribute, 0, srcAttribute.length(), DescriptorParsingResult(), ImageCandidate::SrcOrigin));
    346 
    347     return pickBestImageCandidate(deviceScaleFactor, sourceSize, imageCandidates).toString();
    348 }
    349 
    350 }
    351