Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1.  Redistributions of source code must retain the above copyright
      8  *     notice, this list of conditions and the following disclaimer.
      9  * 2.  Redistributions in binary form must reproduce the above copyright
     10  *     notice, this list of conditions and the following disclaimer in the
     11  *     documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     16  * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     17  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     18  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     19  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     20  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     22  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     23  */
     24 
     25 #include "config.h"
     26 #include "core/html/parser/HTMLParserIdioms.h"
     27 
     28 #include <limits>
     29 #include "wtf/MathExtras.h"
     30 #include "wtf/text/AtomicString.h"
     31 #include "wtf/text/StringBuilder.h"
     32 #include "wtf/text/StringHash.h"
     33 
     34 namespace WebCore {
     35 
     36 template <typename CharType>
     37 static String stripLeadingAndTrailingHTMLSpaces(String string, const CharType* characters, unsigned length)
     38 {
     39     unsigned numLeadingSpaces = 0;
     40     unsigned numTrailingSpaces = 0;
     41 
     42     for (; numLeadingSpaces < length; ++numLeadingSpaces) {
     43         if (isNotHTMLSpace<CharType>(characters[numLeadingSpaces]))
     44             break;
     45     }
     46 
     47     if (numLeadingSpaces == length)
     48         return string.isNull() ? string : emptyAtom.string();
     49 
     50     for (; numTrailingSpaces < length; ++numTrailingSpaces) {
     51         if (isNotHTMLSpace<CharType>(characters[length - numTrailingSpaces - 1]))
     52             break;
     53     }
     54 
     55     ASSERT(numLeadingSpaces + numTrailingSpaces < length);
     56 
     57     if (!(numLeadingSpaces | numTrailingSpaces))
     58         return string;
     59 
     60     return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
     61 }
     62 
     63 String stripLeadingAndTrailingHTMLSpaces(const String& string)
     64 {
     65     unsigned length = string.length();
     66 
     67     if (!length)
     68         return string.isNull() ? string : emptyAtom.string();
     69 
     70     if (string.is8Bit())
     71         return stripLeadingAndTrailingHTMLSpaces<LChar>(string, string.characters8(), length);
     72 
     73     return stripLeadingAndTrailingHTMLSpaces<UChar>(string, string.characters16(), length);
     74 }
     75 
     76 String serializeForNumberType(const Decimal& number)
     77 {
     78     if (number.isZero()) {
     79         // Decimal::toString appends exponent, e.g. "0e-18"
     80         return number.isNegative() ? "-0" : "0";
     81     }
     82     return number.toString();
     83 }
     84 
     85 String serializeForNumberType(double number)
     86 {
     87     // According to HTML5, "the best representation of the number n as a floating
     88     // point number" is a string produced by applying ToString() to n.
     89     return String::numberToStringECMAScript(number);
     90 }
     91 
     92 Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
     93 {
     94     // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType
     95 
     96     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
     97     const UChar firstCharacter = string[0];
     98     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
     99         return fallbackValue;
    100 
    101     const Decimal value = Decimal::fromString(string);
    102     if (!value.isFinite())
    103         return fallbackValue;
    104 
    105     // Numbers are considered finite IEEE 754 single-precision floating point values.
    106     // See HTML5 2.5.4.3 `Real numbers.'
    107     // FIXME: We should use numeric_limits<double>::max for number input type.
    108     const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max());
    109     if (value < -floatMax || value > floatMax)
    110         return fallbackValue;
    111 
    112     // We return +0 for -0 case.
    113     return value.isZero() ? Decimal(0) : value;
    114 }
    115 
    116 double parseToDoubleForNumberType(const String& string, double fallbackValue)
    117 {
    118     // See HTML5 2.5.4.3 `Real numbers.'
    119 
    120     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
    121     UChar firstCharacter = string[0];
    122     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
    123         return fallbackValue;
    124 
    125     bool valid = false;
    126     double value = string.toDouble(&valid);
    127     if (!valid)
    128         return fallbackValue;
    129 
    130     // NaN and infinity are considered valid by String::toDouble, but not valid here.
    131     if (!std::isfinite(value))
    132         return fallbackValue;
    133 
    134     // Numbers are considered finite IEEE 754 single-precision floating point values.
    135     // See HTML5 2.5.4.3 `Real numbers.'
    136     if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
    137         return fallbackValue;
    138 
    139     // The following expression converts -0 to +0.
    140     return value ? value : 0;
    141 }
    142 
    143 template <typename CharacterType>
    144 static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
    145 {
    146     // Step 3
    147     int sign = 1;
    148 
    149     // Step 4
    150     while (position < end) {
    151         if (!isHTMLSpace<CharacterType>(*position))
    152             break;
    153         ++position;
    154     }
    155 
    156     // Step 5
    157     if (position == end)
    158         return false;
    159     ASSERT(position < end);
    160 
    161     // Step 6
    162     if (*position == '-') {
    163         sign = -1;
    164         ++position;
    165     } else if (*position == '+')
    166         ++position;
    167     if (position == end)
    168         return false;
    169     ASSERT(position < end);
    170 
    171     // Step 7
    172     if (!isASCIIDigit(*position))
    173         return false;
    174 
    175     // Step 8
    176     StringBuilder digits;
    177     while (position < end) {
    178         if (!isASCIIDigit(*position))
    179             break;
    180         digits.append(*position++);
    181     }
    182 
    183     // Step 9
    184     bool ok;
    185     if (digits.is8Bit())
    186         value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
    187     else
    188         value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
    189     return ok;
    190 }
    191 
    192 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
    193 bool parseHTMLInteger(const String& input, int& value)
    194 {
    195     // Step 1
    196     // Step 2
    197     unsigned length = input.length();
    198     if (!length || input.is8Bit()) {
    199         const LChar* start = input.characters8();
    200         return parseHTMLIntegerInternal(start, start + length, value);
    201     }
    202 
    203     const UChar* start = input.characters16();
    204     return parseHTMLIntegerInternal(start, start + length, value);
    205 }
    206 
    207 template <typename CharacterType>
    208 static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
    209 {
    210     // Step 3
    211     while (position < end) {
    212         if (!isHTMLSpace<CharacterType>(*position))
    213             break;
    214         ++position;
    215     }
    216 
    217     // Step 4
    218     if (position == end)
    219         return false;
    220     ASSERT(position < end);
    221 
    222     // Step 5
    223     if (*position == '+')
    224         ++position;
    225 
    226     // Step 6
    227     if (position == end)
    228         return false;
    229     ASSERT(position < end);
    230 
    231     // Step 7
    232     if (!isASCIIDigit(*position))
    233         return false;
    234 
    235     // Step 8
    236     StringBuilder digits;
    237     while (position < end) {
    238         if (!isASCIIDigit(*position))
    239             break;
    240         digits.append(*position++);
    241     }
    242 
    243     // Step 9
    244     bool ok;
    245     if (digits.is8Bit())
    246         value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
    247     else
    248         value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
    249     return ok;
    250 }
    251 
    252 
    253 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
    254 bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
    255 {
    256     // Step 1
    257     // Step 2
    258     unsigned length = input.length();
    259     if (length && input.is8Bit()) {
    260         const LChar* start = input.characters8();
    261         return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
    262     }
    263 
    264     const UChar* start = input.characters16();
    265     return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
    266 }
    267 
    268 static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
    269 {
    270     if (a == b)
    271         return true;
    272     if (a->hash() != b->hash())
    273         return false;
    274     return equalNonNull(a, b);
    275 }
    276 
    277 bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
    278 {
    279     return threadSafeEqual(a.localName().impl(), b.localName().impl());
    280 }
    281 
    282 bool threadSafeMatch(const String& localName, const QualifiedName& qName)
    283 {
    284     return threadSafeEqual(localName.impl(), qName.localName().impl());
    285 }
    286 
    287 StringImpl* findStringIfStatic(const UChar* characters, unsigned length)
    288 {
    289     // We don't need to try hashing if we know the string is too long.
    290     if (length > StringImpl::highestStaticStringLength())
    291         return 0;
    292     // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses.
    293     unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length);
    294     const WTF::StaticStringsTable& table = StringImpl::allStaticStrings();
    295     ASSERT(!table.isEmpty());
    296 
    297     WTF::StaticStringsTable::const_iterator it = table.find(hash);
    298     if (it == table.end())
    299         return 0;
    300     // It's possible to have hash collisions between arbitrary strings and
    301     // known identifiers (e.g. "bvvfg" collides with "script").
    302     // However ASSERTs in StringImpl::createStatic guard against there ever being collisions
    303     // between static strings.
    304     if (!equal(it->value, characters, length))
    305         return 0;
    306     return it->value;
    307 }
    308 
    309 }
    310