Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1.  Redistributions of source code must retain the above copyright
      8  *     notice, this list of conditions and the following disclaimer.
      9  * 2.  Redistributions in binary form must reproduce the above copyright
     10  *     notice, this list of conditions and the following disclaimer in the
     11  *     documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     16  * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     17  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     18  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     19  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     20  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     22  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     23  */
     24 
     25 #include "config.h"
     26 #include "core/html/parser/HTMLParserIdioms.h"
     27 
     28 #include <limits>
     29 #include "core/dom/QualifiedName.h"
     30 #include "core/html/parser/HTMLIdentifier.h"
     31 #include "core/platform/Decimal.h"
     32 #include "wtf/MathExtras.h"
     33 #include "wtf/text/AtomicString.h"
     34 #include "wtf/text/StringBuilder.h"
     35 
     36 namespace WebCore {
     37 
     38 template <typename CharType>
     39 static String stripLeadingAndTrailingHTMLSpaces(String string, CharType characters, unsigned length)
     40 {
     41     unsigned numLeadingSpaces = 0;
     42     unsigned numTrailingSpaces = 0;
     43 
     44     for (; numLeadingSpaces < length; ++numLeadingSpaces) {
     45         if (isNotHTMLSpace(characters[numLeadingSpaces]))
     46             break;
     47     }
     48 
     49     if (numLeadingSpaces == length)
     50         return string.isNull() ? string : emptyAtom.string();
     51 
     52     for (; numTrailingSpaces < length; ++numTrailingSpaces) {
     53         if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1]))
     54             break;
     55     }
     56 
     57     ASSERT(numLeadingSpaces + numTrailingSpaces < length);
     58 
     59     if (!(numLeadingSpaces | numTrailingSpaces))
     60         return string;
     61 
     62     return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
     63 }
     64 
     65 String stripLeadingAndTrailingHTMLSpaces(const String& string)
     66 {
     67     unsigned length = string.length();
     68 
     69     if (!length)
     70         return string.isNull() ? string : emptyAtom.string();
     71 
     72     if (string.is8Bit())
     73         return stripLeadingAndTrailingHTMLSpaces(string, string.characters8(), length);
     74 
     75     return stripLeadingAndTrailingHTMLSpaces(string, string.characters16(), length);
     76 }
     77 
     78 String serializeForNumberType(const Decimal& number)
     79 {
     80     if (number.isZero()) {
     81         // Decimal::toString appends exponent, e.g. "0e-18"
     82         return number.isNegative() ? "-0" : "0";
     83     }
     84     return number.toString();
     85 }
     86 
     87 String serializeForNumberType(double number)
     88 {
     89     // According to HTML5, "the best representation of the number n as a floating
     90     // point number" is a string produced by applying ToString() to n.
     91     return String::numberToStringECMAScript(number);
     92 }
     93 
     94 Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue)
     95 {
     96     // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType
     97 
     98     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
     99     const UChar firstCharacter = string[0];
    100     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
    101         return fallbackValue;
    102 
    103     const Decimal value = Decimal::fromString(string);
    104     if (!value.isFinite())
    105         return fallbackValue;
    106 
    107     // Numbers are considered finite IEEE 754 single-precision floating point values.
    108     // See HTML5 2.5.4.3 `Real numbers.'
    109     // FIXME: We should use numeric_limits<double>::max for number input type.
    110     const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max());
    111     if (value < -floatMax || value > floatMax)
    112         return fallbackValue;
    113 
    114     // We return +0 for -0 case.
    115     return value.isZero() ? Decimal(0) : value;
    116 }
    117 
    118 Decimal parseToDecimalForNumberType(const String& string)
    119 {
    120     return parseToDecimalForNumberType(string, Decimal::nan());
    121 }
    122 
    123 double parseToDoubleForNumberType(const String& string, double fallbackValue)
    124 {
    125     // See HTML5 2.5.4.3 `Real numbers.'
    126 
    127     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
    128     UChar firstCharacter = string[0];
    129     if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter))
    130         return fallbackValue;
    131 
    132     bool valid = false;
    133     double value = string.toDouble(&valid);
    134     if (!valid)
    135         return fallbackValue;
    136 
    137     // NaN and infinity are considered valid by String::toDouble, but not valid here.
    138     if (!std::isfinite(value))
    139         return fallbackValue;
    140 
    141     // Numbers are considered finite IEEE 754 single-precision floating point values.
    142     // See HTML5 2.5.4.3 `Real numbers.'
    143     if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
    144         return fallbackValue;
    145 
    146     // The following expression converts -0 to +0.
    147     return value ? value : 0;
    148 }
    149 
    150 double parseToDoubleForNumberType(const String& string)
    151 {
    152     return parseToDoubleForNumberType(string, std::numeric_limits<double>::quiet_NaN());
    153 }
    154 
    155 template <typename CharacterType>
    156 static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value)
    157 {
    158     // Step 3
    159     int sign = 1;
    160 
    161     // Step 4
    162     while (position < end) {
    163         if (!isHTMLSpace(*position))
    164             break;
    165         ++position;
    166     }
    167 
    168     // Step 5
    169     if (position == end)
    170         return false;
    171     ASSERT(position < end);
    172 
    173     // Step 6
    174     if (*position == '-') {
    175         sign = -1;
    176         ++position;
    177     } else if (*position == '+')
    178         ++position;
    179     if (position == end)
    180         return false;
    181     ASSERT(position < end);
    182 
    183     // Step 7
    184     if (!isASCIIDigit(*position))
    185         return false;
    186 
    187     // Step 8
    188     StringBuilder digits;
    189     while (position < end) {
    190         if (!isASCIIDigit(*position))
    191             break;
    192         digits.append(*position++);
    193     }
    194 
    195     // Step 9
    196     bool ok;
    197     if (digits.is8Bit())
    198         value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok);
    199     else
    200         value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok);
    201     return ok;
    202 }
    203 
    204 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
    205 bool parseHTMLInteger(const String& input, int& value)
    206 {
    207     // Step 1
    208     // Step 2
    209     unsigned length = input.length();
    210     if (!length || input.is8Bit()) {
    211         const LChar* start = input.characters8();
    212         return parseHTMLIntegerInternal(start, start + length, value);
    213     }
    214 
    215     const UChar* start = input.characters16();
    216     return parseHTMLIntegerInternal(start, start + length, value);
    217 }
    218 
    219 template <typename CharacterType>
    220 static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value)
    221 {
    222     // Step 3
    223     while (position < end) {
    224         if (!isHTMLSpace(*position))
    225             break;
    226         ++position;
    227     }
    228 
    229     // Step 4
    230     if (position == end)
    231         return false;
    232     ASSERT(position < end);
    233 
    234     // Step 5
    235     if (*position == '+')
    236         ++position;
    237 
    238     // Step 6
    239     if (position == end)
    240         return false;
    241     ASSERT(position < end);
    242 
    243     // Step 7
    244     if (!isASCIIDigit(*position))
    245         return false;
    246 
    247     // Step 8
    248     StringBuilder digits;
    249     while (position < end) {
    250         if (!isASCIIDigit(*position))
    251             break;
    252         digits.append(*position++);
    253     }
    254 
    255     // Step 9
    256     bool ok;
    257     if (digits.is8Bit())
    258         value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok);
    259     else
    260         value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok);
    261     return ok;
    262 }
    263 
    264 
    265 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers
    266 bool parseHTMLNonNegativeInteger(const String& input, unsigned& value)
    267 {
    268     // Step 1
    269     // Step 2
    270     unsigned length = input.length();
    271     if (length && input.is8Bit()) {
    272         const LChar* start = input.characters8();
    273         return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
    274     }
    275 
    276     const UChar* start = input.characters16();
    277     return parseHTMLNonNegativeIntegerInternal(start, start + length, value);
    278 }
    279 
    280 static bool threadSafeEqual(const StringImpl* a, const StringImpl* b)
    281 {
    282     if (a == b)
    283         return true;
    284     if (a->hash() != b->hash())
    285         return false;
    286     return equalNonNull(a, b);
    287 }
    288 
    289 bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b)
    290 {
    291     return threadSafeEqual(a.localName().impl(), b.localName().impl());
    292 }
    293 
    294 bool threadSafeMatch(const HTMLIdentifier& localName, const QualifiedName& qName)
    295 {
    296     return threadSafeEqual(localName.asStringImpl(), qName.localName().impl());
    297 }
    298 
    299 }
    300