Home | History | Annotate | Download | only in parser
      1 /*
      2  * Copyright (C) 2010 Apple Inc. All rights reserved.
      3  *
      4  * Redistribution and use in source and binary forms, with or without
      5  * modification, are permitted provided that the following conditions
      6  * are met:
      7  * 1.  Redistributions of source code must retain the above copyright
      8  *     notice, this list of conditions and the following disclaimer.
      9  * 2.  Redistributions in binary form must reproduce the above copyright
     10  *     notice, this list of conditions and the following disclaimer in the
     11  *     documentation and/or other materials provided with the distribution.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
     14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     15  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     16  * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
     17  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     18  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     19  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
     20  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     21  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     22  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     23  */
     24 
     25 #include "config.h"
     26 #include "HTMLParserIdioms.h"
     27 
     28 #include <limits>
     29 #include <wtf/MathExtras.h>
     30 #include <wtf/dtoa.h>
     31 #include <wtf/text/AtomicString.h>
     32 
     33 namespace WebCore {
     34 
     35 String stripLeadingAndTrailingHTMLSpaces(const String& string)
     36 {
     37     const UChar* characters = string.characters();
     38     unsigned length = string.length();
     39 
     40     unsigned numLeadingSpaces;
     41     for (numLeadingSpaces = 0; numLeadingSpaces < length; ++numLeadingSpaces) {
     42         if (isNotHTMLSpace(characters[numLeadingSpaces]))
     43             break;
     44     }
     45 
     46     if (numLeadingSpaces == length)
     47         return string.isNull() ? string : emptyAtom.string();
     48 
     49     unsigned numTrailingSpaces;
     50     for (numTrailingSpaces = 0; numTrailingSpaces < length; ++numTrailingSpaces) {
     51         if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1]))
     52             break;
     53     }
     54 
     55     ASSERT(numLeadingSpaces + numTrailingSpaces < length);
     56 
     57     return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
     58 }
     59 
     60 String serializeForNumberType(double number)
     61 {
     62     // According to HTML5, "the best representation of the number n as a floating
     63     // point number" is a string produced by applying ToString() to n.
     64     NumberToStringBuffer buffer;
     65     unsigned length = numberToString(number, buffer);
     66     return String(buffer, length);
     67 }
     68 
     69 bool parseToDoubleForNumberType(const String& string, double* result)
     70 {
     71     // See HTML5 2.4.4.3 `Real numbers.'
     72 
     73     // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
     74     UChar firstCharacter = string[0];
     75     if (firstCharacter != '-' && !isASCIIDigit(firstCharacter))
     76         return false;
     77 
     78     bool valid = false;
     79     double value = string.toDouble(&valid);
     80     if (!valid)
     81         return false;
     82 
     83     // NaN and infinity are considered valid by String::toDouble, but not valid here.
     84     if (!isfinite(value))
     85         return false;
     86 
     87     // Numbers are considered finite IEEE 754 single-precision floating point values.
     88     // See HTML5 2.4.4.3 `Real numbers.'
     89     if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
     90         return false;
     91 
     92     if (result) {
     93         // The following expression converts -0 to +0.
     94         *result = value ? value : 0;
     95     }
     96 
     97     return true;
     98 }
     99 
    100 bool parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, double *result, unsigned *decimalPlaces)
    101 {
    102     if (decimalPlaces)
    103         *decimalPlaces = 0;
    104 
    105     if (!parseToDoubleForNumberType(string, result))
    106         return false;
    107 
    108     if (!decimalPlaces)
    109         return true;
    110 
    111     size_t dotIndex = string.find('.');
    112     size_t eIndex = string.find('e');
    113     if (eIndex == notFound)
    114         eIndex = string.find('E');
    115 
    116     unsigned baseDecimalPlaces = 0;
    117     if (dotIndex != notFound) {
    118         if (eIndex == notFound)
    119             baseDecimalPlaces = string.length() - dotIndex - 1;
    120         else
    121             baseDecimalPlaces = eIndex - dotIndex - 1;
    122     }
    123 
    124     int exponent = 0;
    125     if (eIndex != notFound) {
    126         unsigned cursor = eIndex + 1, cursorSaved;
    127         int digit, exponentSign;
    128         int32_t exponent32;
    129         size_t length = string.length();
    130 
    131         // Not using String.toInt() in order to perform the same computation as dtoa() does.
    132         exponentSign = 0;
    133         switch (digit = string[cursor]) {
    134         case '-':
    135             exponentSign = 1;
    136         case '+':
    137             digit = string[++cursor];
    138         }
    139         if (digit >= '0' && digit <= '9') {
    140             while (cursor < length && digit == '0')
    141                 digit = string[++cursor];
    142             if (digit > '0' && digit <= '9') {
    143                 exponent32 = digit - '0';
    144                 cursorSaved = cursor;
    145                 while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9')
    146                     exponent32 = (10 * exponent32) + digit - '0';
    147                 if (cursor - cursorSaved > 8 || exponent32 > 19999)
    148                     /* Avoid confusion from exponents
    149                      * so large that e might overflow.
    150                      */
    151                     exponent = 19999; /* safe for 16 bit ints */
    152                 else
    153                     exponent = static_cast<int>(exponent32);
    154                 if (exponentSign)
    155                     exponent = -exponent;
    156             } else
    157                 exponent = 0;
    158         }
    159     }
    160 
    161     int intDecimalPlaces = baseDecimalPlaces - exponent;
    162     if (intDecimalPlaces < 0)
    163         *decimalPlaces = 0;
    164     else if (intDecimalPlaces > 19999)
    165         *decimalPlaces = 19999;
    166     else
    167         *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);
    168 
    169     return true;
    170 }
    171 
    172 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
    173 bool parseHTMLInteger(const String& input, int& value)
    174 {
    175     // Step 1
    176     // Step 2
    177     const UChar* position = input.characters();
    178     const UChar* end = position + input.length();
    179 
    180     // Step 3
    181     int sign = 1;
    182 
    183     // Step 4
    184     while (position < end) {
    185         if (!isHTMLSpace(*position))
    186             break;
    187         ++position;
    188     }
    189 
    190     // Step 5
    191     if (position == end)
    192         return false;
    193     ASSERT(position < end);
    194 
    195     // Step 6
    196     if (*position == '-') {
    197         sign = -1;
    198         ++position;
    199     } else if (*position == '+')
    200         ++position;
    201     if (position == end)
    202         return false;
    203     ASSERT(position < end);
    204 
    205     // Step 7
    206     if (!isASCIIDigit(*position))
    207         return false;
    208 
    209     // Step 8
    210     Vector<UChar, 16> digits;
    211     while (position < end) {
    212         if (!isASCIIDigit(*position))
    213             break;
    214         digits.append(*position++);
    215     }
    216 
    217     // Step 9
    218     value = sign * charactersToIntStrict(digits.data(), digits.size());
    219     return true;
    220 }
    221 
    222 }
    223