1 /* 2 * Copyright (C) 2010 Apple Inc. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY 14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 15 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 16 * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY 17 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 18 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 19 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 20 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 21 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 22 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 23 */ 24 25 #include "config.h" 26 #include "core/html/parser/HTMLParserIdioms.h" 27 28 #include <limits> 29 #include "wtf/MathExtras.h" 30 #include "wtf/text/AtomicString.h" 31 #include "wtf/text/StringBuilder.h" 32 #include "wtf/text/StringHash.h" 33 34 namespace WebCore { 35 36 template <typename CharType> 37 static String stripLeadingAndTrailingHTMLSpaces(String string, const CharType* characters, unsigned length) 38 { 39 unsigned numLeadingSpaces = 0; 40 unsigned numTrailingSpaces = 0; 41 42 for (; numLeadingSpaces < length; ++numLeadingSpaces) { 43 if (isNotHTMLSpace<CharType>(characters[numLeadingSpaces])) 44 break; 45 } 46 47 if (numLeadingSpaces == length) 48 return string.isNull() ? string : emptyAtom.string(); 49 50 for (; numTrailingSpaces < length; ++numTrailingSpaces) { 51 if (isNotHTMLSpace<CharType>(characters[length - numTrailingSpaces - 1])) 52 break; 53 } 54 55 ASSERT(numLeadingSpaces + numTrailingSpaces < length); 56 57 if (!(numLeadingSpaces | numTrailingSpaces)) 58 return string; 59 60 return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces)); 61 } 62 63 String stripLeadingAndTrailingHTMLSpaces(const String& string) 64 { 65 unsigned length = string.length(); 66 67 if (!length) 68 return string.isNull() ? string : emptyAtom.string(); 69 70 if (string.is8Bit()) 71 return stripLeadingAndTrailingHTMLSpaces<LChar>(string, string.characters8(), length); 72 73 return stripLeadingAndTrailingHTMLSpaces<UChar>(string, string.characters16(), length); 74 } 75 76 String serializeForNumberType(const Decimal& number) 77 { 78 if (number.isZero()) { 79 // Decimal::toString appends exponent, e.g. "0e-18" 80 return number.isNegative() ? "-0" : "0"; 81 } 82 return number.toString(); 83 } 84 85 String serializeForNumberType(double number) 86 { 87 // According to HTML5, "the best representation of the number n as a floating 88 // point number" is a string produced by applying ToString() to n. 89 return String::numberToStringECMAScript(number); 90 } 91 92 Decimal parseToDecimalForNumberType(const String& string, const Decimal& fallbackValue) 93 { 94 // See HTML5 2.5.4.3 `Real numbers.' and parseToDoubleForNumberType 95 96 // String::toDouble() accepts leading + and whitespace characters, which are not valid here. 97 const UChar firstCharacter = string[0]; 98 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter)) 99 return fallbackValue; 100 101 const Decimal value = Decimal::fromString(string); 102 if (!value.isFinite()) 103 return fallbackValue; 104 105 // Numbers are considered finite IEEE 754 single-precision floating point values. 106 // See HTML5 2.5.4.3 `Real numbers.' 107 // FIXME: We should use numeric_limits<double>::max for number input type. 108 const Decimal floatMax = Decimal::fromDouble(std::numeric_limits<float>::max()); 109 if (value < -floatMax || value > floatMax) 110 return fallbackValue; 111 112 // We return +0 for -0 case. 113 return value.isZero() ? Decimal(0) : value; 114 } 115 116 double parseToDoubleForNumberType(const String& string, double fallbackValue) 117 { 118 // See HTML5 2.5.4.3 `Real numbers.' 119 120 // String::toDouble() accepts leading + and whitespace characters, which are not valid here. 121 UChar firstCharacter = string[0]; 122 if (firstCharacter != '-' && firstCharacter != '.' && !isASCIIDigit(firstCharacter)) 123 return fallbackValue; 124 125 bool valid = false; 126 double value = string.toDouble(&valid); 127 if (!valid) 128 return fallbackValue; 129 130 // NaN and infinity are considered valid by String::toDouble, but not valid here. 131 if (!std::isfinite(value)) 132 return fallbackValue; 133 134 // Numbers are considered finite IEEE 754 single-precision floating point values. 135 // See HTML5 2.5.4.3 `Real numbers.' 136 if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max()) 137 return fallbackValue; 138 139 // The following expression converts -0 to +0. 140 return value ? value : 0; 141 } 142 143 template <typename CharacterType> 144 static bool parseHTMLIntegerInternal(const CharacterType* position, const CharacterType* end, int& value) 145 { 146 // Step 3 147 int sign = 1; 148 149 // Step 4 150 while (position < end) { 151 if (!isHTMLSpace<CharacterType>(*position)) 152 break; 153 ++position; 154 } 155 156 // Step 5 157 if (position == end) 158 return false; 159 ASSERT(position < end); 160 161 // Step 6 162 if (*position == '-') { 163 sign = -1; 164 ++position; 165 } else if (*position == '+') 166 ++position; 167 if (position == end) 168 return false; 169 ASSERT(position < end); 170 171 // Step 7 172 if (!isASCIIDigit(*position)) 173 return false; 174 175 // Step 8 176 StringBuilder digits; 177 while (position < end) { 178 if (!isASCIIDigit(*position)) 179 break; 180 digits.append(*position++); 181 } 182 183 // Step 9 184 bool ok; 185 if (digits.is8Bit()) 186 value = sign * charactersToIntStrict(digits.characters8(), digits.length(), &ok); 187 else 188 value = sign * charactersToIntStrict(digits.characters16(), digits.length(), &ok); 189 return ok; 190 } 191 192 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers 193 bool parseHTMLInteger(const String& input, int& value) 194 { 195 // Step 1 196 // Step 2 197 unsigned length = input.length(); 198 if (!length || input.is8Bit()) { 199 const LChar* start = input.characters8(); 200 return parseHTMLIntegerInternal(start, start + length, value); 201 } 202 203 const UChar* start = input.characters16(); 204 return parseHTMLIntegerInternal(start, start + length, value); 205 } 206 207 template <typename CharacterType> 208 static bool parseHTMLNonNegativeIntegerInternal(const CharacterType* position, const CharacterType* end, unsigned& value) 209 { 210 // Step 3 211 while (position < end) { 212 if (!isHTMLSpace<CharacterType>(*position)) 213 break; 214 ++position; 215 } 216 217 // Step 4 218 if (position == end) 219 return false; 220 ASSERT(position < end); 221 222 // Step 5 223 if (*position == '+') 224 ++position; 225 226 // Step 6 227 if (position == end) 228 return false; 229 ASSERT(position < end); 230 231 // Step 7 232 if (!isASCIIDigit(*position)) 233 return false; 234 235 // Step 8 236 StringBuilder digits; 237 while (position < end) { 238 if (!isASCIIDigit(*position)) 239 break; 240 digits.append(*position++); 241 } 242 243 // Step 9 244 bool ok; 245 if (digits.is8Bit()) 246 value = charactersToUIntStrict(digits.characters8(), digits.length(), &ok); 247 else 248 value = charactersToUIntStrict(digits.characters16(), digits.length(), &ok); 249 return ok; 250 } 251 252 253 // http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-non-negative-integers 254 bool parseHTMLNonNegativeInteger(const String& input, unsigned& value) 255 { 256 // Step 1 257 // Step 2 258 unsigned length = input.length(); 259 if (length && input.is8Bit()) { 260 const LChar* start = input.characters8(); 261 return parseHTMLNonNegativeIntegerInternal(start, start + length, value); 262 } 263 264 const UChar* start = input.characters16(); 265 return parseHTMLNonNegativeIntegerInternal(start, start + length, value); 266 } 267 268 static bool threadSafeEqual(const StringImpl* a, const StringImpl* b) 269 { 270 if (a == b) 271 return true; 272 if (a->hash() != b->hash()) 273 return false; 274 return equalNonNull(a, b); 275 } 276 277 bool threadSafeMatch(const QualifiedName& a, const QualifiedName& b) 278 { 279 return threadSafeEqual(a.localName().impl(), b.localName().impl()); 280 } 281 282 bool threadSafeMatch(const String& localName, const QualifiedName& qName) 283 { 284 return threadSafeEqual(localName.impl(), qName.localName().impl()); 285 } 286 287 StringImpl* findStringIfStatic(const UChar* characters, unsigned length) 288 { 289 // We don't need to try hashing if we know the string is too long. 290 if (length > StringImpl::highestStaticStringLength()) 291 return 0; 292 // computeHashAndMaskTop8Bits is the function StringImpl::hash() uses. 293 unsigned hash = StringHasher::computeHashAndMaskTop8Bits(characters, length); 294 const WTF::StaticStringsTable& table = StringImpl::allStaticStrings(); 295 ASSERT(!table.isEmpty()); 296 297 WTF::StaticStringsTable::const_iterator it = table.find(hash); 298 if (it == table.end()) 299 return 0; 300 // It's possible to have hash collisions between arbitrary strings and 301 // known identifiers (e.g. "bvvfg" collides with "script"). 302 // However ASSERTs in StringImpl::createStatic guard against there ever being collisions 303 // between static strings. 304 if (!equal(it->value, characters, length)) 305 return 0; 306 return it->value; 307 } 308 309 } 310