1 /* 2 * Copyright (C) 1999-2002 Harri Porten (porten (at) kde.org) 3 * Copyright (C) 2001 Peter Kelly (pmk (at) post.com) 4 * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 5 * Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca) 6 * Copyright (C) 2007 Maks Orlovich 7 * 8 * This library is free software; you can redistribute it and/or 9 * modify it under the terms of the GNU Library General Public 10 * License as published by the Free Software Foundation; either 11 * version 2 of the License, or (at your option) any later version. 12 * 13 * This library is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 16 * Library General Public License for more details. 17 * 18 * You should have received a copy of the GNU Library General Public License 19 * along with this library; see the file COPYING.LIB. If not, write to 20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 21 * Boston, MA 02110-1301, USA. 22 * 23 */ 24 25 #include "config.h" 26 #include "JSGlobalObjectFunctions.h" 27 28 #include "CallFrame.h" 29 #include "Interpreter.h" 30 #include "JSGlobalObject.h" 31 #include "JSString.h" 32 #include "JSStringBuilder.h" 33 #include "Lexer.h" 34 #include "LiteralParser.h" 35 #include "Nodes.h" 36 #include "Parser.h" 37 #include "UStringBuilder.h" 38 #include "dtoa.h" 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <wtf/ASCIICType.h> 42 #include <wtf/Assertions.h> 43 #include <wtf/MathExtras.h> 44 #include <wtf/StringExtras.h> 45 #include <wtf/unicode/UTF8.h> 46 47 using namespace WTF; 48 using namespace Unicode; 49 50 namespace JSC { 51 52 static JSValue encode(ExecState* exec, const char* doNotEscape) 53 { 54 UString str = exec->argument(0).toString(exec); 55 CString cstr = str.utf8(true); 56 if (!cstr.data()) 57 return throwError(exec, createURIError(exec, "String contained an illegal UTF-16 sequence.")); 58 59 JSStringBuilder builder; 60 const char* p = cstr.data(); 61 for (size_t k = 0; k < cstr.length(); k++, p++) { 62 char c = *p; 63 if (c && strchr(doNotEscape, c)) 64 builder.append(c); 65 else { 66 char tmp[4]; 67 snprintf(tmp, sizeof(tmp), "%%%02X", static_cast<unsigned char>(c)); 68 builder.append(tmp); 69 } 70 } 71 return builder.build(exec); 72 } 73 74 static JSValue decode(ExecState* exec, const char* doNotUnescape, bool strict) 75 { 76 JSStringBuilder builder; 77 UString str = exec->argument(0).toString(exec); 78 int k = 0; 79 int len = str.length(); 80 const UChar* d = str.characters(); 81 UChar u = 0; 82 while (k < len) { 83 const UChar* p = d + k; 84 UChar c = *p; 85 if (c == '%') { 86 int charLen = 0; 87 if (k <= len - 3 && isASCIIHexDigit(p[1]) && isASCIIHexDigit(p[2])) { 88 const char b0 = Lexer::convertHex(p[1], p[2]); 89 const int sequenceLen = UTF8SequenceLength(b0); 90 if (sequenceLen != 0 && k <= len - sequenceLen * 3) { 91 charLen = sequenceLen * 3; 92 char sequence[5]; 93 sequence[0] = b0; 94 for (int i = 1; i < sequenceLen; ++i) { 95 const UChar* q = p + i * 3; 96 if (q[0] == '%' && isASCIIHexDigit(q[1]) && isASCIIHexDigit(q[2])) 97 sequence[i] = Lexer::convertHex(q[1], q[2]); 98 else { 99 charLen = 0; 100 break; 101 } 102 } 103 if (charLen != 0) { 104 sequence[sequenceLen] = 0; 105 const int character = decodeUTF8Sequence(sequence); 106 if (character < 0 || character >= 0x110000) 107 charLen = 0; 108 else if (character >= 0x10000) { 109 // Convert to surrogate pair. 110 builder.append(static_cast<UChar>(0xD800 | ((character - 0x10000) >> 10))); 111 u = static_cast<UChar>(0xDC00 | ((character - 0x10000) & 0x3FF)); 112 } else 113 u = static_cast<UChar>(character); 114 } 115 } 116 } 117 if (charLen == 0) { 118 if (strict) 119 return throwError(exec, createURIError(exec, "URI error")); 120 // The only case where we don't use "strict" mode is the "unescape" function. 121 // For that, it's good to support the wonky "%u" syntax for compatibility with WinIE. 122 if (k <= len - 6 && p[1] == 'u' 123 && isASCIIHexDigit(p[2]) && isASCIIHexDigit(p[3]) 124 && isASCIIHexDigit(p[4]) && isASCIIHexDigit(p[5])) { 125 charLen = 6; 126 u = Lexer::convertUnicode(p[2], p[3], p[4], p[5]); 127 } 128 } 129 if (charLen && (u == 0 || u >= 128 || !strchr(doNotUnescape, u))) { 130 c = u; 131 k += charLen - 1; 132 } 133 } 134 k++; 135 builder.append(c); 136 } 137 return builder.build(exec); 138 } 139 140 bool isStrWhiteSpace(UChar c) 141 { 142 switch (c) { 143 // ECMA-262-5th 7.2 & 7.3 144 case 0x0009: 145 case 0x000A: 146 case 0x000B: 147 case 0x000C: 148 case 0x000D: 149 case 0x0020: 150 case 0x00A0: 151 case 0x2028: 152 case 0x2029: 153 case 0xFEFF: 154 return true; 155 default: 156 return c > 0xff && isSeparatorSpace(c); 157 } 158 } 159 160 static int parseDigit(unsigned short c, int radix) 161 { 162 int digit = -1; 163 164 if (c >= '0' && c <= '9') 165 digit = c - '0'; 166 else if (c >= 'A' && c <= 'Z') 167 digit = c - 'A' + 10; 168 else if (c >= 'a' && c <= 'z') 169 digit = c - 'a' + 10; 170 171 if (digit >= radix) 172 return -1; 173 return digit; 174 } 175 176 double parseIntOverflow(const char* s, int length, int radix) 177 { 178 double number = 0.0; 179 double radixMultiplier = 1.0; 180 181 for (const char* p = s + length - 1; p >= s; p--) { 182 if (radixMultiplier == Inf) { 183 if (*p != '0') { 184 number = Inf; 185 break; 186 } 187 } else { 188 int digit = parseDigit(*p, radix); 189 number += digit * radixMultiplier; 190 } 191 192 radixMultiplier *= radix; 193 } 194 195 return number; 196 } 197 198 double parseIntOverflow(const UChar* s, int length, int radix) 199 { 200 double number = 0.0; 201 double radixMultiplier = 1.0; 202 203 for (const UChar* p = s + length - 1; p >= s; p--) { 204 if (radixMultiplier == Inf) { 205 if (*p != '0') { 206 number = Inf; 207 break; 208 } 209 } else { 210 int digit = parseDigit(*p, radix); 211 number += digit * radixMultiplier; 212 } 213 214 radixMultiplier *= radix; 215 } 216 217 return number; 218 } 219 220 static double parseInt(const UString& s, int radix) 221 { 222 int length = s.length(); 223 const UChar* data = s.characters(); 224 int p = 0; 225 226 while (p < length && isStrWhiteSpace(data[p])) 227 ++p; 228 229 double sign = 1; 230 if (p < length) { 231 if (data[p] == '+') 232 ++p; 233 else if (data[p] == '-') { 234 sign = -1; 235 ++p; 236 } 237 } 238 239 if ((radix == 0 || radix == 16) && length - p >= 2 && data[p] == '0' && (data[p + 1] == 'x' || data[p + 1] == 'X')) { 240 radix = 16; 241 p += 2; 242 } else if (radix == 0) { 243 if (p < length && data[p] == '0') 244 radix = 8; 245 else 246 radix = 10; 247 } 248 249 if (radix < 2 || radix > 36) 250 return NaN; 251 252 int firstDigitPosition = p; 253 bool sawDigit = false; 254 double number = 0; 255 while (p < length) { 256 int digit = parseDigit(data[p], radix); 257 if (digit == -1) 258 break; 259 sawDigit = true; 260 number *= radix; 261 number += digit; 262 ++p; 263 } 264 265 if (number >= mantissaOverflowLowerBound) { 266 if (radix == 10) 267 number = WTF::strtod(s.substringSharingImpl(firstDigitPosition, p - firstDigitPosition).utf8().data(), 0); 268 else if (radix == 2 || radix == 4 || radix == 8 || radix == 16 || radix == 32) 269 number = parseIntOverflow(s.substringSharingImpl(firstDigitPosition, p - firstDigitPosition).utf8().data(), p - firstDigitPosition, radix); 270 } 271 272 if (!sawDigit) 273 return NaN; 274 275 return sign * number; 276 } 277 278 static const int SizeOfInfinity = 8; 279 280 static bool isInfinity(const UChar* data, const UChar* end) 281 { 282 return (end - data) >= SizeOfInfinity 283 && data[0] == 'I' 284 && data[1] == 'n' 285 && data[2] == 'f' 286 && data[3] == 'i' 287 && data[4] == 'n' 288 && data[5] == 'i' 289 && data[6] == 't' 290 && data[7] == 'y'; 291 } 292 293 // See ecma-262 9.3.1 294 static double jsHexIntegerLiteral(const UChar*& data, const UChar* end) 295 { 296 // Hex number. 297 data += 2; 298 const UChar* firstDigitPosition = data; 299 double number = 0; 300 while (true) { 301 number = number * 16 + toASCIIHexValue(*data); 302 ++data; 303 if (data == end) 304 break; 305 if (!isASCIIHexDigit(*data)) 306 break; 307 } 308 if (number >= mantissaOverflowLowerBound) 309 number = parseIntOverflow(firstDigitPosition, data - firstDigitPosition, 16); 310 311 return number; 312 } 313 314 // See ecma-262 9.3.1 315 static double jsStrDecimalLiteral(const UChar*& data, const UChar* end) 316 { 317 ASSERT(data < end); 318 319 // Copy the sting into a null-terminated byte buffer, and call strtod. 320 Vector<char, 32> byteBuffer; 321 for (const UChar* characters = data; characters < end; ++characters) { 322 UChar character = *characters; 323 byteBuffer.append(isASCII(character) ? character : 0); 324 } 325 byteBuffer.append(0); 326 char* endOfNumber; 327 double number = WTF::strtod(byteBuffer.data(), &endOfNumber); 328 329 // Check if strtod found a number; if so return it. 330 ptrdiff_t consumed = endOfNumber - byteBuffer.data(); 331 if (consumed) { 332 data += consumed; 333 return number; 334 } 335 336 // Check for [+-]?Infinity 337 switch (*data) { 338 case 'I': 339 if (isInfinity(data, end)) { 340 data += SizeOfInfinity; 341 return Inf; 342 } 343 break; 344 345 case '+': 346 if (isInfinity(data + 1, end)) { 347 data += SizeOfInfinity + 1; 348 return Inf; 349 } 350 break; 351 352 case '-': 353 if (isInfinity(data + 1, end)) { 354 data += SizeOfInfinity + 1; 355 return -Inf; 356 } 357 break; 358 } 359 360 // Not a number. 361 return NaN; 362 } 363 364 // See ecma-262 9.3.1 365 double jsToNumber(const UString& s) 366 { 367 unsigned size = s.length(); 368 369 if (size == 1) { 370 UChar c = s.characters()[0]; 371 if (isASCIIDigit(c)) 372 return c - '0'; 373 if (isStrWhiteSpace(c)) 374 return 0; 375 return NaN; 376 } 377 378 const UChar* data = s.characters(); 379 const UChar* end = data + size; 380 381 // Skip leading white space. 382 for (; data < end; ++data) { 383 if (!isStrWhiteSpace(*data)) 384 break; 385 } 386 387 // Empty string. 388 if (data == end) 389 return 0.0; 390 391 double number; 392 if (data[0] == '0' && data + 2 < end && (data[1] | 0x20) == 'x' && isASCIIHexDigit(data[2])) 393 number = jsHexIntegerLiteral(data, end); 394 else 395 number = jsStrDecimalLiteral(data, end); 396 397 // Allow trailing white space. 398 for (; data < end; ++data) { 399 if (!isStrWhiteSpace(*data)) 400 break; 401 } 402 if (data != end) 403 return NaN; 404 405 return number; 406 } 407 408 static double parseFloat(const UString& s) 409 { 410 unsigned size = s.length(); 411 412 if (size == 1) { 413 UChar c = s.characters()[0]; 414 if (isASCIIDigit(c)) 415 return c - '0'; 416 return NaN; 417 } 418 419 const UChar* data = s.characters(); 420 const UChar* end = data + size; 421 422 // Skip leading white space. 423 for (; data < end; ++data) { 424 if (!isStrWhiteSpace(*data)) 425 break; 426 } 427 428 // Empty string. 429 if (data == end) 430 return NaN; 431 432 return jsStrDecimalLiteral(data, end); 433 } 434 435 EncodedJSValue JSC_HOST_CALL globalFuncEval(ExecState* exec) 436 { 437 JSObject* thisObject = exec->hostThisValue().toThisObject(exec); 438 JSObject* unwrappedObject = thisObject->unwrappedObject(); 439 if (!unwrappedObject->isGlobalObject() || static_cast<JSGlobalObject*>(unwrappedObject)->evalFunction() != exec->callee()) 440 return throwVMError(exec, createEvalError(exec, "The \"this\" value passed to eval must be the global object from which eval originated")); 441 442 JSValue x = exec->argument(0); 443 if (!x.isString()) 444 return JSValue::encode(x); 445 446 UString s = x.toString(exec); 447 448 LiteralParser preparser(exec, s, LiteralParser::NonStrictJSON); 449 if (JSValue parsedObject = preparser.tryLiteralParse()) 450 return JSValue::encode(parsedObject); 451 452 EvalExecutable* eval = EvalExecutable::create(exec, makeSource(s), false); 453 JSObject* error = eval->compile(exec, static_cast<JSGlobalObject*>(unwrappedObject)->globalScopeChain()); 454 if (error) 455 return throwVMError(exec, error); 456 457 return JSValue::encode(exec->interpreter()->execute(eval, exec, thisObject, static_cast<JSGlobalObject*>(unwrappedObject)->globalScopeChain())); 458 } 459 460 EncodedJSValue JSC_HOST_CALL globalFuncParseInt(ExecState* exec) 461 { 462 JSValue value = exec->argument(0); 463 int32_t radix = exec->argument(1).toInt32(exec); 464 465 if (radix != 0 && radix != 10) 466 return JSValue::encode(jsNumber(parseInt(value.toString(exec), radix))); 467 468 if (value.isInt32()) 469 return JSValue::encode(value); 470 471 if (value.isDouble()) { 472 double d = value.asDouble(); 473 if (isfinite(d)) 474 return JSValue::encode(jsNumber((d > 0) ? floor(d) : ceil(d))); 475 if (isnan(d) || isinf(d)) 476 return JSValue::encode(jsNaN()); 477 return JSValue::encode(jsNumber(0)); 478 } 479 480 return JSValue::encode(jsNumber(parseInt(value.toString(exec), radix))); 481 } 482 483 EncodedJSValue JSC_HOST_CALL globalFuncParseFloat(ExecState* exec) 484 { 485 return JSValue::encode(jsNumber(parseFloat(exec->argument(0).toString(exec)))); 486 } 487 488 EncodedJSValue JSC_HOST_CALL globalFuncIsNaN(ExecState* exec) 489 { 490 return JSValue::encode(jsBoolean(isnan(exec->argument(0).toNumber(exec)))); 491 } 492 493 EncodedJSValue JSC_HOST_CALL globalFuncIsFinite(ExecState* exec) 494 { 495 double n = exec->argument(0).toNumber(exec); 496 return JSValue::encode(jsBoolean(!isnan(n) && !isinf(n))); 497 } 498 499 EncodedJSValue JSC_HOST_CALL globalFuncDecodeURI(ExecState* exec) 500 { 501 static const char do_not_unescape_when_decoding_URI[] = 502 "#$&+,/:;=?@"; 503 504 return JSValue::encode(decode(exec, do_not_unescape_when_decoding_URI, true)); 505 } 506 507 EncodedJSValue JSC_HOST_CALL globalFuncDecodeURIComponent(ExecState* exec) 508 { 509 return JSValue::encode(decode(exec, "", true)); 510 } 511 512 EncodedJSValue JSC_HOST_CALL globalFuncEncodeURI(ExecState* exec) 513 { 514 static const char do_not_escape_when_encoding_URI[] = 515 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 516 "abcdefghijklmnopqrstuvwxyz" 517 "0123456789" 518 "!#$&'()*+,-./:;=?@_~"; 519 520 return JSValue::encode(encode(exec, do_not_escape_when_encoding_URI)); 521 } 522 523 EncodedJSValue JSC_HOST_CALL globalFuncEncodeURIComponent(ExecState* exec) 524 { 525 static const char do_not_escape_when_encoding_URI_component[] = 526 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 527 "abcdefghijklmnopqrstuvwxyz" 528 "0123456789" 529 "!'()*-._~"; 530 531 return JSValue::encode(encode(exec, do_not_escape_when_encoding_URI_component)); 532 } 533 534 EncodedJSValue JSC_HOST_CALL globalFuncEscape(ExecState* exec) 535 { 536 static const char do_not_escape[] = 537 "ABCDEFGHIJKLMNOPQRSTUVWXYZ" 538 "abcdefghijklmnopqrstuvwxyz" 539 "0123456789" 540 "*+-./@_"; 541 542 JSStringBuilder builder; 543 UString str = exec->argument(0).toString(exec); 544 const UChar* c = str.characters(); 545 for (unsigned k = 0; k < str.length(); k++, c++) { 546 int u = c[0]; 547 if (u > 255) { 548 char tmp[7]; 549 snprintf(tmp, sizeof(tmp), "%%u%04X", u); 550 builder.append(tmp); 551 } else if (u != 0 && strchr(do_not_escape, static_cast<char>(u))) 552 builder.append(c, 1); 553 else { 554 char tmp[4]; 555 snprintf(tmp, sizeof(tmp), "%%%02X", u); 556 builder.append(tmp); 557 } 558 } 559 560 return JSValue::encode(builder.build(exec)); 561 } 562 563 EncodedJSValue JSC_HOST_CALL globalFuncUnescape(ExecState* exec) 564 { 565 UStringBuilder builder; 566 UString str = exec->argument(0).toString(exec); 567 int k = 0; 568 int len = str.length(); 569 while (k < len) { 570 const UChar* c = str.characters() + k; 571 UChar u; 572 if (c[0] == '%' && k <= len - 6 && c[1] == 'u') { 573 if (isASCIIHexDigit(c[2]) && isASCIIHexDigit(c[3]) && isASCIIHexDigit(c[4]) && isASCIIHexDigit(c[5])) { 574 u = Lexer::convertUnicode(c[2], c[3], c[4], c[5]); 575 c = &u; 576 k += 5; 577 } 578 } else if (c[0] == '%' && k <= len - 3 && isASCIIHexDigit(c[1]) && isASCIIHexDigit(c[2])) { 579 u = UChar(Lexer::convertHex(c[1], c[2])); 580 c = &u; 581 k += 2; 582 } 583 k++; 584 builder.append(*c); 585 } 586 587 return JSValue::encode(jsString(exec, builder.toUString())); 588 } 589 590 #ifndef NDEBUG 591 EncodedJSValue JSC_HOST_CALL globalFuncJSCPrint(ExecState* exec) 592 { 593 CString string = exec->argument(0).toString(exec).utf8(); 594 puts(string.data()); 595 return JSValue::encode(jsUndefined()); 596 } 597 #endif 598 599 } // namespace JSC 600