1 /* 2 * Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca) 5 * Copyright (C) 2009 Google Inc. All rights reserved. 6 * 7 * This library is free software; you can redistribute it and/or 8 * modify it under the terms of the GNU Library General Public 9 * License as published by the Free Software Foundation; either 10 * version 2 of the License, or (at your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 15 * Library General Public License for more details. 16 * 17 * You should have received a copy of the GNU Library General Public License 18 * along with this library; see the file COPYING.LIB. If not, write to 19 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 20 * Boston, MA 02110-1301, USA. 21 * 22 */ 23 24 #include "config.h" 25 #include "UString.h" 26 27 #include "JSGlobalObjectFunctions.h" 28 #include "Collector.h" 29 #include "dtoa.h" 30 #include "Identifier.h" 31 #include "Operations.h" 32 #include <ctype.h> 33 #include <limits.h> 34 #include <limits> 35 #include <math.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <wtf/ASCIICType.h> 40 #include <wtf/Assertions.h> 41 #include <wtf/MathExtras.h> 42 #include <wtf/StringExtras.h> 43 #include <wtf/Vector.h> 44 #include <wtf/unicode/UTF8.h> 45 #include <wtf/StringExtras.h> 46 47 #if HAVE(STRINGS_H) 48 #include <strings.h> 49 #endif 50 51 using namespace WTF; 52 using namespace WTF::Unicode; 53 using namespace std; 54 55 namespace JSC { 56 57 extern const double NaN; 58 extern const double Inf; 59 60 CString::CString(const char* c) 61 : m_length(strlen(c)) 62 , m_data(new char[m_length + 1]) 63 { 64 memcpy(m_data, c, m_length + 1); 65 } 66 67 CString::CString(const char* c, size_t length) 68 : m_length(length) 69 , m_data(new char[length + 1]) 70 { 71 memcpy(m_data, c, m_length); 72 m_data[m_length] = 0; 73 } 74 75 CString::CString(const CString& b) 76 { 77 m_length = b.m_length; 78 if (b.m_data) { 79 m_data = new char[m_length + 1]; 80 memcpy(m_data, b.m_data, m_length + 1); 81 } else 82 m_data = 0; 83 } 84 85 CString::~CString() 86 { 87 delete [] m_data; 88 } 89 90 CString CString::adopt(char* c, size_t length) 91 { 92 CString s; 93 s.m_data = c; 94 s.m_length = length; 95 return s; 96 } 97 98 CString& CString::append(const CString& t) 99 { 100 char* n; 101 n = new char[m_length + t.m_length + 1]; 102 if (m_length) 103 memcpy(n, m_data, m_length); 104 if (t.m_length) 105 memcpy(n + m_length, t.m_data, t.m_length); 106 m_length += t.m_length; 107 n[m_length] = 0; 108 109 delete [] m_data; 110 m_data = n; 111 112 return *this; 113 } 114 115 CString& CString::operator=(const char* c) 116 { 117 if (m_data) 118 delete [] m_data; 119 m_length = strlen(c); 120 m_data = new char[m_length + 1]; 121 memcpy(m_data, c, m_length + 1); 122 123 return *this; 124 } 125 126 CString& CString::operator=(const CString& str) 127 { 128 if (this == &str) 129 return *this; 130 131 if (m_data) 132 delete [] m_data; 133 m_length = str.m_length; 134 if (str.m_data) { 135 m_data = new char[m_length + 1]; 136 memcpy(m_data, str.m_data, m_length + 1); 137 } else 138 m_data = 0; 139 140 return *this; 141 } 142 143 bool operator==(const CString& c1, const CString& c2) 144 { 145 size_t len = c1.size(); 146 return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0); 147 } 148 149 // These static strings are immutable, except for rc, whose initial value is chosen to 150 // reduce the possibility of it becoming zero due to ref/deref not being thread-safe. 151 static UChar sharedEmptyChar; 152 UStringImpl* UStringImpl::s_empty; 153 154 UString::Rep* UString::s_nullRep; 155 UString* UString::s_nullUString; 156 157 void initializeUString() 158 { 159 UStringImpl::s_empty = new UStringImpl(&sharedEmptyChar, 0, UStringImpl::ConstructStaticString); 160 161 UString::s_nullRep = new UStringImpl(0, 0, UStringImpl::ConstructStaticString); 162 UString::s_nullUString = new UString; 163 } 164 165 UString::UString(const char* c) 166 : m_rep(Rep::create(c)) 167 { 168 } 169 170 UString::UString(const char* c, int length) 171 : m_rep(Rep::create(c, length)) 172 { 173 } 174 175 UString::UString(const UChar* c, int length) 176 { 177 if (length == 0) 178 m_rep = &Rep::empty(); 179 else 180 m_rep = Rep::create(c, length); 181 } 182 183 UString UString::from(int i) 184 { 185 UChar buf[1 + sizeof(i) * 3]; 186 UChar* end = buf + sizeof(buf) / sizeof(UChar); 187 UChar* p = end; 188 189 if (i == 0) 190 *--p = '0'; 191 else if (i == INT_MIN) { 192 char minBuf[1 + sizeof(i) * 3]; 193 sprintf(minBuf, "%d", INT_MIN); 194 return UString(minBuf); 195 } else { 196 bool negative = false; 197 if (i < 0) { 198 negative = true; 199 i = -i; 200 } 201 while (i) { 202 *--p = static_cast<unsigned short>((i % 10) + '0'); 203 i /= 10; 204 } 205 if (negative) 206 *--p = '-'; 207 } 208 209 return UString(p, static_cast<int>(end - p)); 210 } 211 212 UString UString::from(long long i) 213 { 214 UChar buf[1 + sizeof(i) * 3]; 215 UChar* end = buf + sizeof(buf) / sizeof(UChar); 216 UChar* p = end; 217 218 if (i == 0) 219 *--p = '0'; 220 else if (i == std::numeric_limits<long long>::min()) { 221 char minBuf[1 + sizeof(i) * 3]; 222 #if OS(WINDOWS) 223 snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min()); 224 #else 225 snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min()); 226 #endif 227 return UString(minBuf); 228 } else { 229 bool negative = false; 230 if (i < 0) { 231 negative = true; 232 i = -i; 233 } 234 while (i) { 235 *--p = static_cast<unsigned short>((i % 10) + '0'); 236 i /= 10; 237 } 238 if (negative) 239 *--p = '-'; 240 } 241 242 return UString(p, static_cast<int>(end - p)); 243 } 244 245 UString UString::from(unsigned int u) 246 { 247 UChar buf[sizeof(u) * 3]; 248 UChar* end = buf + sizeof(buf) / sizeof(UChar); 249 UChar* p = end; 250 251 if (u == 0) 252 *--p = '0'; 253 else { 254 while (u) { 255 *--p = static_cast<unsigned short>((u % 10) + '0'); 256 u /= 10; 257 } 258 } 259 260 return UString(p, static_cast<int>(end - p)); 261 } 262 263 UString UString::from(long l) 264 { 265 UChar buf[1 + sizeof(l) * 3]; 266 UChar* end = buf + sizeof(buf) / sizeof(UChar); 267 UChar* p = end; 268 269 if (l == 0) 270 *--p = '0'; 271 else if (l == LONG_MIN) { 272 char minBuf[1 + sizeof(l) * 3]; 273 sprintf(minBuf, "%ld", LONG_MIN); 274 return UString(minBuf); 275 } else { 276 bool negative = false; 277 if (l < 0) { 278 negative = true; 279 l = -l; 280 } 281 while (l) { 282 *--p = static_cast<unsigned short>((l % 10) + '0'); 283 l /= 10; 284 } 285 if (negative) 286 *--p = '-'; 287 } 288 289 return UString(p, static_cast<int>(end - p)); 290 } 291 292 UString UString::from(double d) 293 { 294 DtoaBuffer buffer; 295 unsigned length; 296 doubleToStringInJavaScriptFormat(d, buffer, &length); 297 return UString(buffer, length); 298 } 299 300 bool UString::getCString(CStringBuffer& buffer) const 301 { 302 int length = size(); 303 int neededSize = length + 1; 304 buffer.resize(neededSize); 305 char* buf = buffer.data(); 306 307 UChar ored = 0; 308 const UChar* p = data(); 309 char* q = buf; 310 const UChar* limit = p + length; 311 while (p != limit) { 312 UChar c = p[0]; 313 ored |= c; 314 *q = static_cast<char>(c); 315 ++p; 316 ++q; 317 } 318 *q = '\0'; 319 320 return !(ored & 0xFF00); 321 } 322 323 char* UString::ascii() const 324 { 325 static char* asciiBuffer = 0; 326 327 int length = size(); 328 int neededSize = length + 1; 329 delete[] asciiBuffer; 330 asciiBuffer = new char[neededSize]; 331 332 const UChar* p = data(); 333 char* q = asciiBuffer; 334 const UChar* limit = p + length; 335 while (p != limit) { 336 *q = static_cast<char>(p[0]); 337 ++p; 338 ++q; 339 } 340 *q = '\0'; 341 342 return asciiBuffer; 343 } 344 345 bool UString::is8Bit() const 346 { 347 const UChar* u = data(); 348 const UChar* limit = u + size(); 349 while (u < limit) { 350 if (u[0] > 0xFF) 351 return false; 352 ++u; 353 } 354 355 return true; 356 } 357 358 UChar UString::operator[](int pos) const 359 { 360 if (pos >= size()) 361 return '\0'; 362 return data()[pos]; 363 } 364 365 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const 366 { 367 if (size() == 1) { 368 UChar c = data()[0]; 369 if (isASCIIDigit(c)) 370 return c - '0'; 371 if (isASCIISpace(c) && tolerateEmptyString) 372 return 0; 373 return NaN; 374 } 375 376 // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk 377 // after the number, so this is too strict a check. 378 CStringBuffer s; 379 if (!getCString(s)) 380 return NaN; 381 const char* c = s.data(); 382 383 // skip leading white space 384 while (isASCIISpace(*c)) 385 c++; 386 387 // empty string ? 388 if (*c == '\0') 389 return tolerateEmptyString ? 0.0 : NaN; 390 391 double d; 392 393 // hex number ? 394 if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) { 395 const char* firstDigitPosition = c + 2; 396 c++; 397 d = 0.0; 398 while (*(++c)) { 399 if (*c >= '0' && *c <= '9') 400 d = d * 16.0 + *c - '0'; 401 else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f')) 402 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0; 403 else 404 break; 405 } 406 407 if (d >= mantissaOverflowLowerBound) 408 d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16); 409 } else { 410 // regular number ? 411 char* end; 412 d = WTF::strtod(c, &end); 413 if ((d != 0.0 || end != c) && d != Inf && d != -Inf) { 414 c = end; 415 } else { 416 double sign = 1.0; 417 418 if (*c == '+') 419 c++; 420 else if (*c == '-') { 421 sign = -1.0; 422 c++; 423 } 424 425 // We used strtod() to do the conversion. However, strtod() handles 426 // infinite values slightly differently than JavaScript in that it 427 // converts the string "inf" with any capitalization to infinity, 428 // whereas the ECMA spec requires that it be converted to NaN. 429 430 if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') { 431 d = sign * Inf; 432 c += 8; 433 } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i') 434 c = end; 435 else 436 return NaN; 437 } 438 } 439 440 // allow trailing white space 441 while (isASCIISpace(*c)) 442 c++; 443 // don't allow anything after - unless tolerant=true 444 if (!tolerateTrailingJunk && *c != '\0') 445 d = NaN; 446 447 return d; 448 } 449 450 double UString::toDouble(bool tolerateTrailingJunk) const 451 { 452 return toDouble(tolerateTrailingJunk, true); 453 } 454 455 double UString::toDouble() const 456 { 457 return toDouble(false, true); 458 } 459 460 uint32_t UString::toUInt32(bool* ok) const 461 { 462 double d = toDouble(); 463 bool b = true; 464 465 if (d != static_cast<uint32_t>(d)) { 466 b = false; 467 d = 0; 468 } 469 470 if (ok) 471 *ok = b; 472 473 return static_cast<uint32_t>(d); 474 } 475 476 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const 477 { 478 double d = toDouble(false, tolerateEmptyString); 479 bool b = true; 480 481 if (d != static_cast<uint32_t>(d)) { 482 b = false; 483 d = 0; 484 } 485 486 if (ok) 487 *ok = b; 488 489 return static_cast<uint32_t>(d); 490 } 491 492 uint32_t UString::toStrictUInt32(bool* ok) const 493 { 494 if (ok) 495 *ok = false; 496 497 // Empty string is not OK. 498 int len = m_rep->size(); 499 if (len == 0) 500 return 0; 501 const UChar* p = m_rep->data(); 502 unsigned short c = p[0]; 503 504 // If the first digit is 0, only 0 itself is OK. 505 if (c == '0') { 506 if (len == 1 && ok) 507 *ok = true; 508 return 0; 509 } 510 511 // Convert to UInt32, checking for overflow. 512 uint32_t i = 0; 513 while (1) { 514 // Process character, turning it into a digit. 515 if (c < '0' || c > '9') 516 return 0; 517 const unsigned d = c - '0'; 518 519 // Multiply by 10, checking for overflow out of 32 bits. 520 if (i > 0xFFFFFFFFU / 10) 521 return 0; 522 i *= 10; 523 524 // Add in the digit, checking for overflow out of 32 bits. 525 const unsigned max = 0xFFFFFFFFU - d; 526 if (i > max) 527 return 0; 528 i += d; 529 530 // Handle end of string. 531 if (--len == 0) { 532 if (ok) 533 *ok = true; 534 return i; 535 } 536 537 // Get next character. 538 c = *(++p); 539 } 540 } 541 542 int UString::find(const UString& f, int pos) const 543 { 544 int fsz = f.size(); 545 546 if (pos < 0) 547 pos = 0; 548 549 if (fsz == 1) { 550 UChar ch = f[0]; 551 const UChar* end = data() + size(); 552 for (const UChar* c = data() + pos; c < end; c++) { 553 if (*c == ch) 554 return static_cast<int>(c - data()); 555 } 556 return -1; 557 } 558 559 int sz = size(); 560 if (sz < fsz) 561 return -1; 562 if (fsz == 0) 563 return pos; 564 const UChar* end = data() + sz - fsz; 565 int fsizeminusone = (fsz - 1) * sizeof(UChar); 566 const UChar* fdata = f.data(); 567 unsigned short fchar = fdata[0]; 568 ++fdata; 569 for (const UChar* c = data() + pos; c <= end; c++) { 570 if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone)) 571 return static_cast<int>(c - data()); 572 } 573 574 return -1; 575 } 576 577 int UString::find(UChar ch, int pos) const 578 { 579 if (pos < 0) 580 pos = 0; 581 const UChar* end = data() + size(); 582 for (const UChar* c = data() + pos; c < end; c++) { 583 if (*c == ch) 584 return static_cast<int>(c - data()); 585 } 586 587 return -1; 588 } 589 590 int UString::rfind(const UString& f, int pos) const 591 { 592 int sz = size(); 593 int fsz = f.size(); 594 if (sz < fsz) 595 return -1; 596 if (pos < 0) 597 pos = 0; 598 if (pos > sz - fsz) 599 pos = sz - fsz; 600 if (fsz == 0) 601 return pos; 602 int fsizeminusone = (fsz - 1) * sizeof(UChar); 603 const UChar* fdata = f.data(); 604 for (const UChar* c = data() + pos; c >= data(); c--) { 605 if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone)) 606 return static_cast<int>(c - data()); 607 } 608 609 return -1; 610 } 611 612 int UString::rfind(UChar ch, int pos) const 613 { 614 if (isEmpty()) 615 return -1; 616 if (pos + 1 >= size()) 617 pos = size() - 1; 618 for (const UChar* c = data() + pos; c >= data(); c--) { 619 if (*c == ch) 620 return static_cast<int>(c - data()); 621 } 622 623 return -1; 624 } 625 626 UString UString::substr(int pos, int len) const 627 { 628 int s = size(); 629 630 if (pos < 0) 631 pos = 0; 632 else if (pos >= s) 633 pos = s; 634 if (len < 0) 635 len = s; 636 if (pos + len >= s) 637 len = s - pos; 638 639 if (pos == 0 && len == s) 640 return *this; 641 642 return UString(Rep::create(m_rep, pos, len)); 643 } 644 645 bool operator==(const UString& s1, const char *s2) 646 { 647 if (s2 == 0) 648 return s1.isEmpty(); 649 650 const UChar* u = s1.data(); 651 const UChar* uend = u + s1.size(); 652 while (u != uend && *s2) { 653 if (u[0] != (unsigned char)*s2) 654 return false; 655 s2++; 656 u++; 657 } 658 659 return u == uend && *s2 == 0; 660 } 661 662 bool operator<(const UString& s1, const UString& s2) 663 { 664 const int l1 = s1.size(); 665 const int l2 = s2.size(); 666 const int lmin = l1 < l2 ? l1 : l2; 667 const UChar* c1 = s1.data(); 668 const UChar* c2 = s2.data(); 669 int l = 0; 670 while (l < lmin && *c1 == *c2) { 671 c1++; 672 c2++; 673 l++; 674 } 675 if (l < lmin) 676 return (c1[0] < c2[0]); 677 678 return (l1 < l2); 679 } 680 681 bool operator>(const UString& s1, const UString& s2) 682 { 683 const int l1 = s1.size(); 684 const int l2 = s2.size(); 685 const int lmin = l1 < l2 ? l1 : l2; 686 const UChar* c1 = s1.data(); 687 const UChar* c2 = s2.data(); 688 int l = 0; 689 while (l < lmin && *c1 == *c2) { 690 c1++; 691 c2++; 692 l++; 693 } 694 if (l < lmin) 695 return (c1[0] > c2[0]); 696 697 return (l1 > l2); 698 } 699 700 int compare(const UString& s1, const UString& s2) 701 { 702 const int l1 = s1.size(); 703 const int l2 = s2.size(); 704 const int lmin = l1 < l2 ? l1 : l2; 705 const UChar* c1 = s1.data(); 706 const UChar* c2 = s2.data(); 707 int l = 0; 708 while (l < lmin && *c1 == *c2) { 709 c1++; 710 c2++; 711 l++; 712 } 713 714 if (l < lmin) 715 return (c1[0] > c2[0]) ? 1 : -1; 716 717 if (l1 == l2) 718 return 0; 719 720 return (l1 > l2) ? 1 : -1; 721 } 722 723 bool equal(const UString::Rep* r, const UString::Rep* b) 724 { 725 int length = r->size(); 726 if (length != b->size()) 727 return false; 728 const UChar* d = r->data(); 729 const UChar* s = b->data(); 730 for (int i = 0; i != length; ++i) { 731 if (d[i] != s[i]) 732 return false; 733 } 734 return true; 735 } 736 737 CString UString::UTF8String(bool strict) const 738 { 739 // Allocate a buffer big enough to hold all the characters. 740 const int length = size(); 741 Vector<char, 1024> buffer(length * 3); 742 743 // Convert to runs of 8-bit characters. 744 char* p = buffer.data(); 745 const UChar* d = reinterpret_cast<const UChar*>(&data()[0]); 746 ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict); 747 if (result != conversionOK) 748 return CString(); 749 750 return CString(buffer.data(), p - buffer.data()); 751 } 752 753 } // namespace JSC 754