Home | History | Annotate | Download | only in runtime
      1 /*
      2  *  Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org)
      3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca)
      5  *  Copyright (C) 2009 Google Inc. All rights reserved.
      6  *
      7  *  This library is free software; you can redistribute it and/or
      8  *  modify it under the terms of the GNU Library General Public
      9  *  License as published by the Free Software Foundation; either
     10  *  version 2 of the License, or (at your option) any later version.
     11  *
     12  *  This library is distributed in the hope that it will be useful,
     13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  *  Library General Public License for more details.
     16  *
     17  *  You should have received a copy of the GNU Library General Public License
     18  *  along with this library; see the file COPYING.LIB.  If not, write to
     19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  *  Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #include "config.h"
     25 #include "UString.h"
     26 
     27 #include "JSGlobalObjectFunctions.h"
     28 #include "Collector.h"
     29 #include "dtoa.h"
     30 #include "Identifier.h"
     31 #include "Operations.h"
     32 #include <ctype.h>
     33 #include <limits.h>
     34 #include <limits>
     35 #include <math.h>
     36 #include <stdio.h>
     37 #include <stdlib.h>
     38 #include <string.h>
     39 #include <wtf/ASCIICType.h>
     40 #include <wtf/Assertions.h>
     41 #include <wtf/MathExtras.h>
     42 #include <wtf/StringExtras.h>
     43 #include <wtf/Vector.h>
     44 #include <wtf/unicode/UTF8.h>
     45 #include <wtf/StringExtras.h>
     46 
     47 #if HAVE(STRINGS_H)
     48 #include <strings.h>
     49 #endif
     50 
     51 using namespace WTF;
     52 using namespace WTF::Unicode;
     53 using namespace std;
     54 
     55 namespace JSC {
     56 
// Floating-point constants defined outside this file. UString::toDouble()
// returns NaN for unparsable input and uses Inf when recognising "Infinity".
extern const double NaN;
extern const double Inf;
     59 
     60 CString::CString(const char* c)
     61     : m_length(strlen(c))
     62     , m_data(new char[m_length + 1])
     63 {
     64     memcpy(m_data, c, m_length + 1);
     65 }
     66 
     67 CString::CString(const char* c, size_t length)
     68     : m_length(length)
     69     , m_data(new char[length + 1])
     70 {
     71     memcpy(m_data, c, m_length);
     72     m_data[m_length] = 0;
     73 }
     74 
     75 CString::CString(const CString& b)
     76 {
     77     m_length = b.m_length;
     78     if (b.m_data) {
     79         m_data = new char[m_length + 1];
     80         memcpy(m_data, b.m_data, m_length + 1);
     81     } else
     82         m_data = 0;
     83 }
     84 
// Releases the character buffer owned by this CString. Deleting a null
// pointer is a no-op, so an object without a buffer is handled as well.
CString::~CString()
{
    delete [] m_data;
}
     89 
     90 CString CString::adopt(char* c, size_t length)
     91 {
     92     CString s;
     93     s.m_data = c;
     94     s.m_length = length;
     95     return s;
     96 }
     97 
     98 CString& CString::append(const CString& t)
     99 {
    100     char* n;
    101     n = new char[m_length + t.m_length + 1];
    102     if (m_length)
    103         memcpy(n, m_data, m_length);
    104     if (t.m_length)
    105         memcpy(n + m_length, t.m_data, t.m_length);
    106     m_length += t.m_length;
    107     n[m_length] = 0;
    108 
    109     delete [] m_data;
    110     m_data = n;
    111 
    112     return *this;
    113 }
    114 
    115 CString& CString::operator=(const char* c)
    116 {
    117     if (m_data)
    118         delete [] m_data;
    119     m_length = strlen(c);
    120     m_data = new char[m_length + 1];
    121     memcpy(m_data, c, m_length + 1);
    122 
    123     return *this;
    124 }
    125 
    126 CString& CString::operator=(const CString& str)
    127 {
    128     if (this == &str)
    129         return *this;
    130 
    131     if (m_data)
    132         delete [] m_data;
    133     m_length = str.m_length;
    134     if (str.m_data) {
    135         m_data = new char[m_length + 1];
    136         memcpy(m_data, str.m_data, m_length + 1);
    137     } else
    138         m_data = 0;
    139 
    140     return *this;
    141 }
    142 
    143 bool operator==(const CString& c1, const CString& c2)
    144 {
    145     size_t len = c1.size();
    146     return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
    147 }
    148 
// These static strings are immutable, except for rc, whose initial value is chosen to
// reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
// The pointers below have static storage, so they are null until
// initializeUString() assigns them.

// Character storage handed to the shared empty UStringImpl.
static UChar sharedEmptyChar;
// Shared implementation object for the empty string.
UStringImpl* UStringImpl::s_empty;

// Shared rep that is created with a null data pointer and zero length.
UString::Rep* UString::s_nullRep;
// Heap-allocated, default-constructed UString.
UString* UString::s_nullUString;
    156 
// Allocates the shared static string objects: the empty UStringImpl (backed
// by sharedEmptyChar), the null rep (null data, zero length) and the null
// UString. Nothing here frees them again.
void initializeUString()
{
    UStringImpl::s_empty = new UStringImpl(&sharedEmptyChar, 0, UStringImpl::ConstructStaticString);

    // NOTE(review): the default-constructed UString below presumably refers to
    // s_nullRep, so s_nullRep is set up first -- confirm against UString.h.
    UString::s_nullRep = new UStringImpl(0, 0, UStringImpl::ConstructStaticString);
    UString::s_nullUString = new UString;
}
    164 
// Constructs a UString whose rep is created from the C string c.
UString::UString(const char* c)
    : m_rep(Rep::create(c))
{
}
    169 
// Constructs a UString whose rep is created from c and an explicit length.
UString::UString(const char* c, int length)
    : m_rep(Rep::create(c, length))
{
}
    174 
    175 UString::UString(const UChar* c, int length)
    176 {
    177     if (length == 0)
    178         m_rep = &Rep::empty();
    179     else
    180         m_rep = Rep::create(c, length);
    181 }
    182 
    183 UString UString::from(int i)
    184 {
    185     UChar buf[1 + sizeof(i) * 3];
    186     UChar* end = buf + sizeof(buf) / sizeof(UChar);
    187     UChar* p = end;
    188 
    189     if (i == 0)
    190         *--p = '0';
    191     else if (i == INT_MIN) {
    192         char minBuf[1 + sizeof(i) * 3];
    193         sprintf(minBuf, "%d", INT_MIN);
    194         return UString(minBuf);
    195     } else {
    196         bool negative = false;
    197         if (i < 0) {
    198             negative = true;
    199             i = -i;
    200         }
    201         while (i) {
    202             *--p = static_cast<unsigned short>((i % 10) + '0');
    203             i /= 10;
    204         }
    205         if (negative)
    206             *--p = '-';
    207     }
    208 
    209     return UString(p, static_cast<int>(end - p));
    210 }
    211 
    212 UString UString::from(long long i)
    213 {
    214     UChar buf[1 + sizeof(i) * 3];
    215     UChar* end = buf + sizeof(buf) / sizeof(UChar);
    216     UChar* p = end;
    217 
    218     if (i == 0)
    219         *--p = '0';
    220     else if (i == std::numeric_limits<long long>::min()) {
    221         char minBuf[1 + sizeof(i) * 3];
    222 #if OS(WINDOWS)
    223         snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
    224 #else
    225         snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
    226 #endif
    227         return UString(minBuf);
    228     } else {
    229         bool negative = false;
    230         if (i < 0) {
    231             negative = true;
    232             i = -i;
    233         }
    234         while (i) {
    235             *--p = static_cast<unsigned short>((i % 10) + '0');
    236             i /= 10;
    237         }
    238         if (negative)
    239             *--p = '-';
    240     }
    241 
    242     return UString(p, static_cast<int>(end - p));
    243 }
    244 
    245 UString UString::from(unsigned int u)
    246 {
    247     UChar buf[sizeof(u) * 3];
    248     UChar* end = buf + sizeof(buf) / sizeof(UChar);
    249     UChar* p = end;
    250 
    251     if (u == 0)
    252         *--p = '0';
    253     else {
    254         while (u) {
    255             *--p = static_cast<unsigned short>((u % 10) + '0');
    256             u /= 10;
    257         }
    258     }
    259 
    260     return UString(p, static_cast<int>(end - p));
    261 }
    262 
    263 UString UString::from(long l)
    264 {
    265     UChar buf[1 + sizeof(l) * 3];
    266     UChar* end = buf + sizeof(buf) / sizeof(UChar);
    267     UChar* p = end;
    268 
    269     if (l == 0)
    270         *--p = '0';
    271     else if (l == LONG_MIN) {
    272         char minBuf[1 + sizeof(l) * 3];
    273         sprintf(minBuf, "%ld", LONG_MIN);
    274         return UString(minBuf);
    275     } else {
    276         bool negative = false;
    277         if (l < 0) {
    278             negative = true;
    279             l = -l;
    280         }
    281         while (l) {
    282             *--p = static_cast<unsigned short>((l % 10) + '0');
    283             l /= 10;
    284         }
    285         if (negative)
    286             *--p = '-';
    287     }
    288 
    289     return UString(p, static_cast<int>(end - p));
    290 }
    291 
// Returns the string form of d as produced by
// doubleToStringInJavaScriptFormat(), which fills `buffer` and reports the
// number of characters written through `length`.
UString UString::from(double d)
{
    DtoaBuffer buffer;
    unsigned length;
    doubleToStringInJavaScriptFormat(d, buffer, &length);
    return UString(buffer, length);
}
    299 
    300 bool UString::getCString(CStringBuffer& buffer) const
    301 {
    302     int length = size();
    303     int neededSize = length + 1;
    304     buffer.resize(neededSize);
    305     char* buf = buffer.data();
    306 
    307     UChar ored = 0;
    308     const UChar* p = data();
    309     char* q = buf;
    310     const UChar* limit = p + length;
    311     while (p != limit) {
    312         UChar c = p[0];
    313         ored |= c;
    314         *q = static_cast<char>(c);
    315         ++p;
    316         ++q;
    317     }
    318     *q = '\0';
    319 
    320     return !(ored & 0xFF00);
    321 }
    322 
    323 char* UString::ascii() const
    324 {
    325     static char* asciiBuffer = 0;
    326 
    327     int length = size();
    328     int neededSize = length + 1;
    329     delete[] asciiBuffer;
    330     asciiBuffer = new char[neededSize];
    331 
    332     const UChar* p = data();
    333     char* q = asciiBuffer;
    334     const UChar* limit = p + length;
    335     while (p != limit) {
    336         *q = static_cast<char>(p[0]);
    337         ++p;
    338         ++q;
    339     }
    340     *q = '\0';
    341 
    342     return asciiBuffer;
    343 }
    344 
    345 bool UString::is8Bit() const
    346 {
    347     const UChar* u = data();
    348     const UChar* limit = u + size();
    349     while (u < limit) {
    350         if (u[0] > 0xFF)
    351             return false;
    352         ++u;
    353     }
    354 
    355     return true;
    356 }
    357 
    358 UChar UString::operator[](int pos) const
    359 {
    360     if (pos >= size())
    361         return '\0';
    362     return data()[pos];
    363 }
    364 
    365 double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
    366 {
    367     if (size() == 1) {
    368         UChar c = data()[0];
    369         if (isASCIIDigit(c))
    370             return c - '0';
    371         if (isASCIISpace(c) && tolerateEmptyString)
    372             return 0;
    373         return NaN;
    374     }
    375 
    376     // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
    377     // after the number, so this is too strict a check.
    378     CStringBuffer s;
    379     if (!getCString(s))
    380         return NaN;
    381     const char* c = s.data();
    382 
    383     // skip leading white space
    384     while (isASCIISpace(*c))
    385         c++;
    386 
    387     // empty string ?
    388     if (*c == '\0')
    389         return tolerateEmptyString ? 0.0 : NaN;
    390 
    391     double d;
    392 
    393     // hex number ?
    394     if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
    395         const char* firstDigitPosition = c + 2;
    396         c++;
    397         d = 0.0;
    398         while (*(++c)) {
    399             if (*c >= '0' && *c <= '9')
    400                 d = d * 16.0 + *c - '0';
    401             else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
    402                 d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
    403             else
    404                 break;
    405         }
    406 
    407         if (d >= mantissaOverflowLowerBound)
    408             d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    409     } else {
    410         // regular number ?
    411         char* end;
    412         d = WTF::strtod(c, &end);
    413         if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
    414             c = end;
    415         } else {
    416             double sign = 1.0;
    417 
    418             if (*c == '+')
    419                 c++;
    420             else if (*c == '-') {
    421                 sign = -1.0;
    422                 c++;
    423             }
    424 
    425             // We used strtod() to do the conversion. However, strtod() handles
    426             // infinite values slightly differently than JavaScript in that it
    427             // converts the string "inf" with any capitalization to infinity,
    428             // whereas the ECMA spec requires that it be converted to NaN.
    429 
    430             if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
    431                 d = sign * Inf;
    432                 c += 8;
    433             } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
    434                 c = end;
    435             else
    436                 return NaN;
    437         }
    438     }
    439 
    440     // allow trailing white space
    441     while (isASCIISpace(*c))
    442         c++;
    443     // don't allow anything after - unless tolerant=true
    444     if (!tolerateTrailingJunk && *c != '\0')
    445         d = NaN;
    446 
    447     return d;
    448 }
    449 
// Convenience overload: converts with empty strings tolerated (they yield 0).
double UString::toDouble(bool tolerateTrailingJunk) const
{
    return toDouble(tolerateTrailingJunk, true);
}
    454 
// Convenience overload: trailing junk is rejected and empty strings are
// tolerated (they yield 0).
double UString::toDouble() const
{
    return toDouble(false, true);
}
    459 
    460 uint32_t UString::toUInt32(bool* ok) const
    461 {
    462     double d = toDouble();
    463     bool b = true;
    464 
    465     if (d != static_cast<uint32_t>(d)) {
    466         b = false;
    467         d = 0;
    468     }
    469 
    470     if (ok)
    471         *ok = b;
    472 
    473     return static_cast<uint32_t>(d);
    474 }
    475 
    476 uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
    477 {
    478     double d = toDouble(false, tolerateEmptyString);
    479     bool b = true;
    480 
    481     if (d != static_cast<uint32_t>(d)) {
    482         b = false;
    483         d = 0;
    484     }
    485 
    486     if (ok)
    487         *ok = b;
    488 
    489     return static_cast<uint32_t>(d);
    490 }
    491 
    492 uint32_t UString::toStrictUInt32(bool* ok) const
    493 {
    494     if (ok)
    495         *ok = false;
    496 
    497     // Empty string is not OK.
    498     int len = m_rep->size();
    499     if (len == 0)
    500         return 0;
    501     const UChar* p = m_rep->data();
    502     unsigned short c = p[0];
    503 
    504     // If the first digit is 0, only 0 itself is OK.
    505     if (c == '0') {
    506         if (len == 1 && ok)
    507             *ok = true;
    508         return 0;
    509     }
    510 
    511     // Convert to UInt32, checking for overflow.
    512     uint32_t i = 0;
    513     while (1) {
    514         // Process character, turning it into a digit.
    515         if (c < '0' || c > '9')
    516             return 0;
    517         const unsigned d = c - '0';
    518 
    519         // Multiply by 10, checking for overflow out of 32 bits.
    520         if (i > 0xFFFFFFFFU / 10)
    521             return 0;
    522         i *= 10;
    523 
    524         // Add in the digit, checking for overflow out of 32 bits.
    525         const unsigned max = 0xFFFFFFFFU - d;
    526         if (i > max)
    527             return 0;
    528         i += d;
    529 
    530         // Handle end of string.
    531         if (--len == 0) {
    532             if (ok)
    533                 *ok = true;
    534             return i;
    535         }
    536 
    537         // Get next character.
    538         c = *(++p);
    539     }
    540 }
    541 
    542 int UString::find(const UString& f, int pos) const
    543 {
    544     int fsz = f.size();
    545 
    546     if (pos < 0)
    547         pos = 0;
    548 
    549     if (fsz == 1) {
    550         UChar ch = f[0];
    551         const UChar* end = data() + size();
    552         for (const UChar* c = data() + pos; c < end; c++) {
    553             if (*c == ch)
    554                 return static_cast<int>(c - data());
    555         }
    556         return -1;
    557     }
    558 
    559     int sz = size();
    560     if (sz < fsz)
    561         return -1;
    562     if (fsz == 0)
    563         return pos;
    564     const UChar* end = data() + sz - fsz;
    565     int fsizeminusone = (fsz - 1) * sizeof(UChar);
    566     const UChar* fdata = f.data();
    567     unsigned short fchar = fdata[0];
    568     ++fdata;
    569     for (const UChar* c = data() + pos; c <= end; c++) {
    570         if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
    571             return static_cast<int>(c - data());
    572     }
    573 
    574     return -1;
    575 }
    576 
    577 int UString::find(UChar ch, int pos) const
    578 {
    579     if (pos < 0)
    580         pos = 0;
    581     const UChar* end = data() + size();
    582     for (const UChar* c = data() + pos; c < end; c++) {
    583         if (*c == ch)
    584             return static_cast<int>(c - data());
    585     }
    586 
    587     return -1;
    588 }
    589 
    590 int UString::rfind(const UString& f, int pos) const
    591 {
    592     int sz = size();
    593     int fsz = f.size();
    594     if (sz < fsz)
    595         return -1;
    596     if (pos < 0)
    597         pos = 0;
    598     if (pos > sz - fsz)
    599         pos = sz - fsz;
    600     if (fsz == 0)
    601         return pos;
    602     int fsizeminusone = (fsz - 1) * sizeof(UChar);
    603     const UChar* fdata = f.data();
    604     for (const UChar* c = data() + pos; c >= data(); c--) {
    605         if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
    606             return static_cast<int>(c - data());
    607     }
    608 
    609     return -1;
    610 }
    611 
    612 int UString::rfind(UChar ch, int pos) const
    613 {
    614     if (isEmpty())
    615         return -1;
    616     if (pos + 1 >= size())
    617         pos = size() - 1;
    618     for (const UChar* c = data() + pos; c >= data(); c--) {
    619         if (*c == ch)
    620             return static_cast<int>(c - data());
    621     }
    622 
    623     return -1;
    624 }
    625 
    626 UString UString::substr(int pos, int len) const
    627 {
    628     int s = size();
    629 
    630     if (pos < 0)
    631         pos = 0;
    632     else if (pos >= s)
    633         pos = s;
    634     if (len < 0)
    635         len = s;
    636     if (pos + len >= s)
    637         len = s - pos;
    638 
    639     if (pos == 0 && len == s)
    640         return *this;
    641 
    642     return UString(Rep::create(m_rep, pos, len));
    643 }
    644 
    645 bool operator==(const UString& s1, const char *s2)
    646 {
    647     if (s2 == 0)
    648         return s1.isEmpty();
    649 
    650     const UChar* u = s1.data();
    651     const UChar* uend = u + s1.size();
    652     while (u != uend && *s2) {
    653         if (u[0] != (unsigned char)*s2)
    654             return false;
    655         s2++;
    656         u++;
    657     }
    658 
    659     return u == uend && *s2 == 0;
    660 }
    661 
    662 bool operator<(const UString& s1, const UString& s2)
    663 {
    664     const int l1 = s1.size();
    665     const int l2 = s2.size();
    666     const int lmin = l1 < l2 ? l1 : l2;
    667     const UChar* c1 = s1.data();
    668     const UChar* c2 = s2.data();
    669     int l = 0;
    670     while (l < lmin && *c1 == *c2) {
    671         c1++;
    672         c2++;
    673         l++;
    674     }
    675     if (l < lmin)
    676         return (c1[0] < c2[0]);
    677 
    678     return (l1 < l2);
    679 }
    680 
    681 bool operator>(const UString& s1, const UString& s2)
    682 {
    683     const int l1 = s1.size();
    684     const int l2 = s2.size();
    685     const int lmin = l1 < l2 ? l1 : l2;
    686     const UChar* c1 = s1.data();
    687     const UChar* c2 = s2.data();
    688     int l = 0;
    689     while (l < lmin && *c1 == *c2) {
    690         c1++;
    691         c2++;
    692         l++;
    693     }
    694     if (l < lmin)
    695         return (c1[0] > c2[0]);
    696 
    697     return (l1 > l2);
    698 }
    699 
    700 int compare(const UString& s1, const UString& s2)
    701 {
    702     const int l1 = s1.size();
    703     const int l2 = s2.size();
    704     const int lmin = l1 < l2 ? l1 : l2;
    705     const UChar* c1 = s1.data();
    706     const UChar* c2 = s2.data();
    707     int l = 0;
    708     while (l < lmin && *c1 == *c2) {
    709         c1++;
    710         c2++;
    711         l++;
    712     }
    713 
    714     if (l < lmin)
    715         return (c1[0] > c2[0]) ? 1 : -1;
    716 
    717     if (l1 == l2)
    718         return 0;
    719 
    720     return (l1 > l2) ? 1 : -1;
    721 }
    722 
    723 bool equal(const UString::Rep* r, const UString::Rep* b)
    724 {
    725     int length = r->size();
    726     if (length != b->size())
    727         return false;
    728     const UChar* d = r->data();
    729     const UChar* s = b->data();
    730     for (int i = 0; i != length; ++i) {
    731         if (d[i] != s[i])
    732             return false;
    733     }
    734     return true;
    735 }
    736 
    737 CString UString::UTF8String(bool strict) const
    738 {
    739     // Allocate a buffer big enough to hold all the characters.
    740     const int length = size();
    741     Vector<char, 1024> buffer(length * 3);
    742 
    743     // Convert to runs of 8-bit characters.
    744     char* p = buffer.data();
    745     const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
    746     ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
    747     if (result != conversionOK)
    748         return CString();
    749 
    750     return CString(buffer.data(), p - buffer.data());
    751 }
    752 
    753 } // namespace JSC
    754