Home | History | Annotate | Download | only in runtime
      1 /*
      2  *  Copyright (C) 1999-2000 Harri Porten (porten (at) kde.org)
      3  *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
      4  *  Copyright (C) 2007 Cameron Zwarich (cwzwarich (at) uwaterloo.ca)
      5  *  Copyright (C) 2009 Google Inc. All rights reserved.
      6  *
      7  *  This library is free software; you can redistribute it and/or
      8  *  modify it under the terms of the GNU Library General Public
      9  *  License as published by the Free Software Foundation; either
     10  *  version 2 of the License, or (at your option) any later version.
     11  *
     12  *  This library is distributed in the hope that it will be useful,
     13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
     14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     15  *  Library General Public License for more details.
     16  *
     17  *  You should have received a copy of the GNU Library General Public License
     18  *  along with this library; see the file COPYING.LIB.  If not, write to
     19  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     20  *  Boston, MA 02110-1301, USA.
     21  *
     22  */
     23 
     24 #include "config.h"
     25 #include "UString.h"
     26 
     27 #include "JSGlobalObjectFunctions.h"
     28 #include "Heap.h"
     29 #include "Identifier.h"
     30 #include "Operations.h"
     31 #include <ctype.h>
     32 #include <limits.h>
     33 #include <limits>
     34 #include <stdio.h>
     35 #include <stdlib.h>
     36 #include <wtf/ASCIICType.h>
     37 #include <wtf/Assertions.h>
     38 #include <wtf/DecimalNumber.h>
     39 #include <wtf/MathExtras.h>
     40 #include <wtf/StringExtras.h>
     41 #include <wtf/Vector.h>
     42 #include <wtf/unicode/UTF8.h>
     43 
     44 #if HAVE(STRINGS_H)
     45 #include <strings.h>
     46 #endif
     47 
     48 using namespace WTF;
     49 using namespace WTF::Unicode;
     50 using namespace std;
     51 
     52 namespace JSC {
     53 
     54 extern const double NaN;
     55 extern const double Inf;
     56 
     57 COMPILE_ASSERT(sizeof(UString) == sizeof(void*), UString_should_stay_small);
     58 
     59 // Construct a string with UTF-16 data.
     60 UString::UString(const UChar* characters, unsigned length)
     61     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     62 {
     63 }
     64 
     65 // Construct a string with UTF-16 data, from a null-terminated source.
     66 UString::UString(const UChar* characters)
     67 {
     68     if (!characters)
     69         return;
     70 
     71     int length = 0;
     72     while (characters[length] != UChar(0))
     73         ++length;
     74 
     75     m_impl = StringImpl::create(characters, length);
     76 }
     77 
     78 // Construct a string with latin1 data.
     79 UString::UString(const char* characters, unsigned length)
     80     : m_impl(characters ? StringImpl::create(characters, length) : 0)
     81 {
     82 }
     83 
     84 // Construct a string with latin1 data, from a null-terminated source.
     85 UString::UString(const char* characters)
     86     : m_impl(characters ? StringImpl::create(characters) : 0)
     87 {
     88 }
     89 
     90 UString UString::number(int i)
     91 {
     92     UChar buf[1 + sizeof(i) * 3];
     93     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
     94     UChar* p = end;
     95 
     96     if (i == 0)
     97         *--p = '0';
     98     else if (i == INT_MIN) {
     99         char minBuf[1 + sizeof(i) * 3];
    100         snprintf(minBuf, sizeof(minBuf), "%d", INT_MIN);
    101         return UString(minBuf);
    102     } else {
    103         bool negative = false;
    104         if (i < 0) {
    105             negative = true;
    106             i = -i;
    107         }
    108         while (i) {
    109             *--p = static_cast<unsigned short>((i % 10) + '0');
    110             i /= 10;
    111         }
    112         if (negative)
    113             *--p = '-';
    114     }
    115 
    116     return UString(p, static_cast<unsigned>(end - p));
    117 }
    118 
    119 UString UString::number(long long i)
    120 {
    121     UChar buf[1 + sizeof(i) * 3];
    122     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
    123     UChar* p = end;
    124 
    125     if (i == 0)
    126         *--p = '0';
    127     else if (i == std::numeric_limits<long long>::min()) {
    128         char minBuf[1 + sizeof(i) * 3];
    129 #if OS(WINDOWS)
    130         snprintf(minBuf, sizeof(minBuf), "%I64d", std::numeric_limits<long long>::min());
    131 #else
    132         snprintf(minBuf, sizeof(minBuf), "%lld", std::numeric_limits<long long>::min());
    133 #endif
    134         return UString(minBuf);
    135     } else {
    136         bool negative = false;
    137         if (i < 0) {
    138             negative = true;
    139             i = -i;
    140         }
    141         while (i) {
    142             *--p = static_cast<unsigned short>((i % 10) + '0');
    143             i /= 10;
    144         }
    145         if (negative)
    146             *--p = '-';
    147     }
    148 
    149     return UString(p, static_cast<unsigned>(end - p));
    150 }
    151 
    152 UString UString::number(unsigned u)
    153 {
    154     UChar buf[sizeof(u) * 3];
    155     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
    156     UChar* p = end;
    157 
    158     if (u == 0)
    159         *--p = '0';
    160     else {
    161         while (u) {
    162             *--p = static_cast<unsigned short>((u % 10) + '0');
    163             u /= 10;
    164         }
    165     }
    166 
    167     return UString(p, static_cast<unsigned>(end - p));
    168 }
    169 
    170 UString UString::number(long l)
    171 {
    172     UChar buf[1 + sizeof(l) * 3];
    173     UChar* end = buf + WTF_ARRAY_LENGTH(buf);
    174     UChar* p = end;
    175 
    176     if (l == 0)
    177         *--p = '0';
    178     else if (l == LONG_MIN) {
    179         char minBuf[1 + sizeof(l) * 3];
    180         snprintf(minBuf, sizeof(minBuf), "%ld", LONG_MIN);
    181         return UString(minBuf);
    182     } else {
    183         bool negative = false;
    184         if (l < 0) {
    185             negative = true;
    186             l = -l;
    187         }
    188         while (l) {
    189             *--p = static_cast<unsigned short>((l % 10) + '0');
    190             l /= 10;
    191         }
    192         if (negative)
    193             *--p = '-';
    194     }
    195 
    196     return UString(p, end - p);
    197 }
    198 
    199 UString UString::number(double d)
    200 {
    201     NumberToStringBuffer buffer;
    202     unsigned length = numberToString(d, buffer);
    203     return UString(buffer, length);
    204 }
    205 
    206 UString UString::substringSharingImpl(unsigned offset, unsigned length) const
    207 {
    208     // FIXME: We used to check against a limit of Heap::minExtraCost / sizeof(UChar).
    209 
    210     unsigned stringLength = this->length();
    211     offset = min(offset, stringLength);
    212     length = min(length, stringLength - offset);
    213 
    214     if (!offset && length == stringLength)
    215         return *this;
    216     return UString(StringImpl::create(m_impl, offset, length));
    217 }
    218 
    219 bool operator==(const UString& s1, const char *s2)
    220 {
    221     if (s2 == 0)
    222         return s1.isEmpty();
    223 
    224     const UChar* u = s1.characters();
    225     const UChar* uend = u + s1.length();
    226     while (u != uend && *s2) {
    227         if (u[0] != (unsigned char)*s2)
    228             return false;
    229         s2++;
    230         u++;
    231     }
    232 
    233     return u == uend && *s2 == 0;
    234 }
    235 
    236 bool operator<(const UString& s1, const UString& s2)
    237 {
    238     const unsigned l1 = s1.length();
    239     const unsigned l2 = s2.length();
    240     const unsigned lmin = l1 < l2 ? l1 : l2;
    241     const UChar* c1 = s1.characters();
    242     const UChar* c2 = s2.characters();
    243     unsigned l = 0;
    244     while (l < lmin && *c1 == *c2) {
    245         c1++;
    246         c2++;
    247         l++;
    248     }
    249     if (l < lmin)
    250         return (c1[0] < c2[0]);
    251 
    252     return (l1 < l2);
    253 }
    254 
    255 bool operator>(const UString& s1, const UString& s2)
    256 {
    257     const unsigned l1 = s1.length();
    258     const unsigned l2 = s2.length();
    259     const unsigned lmin = l1 < l2 ? l1 : l2;
    260     const UChar* c1 = s1.characters();
    261     const UChar* c2 = s2.characters();
    262     unsigned l = 0;
    263     while (l < lmin && *c1 == *c2) {
    264         c1++;
    265         c2++;
    266         l++;
    267     }
    268     if (l < lmin)
    269         return (c1[0] > c2[0]);
    270 
    271     return (l1 > l2);
    272 }
    273 
    274 CString UString::ascii() const
    275 {
    276     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
    277     // preserved, characters outside of this range are converted to '?'.
    278 
    279     unsigned length = this->length();
    280     const UChar* characters = this->characters();
    281 
    282     char* characterBuffer;
    283     CString result = CString::newUninitialized(length, characterBuffer);
    284 
    285     for (unsigned i = 0; i < length; ++i) {
    286         UChar ch = characters[i];
    287         characterBuffer[i] = ch && (ch < 0x20 || ch >= 0x7f) ? '?' : ch;
    288     }
    289 
    290     return result;
    291 }
    292 
    293 CString UString::latin1() const
    294 {
    295     // Basic Latin1 (ISO) encoding - Unicode characters 0..255 are
    296     // preserved, characters outside of this range are converted to '?'.
    297 
    298     unsigned length = this->length();
    299     const UChar* characters = this->characters();
    300 
    301     char* characterBuffer;
    302     CString result = CString::newUninitialized(length, characterBuffer);
    303 
    304     for (unsigned i = 0; i < length; ++i) {
    305         UChar ch = characters[i];
    306         characterBuffer[i] = ch > 0xff ? '?' : ch;
    307     }
    308 
    309     return result;
    310 }
    311 
    312 // Helper to write a three-byte UTF-8 code point to the buffer, caller must check room is available.
    313 static inline void putUTF8Triple(char*& buffer, UChar ch)
    314 {
    315     ASSERT(ch >= 0x0800);
    316     *buffer++ = static_cast<char>(((ch >> 12) & 0x0F) | 0xE0);
    317     *buffer++ = static_cast<char>(((ch >> 6) & 0x3F) | 0x80);
    318     *buffer++ = static_cast<char>((ch & 0x3F) | 0x80);
    319 }
    320 
    321 CString UString::utf8(bool strict) const
    322 {
    323     unsigned length = this->length();
    324     const UChar* characters = this->characters();
    325 
    326     // Allocate a buffer big enough to hold all the characters
    327     // (an individual UTF-16 UChar can only expand to 3 UTF-8 bytes).
    328     // Optimization ideas, if we find this function is hot:
    329     //  * We could speculatively create a CStringBuffer to contain 'length'
    330     //    characters, and resize if necessary (i.e. if the buffer contains
    331     //    non-ascii characters). (Alternatively, scan the buffer first for
    332     //    ascii characters, so we know this will be sufficient).
    333     //  * We could allocate a CStringBuffer with an appropriate size to
    334     //    have a good chance of being able to write the string into the
    335     //    buffer without reallocing (say, 1.5 x length).
    336     if (length > numeric_limits<unsigned>::max() / 3)
    337         return CString();
    338     Vector<char, 1024> bufferVector(length * 3);
    339 
    340     char* buffer = bufferVector.data();
    341     ConversionResult result = convertUTF16ToUTF8(&characters, characters + length, &buffer, buffer + bufferVector.size(), strict);
    342     ASSERT(result != targetExhausted); // (length * 3) should be sufficient for any conversion
    343 
    344     // Only produced from strict conversion.
    345     if (result == sourceIllegal)
    346         return CString();
    347 
    348     // Check for an unconverted high surrogate.
    349     if (result == sourceExhausted) {
    350         if (strict)
    351             return CString();
    352         // This should be one unpaired high surrogate. Treat it the same
    353         // was as an unpaired high surrogate would have been handled in
    354         // the middle of a string with non-strict conversion - which is
    355         // to say, simply encode it to UTF-8.
    356         ASSERT((characters + 1) == (this->characters() + length));
    357         ASSERT((*characters >= 0xD800) && (*characters <= 0xDBFF));
    358         // There should be room left, since one UChar hasn't been converted.
    359         ASSERT((buffer + 3) <= (buffer + bufferVector.size()));
    360         putUTF8Triple(buffer, *characters);
    361     }
    362 
    363     return CString(bufferVector.data(), buffer - bufferVector.data());
    364 }
    365 
    366 } // namespace JSC
    367