Home | History | Annotate | Download | only in text
      1 /*
      2  * (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef WTFString_h
     23 #define WTFString_h
     24 
     25 // This file would be called String.h, but that conflicts with <string.h>
     26 // on systems without case-sensitive file systems.
     27 
     28 #include "StringImpl.h"
     29 
     30 #ifdef __OBJC__
     31 #include <objc/objc.h>
     32 #endif
     33 
     34 #if USE(CF)
     35 typedef const struct __CFString * CFStringRef;
     36 #endif
     37 
     38 #if PLATFORM(QT)
     39 QT_BEGIN_NAMESPACE
     40 class QString;
     41 QT_END_NAMESPACE
     42 #include <QDataStream>
     43 #endif
     44 
     45 #if PLATFORM(WX)
     46 class wxString;
     47 #endif
     48 
     49 #if PLATFORM(HAIKU)
     50 class BString;
     51 #endif
     52 
     53 #if PLATFORM(BREWMP)
     54 // AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts.
     55 #ifndef _AECHAR_DEFINED
     56 typedef uint16             AECHAR;
     57 #define _AECHAR_DEFINED
     58 #endif
     59 #endif
     60 
     61 namespace WTF {
     62 
     63 class CString;
     64 struct StringHash;
     65 
     66 // Declarations of string operations
     67 
     68 bool charactersAreAllASCII(const UChar*, size_t);
     69 bool charactersAreAllLatin1(const UChar*, size_t);
     70 int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
     71 unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
     72 int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
     73 uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
     74 intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
     75 
     76 int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
     77 unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
     78 int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
     79 uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
     80 intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
     81 
     82 double charactersToDouble(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
     83 float charactersToFloat(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
     84 
     85 template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters(const UChar*, size_t);
     86 
     87 class String {
     88 public:
     89     // Construct a null string, distinguishable from an empty string.
     90     String() { }
     91 
     92     // Construct a string with UTF-16 data.
     93     String(const UChar* characters, unsigned length);
     94 
     95     // Construct a string by copying the contents of a vector.  To avoid
     96     // copying, consider using String::adopt instead.
     97     template<size_t inlineCapacity>
     98     explicit String(const Vector<UChar, inlineCapacity>&);
     99 
    100     // Construct a string with UTF-16 data, from a null-terminated source.
    101     String(const UChar*);
    102 
    103     // Construct a string with latin1 data.
    104     String(const char* characters, unsigned length);
    105 
    106     // Construct a string with latin1 data, from a null-terminated source.
    107     String(const char* characters);
    108 
    109     // Construct a string referencing an existing StringImpl.
    110     String(StringImpl* impl) : m_impl(impl) { }
    111     String(PassRefPtr<StringImpl> impl) : m_impl(impl) { }
    112     String(RefPtr<StringImpl> impl) : m_impl(impl) { }
    113 
    114     // Inline the destructor.
    115     ALWAYS_INLINE ~String() { }
    116 
    117     void swap(String& o) { m_impl.swap(o.m_impl); }
    118 
    119     static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
    120     template<size_t inlineCapacity>
    121     static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); }
    122 
    123     bool isNull() const { return !m_impl; }
    124     bool isEmpty() const { return !m_impl || !m_impl->length(); }
    125 
    126     StringImpl* impl() const { return m_impl.get(); }
    127 
    128     unsigned length() const
    129     {
    130         if (!m_impl)
    131             return 0;
    132         return m_impl->length();
    133     }
    134 
    135     const UChar* characters() const
    136     {
    137         if (!m_impl)
    138             return 0;
    139         return m_impl->characters();
    140     }
    141 
    142     CString ascii() const;
    143     CString latin1() const;
    144     CString utf8(bool strict = false) const;
    145 
    146     UChar operator[](unsigned index) const
    147     {
    148         if (!m_impl || index >= m_impl->length())
    149             return 0;
    150         return m_impl->characters()[index];
    151     }
    152 
    153     static String number(short);
    154     static String number(unsigned short);
    155     static String number(int);
    156     static String number(unsigned);
    157     static String number(long);
    158     static String number(unsigned long);
    159     static String number(long long);
    160     static String number(unsigned long long);
    161     static String number(double);
    162 
    163     // Find a single character or string, also with match function & latin1 forms.
    164     size_t find(UChar c, unsigned start = 0) const
    165         { return m_impl ? m_impl->find(c, start) : notFound; }
    166     size_t find(const String& str, unsigned start = 0) const
    167         { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
    168     size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
    169         { return m_impl ? m_impl->find(matchFunction, start) : notFound; }
    170     size_t find(const char* str, unsigned start = 0) const
    171         { return m_impl ? m_impl->find(str, start) : notFound; }
    172 
    173     // Find the last instance of a single character or string.
    174     size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
    175         { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
    176     size_t reverseFind(const String& str, unsigned start = UINT_MAX) const
    177         { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
    178 
    179     // Case insensitive string matching.
    180     size_t findIgnoringCase(const char* str, unsigned start = 0) const
    181         { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; }
    182     size_t findIgnoringCase(const String& str, unsigned start = 0) const
    183         { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; }
    184     size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const
    185         { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; }
    186 
    187     // Wrappers for find & reverseFind adding dynamic sensitivity check.
    188     size_t find(const char* str, unsigned start, bool caseSensitive) const
    189         { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
    190     size_t find(const String& str, unsigned start, bool caseSensitive) const
    191         { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
    192     size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const
    193         { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); }
    194 
    195     const UChar* charactersWithNullTermination();
    196 
    197     UChar32 characterStartingAt(unsigned) const; // Ditto.
    198 
    199     bool contains(UChar c) const { return find(c) != notFound; }
    200     bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
    201     bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
    202 
    203     bool startsWith(const String& s, bool caseSensitive = true) const
    204         { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
    205     bool endsWith(const String& s, bool caseSensitive = true) const
    206         { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
    207 
    208     void append(const String&);
    209     void append(char);
    210     void append(UChar);
    211     void append(const UChar*, unsigned length);
    212     void insert(const String&, unsigned pos);
    213     void insert(const UChar*, unsigned length, unsigned pos);
    214 
    215     String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
    216     String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
    217     String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
    218     String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
    219 
    220     void makeLower() { if (m_impl) m_impl = m_impl->lower(); }
    221     void makeUpper() { if (m_impl) m_impl = m_impl->upper(); }
    222     void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); }
    223 
    224     void truncate(unsigned len);
    225     void remove(unsigned pos, int len = 1);
    226 
    227     String substring(unsigned pos, unsigned len = UINT_MAX) const;
    228     String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const;
    229     String left(unsigned len) const { return substring(0, len); }
    230     String right(unsigned len) const { return substring(length() - len, len); }
    231 
    232     // Returns a lowercase/uppercase version of the string
    233     String lower() const;
    234     String upper() const;
    235 
    236     String stripWhiteSpace() const;
    237     String simplifyWhiteSpace() const;
    238 
    239     String removeCharacters(CharacterMatchFunctionPtr) const;
    240     template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
    241 
    242     // Return the string with case folded for case insensitive comparison.
    243     String foldCase() const;
    244 
    245 #if !PLATFORM(QT)
    246     static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
    247 #else
    248     static String format(const char *, ...);
    249 #endif
    250 
    251     // Returns an uninitialized string. The characters needs to be written
    252     // into the buffer returned in data before the returned string is used.
    253     // Failure to do this will have unpredictable results.
    254     static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
    255 
    256     void split(const String& separator, Vector<String>& result) const;
    257     void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
    258     void split(UChar separator, Vector<String>& result) const;
    259     void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
    260 
    261     int toIntStrict(bool* ok = 0, int base = 10) const;
    262     unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
    263     int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
    264     uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
    265     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
    266 
    267     int toInt(bool* ok = 0) const;
    268     unsigned toUInt(bool* ok = 0) const;
    269     int64_t toInt64(bool* ok = 0) const;
    270     uint64_t toUInt64(bool* ok = 0) const;
    271     intptr_t toIntPtr(bool* ok = 0) const;
    272     double toDouble(bool* ok = 0, bool* didReadNumber = 0) const;
    273     float toFloat(bool* ok = 0, bool* didReadNumber = 0) const;
    274 
    275     bool percentage(int& percentage) const;
    276 
    277     // Returns a StringImpl suitable for use on another thread.
    278     String crossThreadString() const;
    279     // Makes a deep copy. Helpful only if you need to use a String on another thread
    280     // (use crossThreadString if the method call doesn't need to be threadsafe).
    281     // Since the underlying StringImpl objects are immutable, there's no other reason
    282     // to ever prefer copy() over plain old assignment.
    283     String threadsafeCopy() const;
    284 
    285     // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that
    286     // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
    287     typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA);
    288     typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB);
    289     operator UnspecifiedBoolTypeA() const;
    290     operator UnspecifiedBoolTypeB() const;
    291 
    292 #if USE(CF)
    293     String(CFStringRef);
    294     CFStringRef createCFString() const;
    295 #endif
    296 
    297 #ifdef __OBJC__
    298     String(NSString*);
    299 
    300     // This conversion maps NULL to "", which loses the meaning of NULL, but we
    301     // need this mapping because AppKit crashes when passed nil NSStrings.
    302     operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
    303 #endif
    304 
    305 #if PLATFORM(QT)
    306     String(const QString&);
    307     String(const QStringRef&);
    308     operator QString() const;
    309 #endif
    310 
    311 #if PLATFORM(WX)
    312     String(const wxString&);
    313     operator wxString() const;
    314 #endif
    315 
    316 #if PLATFORM(HAIKU)
    317     String(const BString&);
    318     operator BString() const;
    319 #endif
    320 
    321 #if PLATFORM(BREWMP)
    322     String(const AECHAR*);
    323 #endif
    324 
    325     // String::fromUTF8 will return a null string if
    326     // the input data contains invalid UTF-8 sequences.
    327     static String fromUTF8(const char*, size_t);
    328     static String fromUTF8(const char*);
    329 
    330     // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
    331     static String fromUTF8WithLatin1Fallback(const char*, size_t);
    332 
    333     // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
    334     WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0) const
    335     {
    336         if (m_impl)
    337             return m_impl->defaultWritingDirection(hasStrongDirectionality);
    338         if (hasStrongDirectionality)
    339             *hasStrongDirectionality = false;
    340         return WTF::Unicode::LeftToRight;
    341     }
    342 
    343     bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
    344     bool containsOnlyLatin1() const { return charactersAreAllLatin1(characters(), length()); }
    345 
    346     // Hash table deleted values, which are only constructed and never copied or destroyed.
    347     String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
    348     bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
    349 
    350 private:
    351     RefPtr<StringImpl> m_impl;
    352 };
    353 
    354 #if PLATFORM(QT)
    355 QDataStream& operator<<(QDataStream& stream, const String& str);
    356 QDataStream& operator>>(QDataStream& stream, String& str);
    357 #endif
    358 
    359 String operator+(const String&, const String&);
    360 String operator+(const String&, const char*);
    361 String operator+(const char*, const String&);
    362 
    363 inline String& operator+=(String& a, const String& b) { a.append(b); return a; }
    364 
    365 inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); }
    366 inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); }
    367 inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); }
    368 
    369 inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
    370 inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); }
    371 inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); }
    372 
    373 inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
    374 inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
    375 inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
    376 
    377 inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
    378 {
    379     return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
    380 }
    381 
    382 inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
    383 
    384 template<size_t inlineCapacity>
    385 inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); }
    386 
    387 inline bool operator!(const String& str) { return str.isNull(); }
    388 
    389 inline void swap(String& a, String& b) { a.swap(b); }
    390 
    391 // Definitions of string operations
    392 
    393 template<size_t inlineCapacity>
    394 String::String(const Vector<UChar, inlineCapacity>& vector)
    395     : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : 0)
    396 {
    397 }
    398 
    399 #ifdef __OBJC__
    400 // This is for situations in WebKit where the long standing behavior has been
    401 // "nil if empty", so we try to maintain longstanding behavior for the sake of
    402 // entrenched clients
    403 inline NSString* nsStringNilIfEmpty(const String& str) {  return str.isEmpty() ? nil : (NSString*)str; }
    404 #endif
    405 
    406 inline bool charactersAreAllASCII(const UChar* characters, size_t length)
    407 {
    408     UChar ored = 0;
    409     for (size_t i = 0; i < length; ++i)
    410         ored |= characters[i];
    411     return !(ored & 0xFF80);
    412 }
    413 
    414 inline bool charactersAreAllLatin1(const UChar* characters, size_t length)
    415 {
    416     UChar ored = 0;
    417     for (size_t i = 0; i < length; ++i)
    418         ored |= characters[i];
    419     return !(ored & 0xFF00);
    420 }
    421 
    422 int codePointCompare(const String&, const String&);
    423 
    424 inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
    425 {
    426     while (index < length) {
    427         if (characters[index] == matchCharacter)
    428             return index;
    429         ++index;
    430     }
    431     return notFound;
    432 }
    433 
    434 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
    435 {
    436     while (index < length) {
    437         if (matchFunction(characters[index]))
    438             return index;
    439         ++index;
    440     }
    441     return notFound;
    442 }
    443 
    444 inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
    445 {
    446     if (!length)
    447         return notFound;
    448     if (index >= length)
    449         index = length - 1;
    450     while (characters[index] != matchCharacter) {
    451         if (!index--)
    452             return notFound;
    453     }
    454     return index;
    455 }
    456 
    457 inline void append(Vector<UChar>& vector, const String& string)
    458 {
    459     vector.append(string.characters(), string.length());
    460 }
    461 
    462 inline void appendNumber(Vector<UChar>& vector, unsigned char number)
    463 {
    464     int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1);
    465     size_t vectorSize = vector.size();
    466     vector.grow(vectorSize + numberLength);
    467 
    468     switch (numberLength) {
    469     case 3:
    470         vector[vectorSize + 2] = number % 10 + '0';
    471         number /= 10;
    472 
    473     case 2:
    474         vector[vectorSize + 1] = number % 10 + '0';
    475         number /= 10;
    476 
    477     case 1:
    478         vector[vectorSize] = number % 10 + '0';
    479     }
    480 }
    481 
    482 template<bool isSpecialCharacter(UChar)> inline bool isAllSpecialCharacters(const UChar* characters, size_t length)
    483 {
    484     for (size_t i = 0; i < length; ++i) {
    485         if (!isSpecialCharacter(characters[i]))
    486             return false;
    487     }
    488     return true;
    489 }
    490 
    491 template<bool isSpecialCharacter(UChar)> inline bool String::isAllSpecialCharacters() const
    492 {
    493     return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters(), length());
    494 }
    495 
    496 // StringHash is the default hash for String
    497 template<typename T> struct DefaultHash;
    498 template<> struct DefaultHash<String> {
    499     typedef StringHash Hash;
    500 };
    501 
    502 template <> struct VectorTraits<String> : SimpleClassVectorTraits { };
    503 
    504 }
    505 
    506 using WTF::CString;
    507 using WTF::String;
    508 using WTF::append;
    509 using WTF::appendNumber;
    510 using WTF::charactersAreAllASCII;
    511 using WTF::charactersAreAllLatin1;
    512 using WTF::charactersToIntStrict;
    513 using WTF::charactersToUIntStrict;
    514 using WTF::charactersToInt64Strict;
    515 using WTF::charactersToUInt64Strict;
    516 using WTF::charactersToIntPtrStrict;
    517 using WTF::charactersToInt;
    518 using WTF::charactersToUInt;
    519 using WTF::charactersToInt64;
    520 using WTF::charactersToUInt64;
    521 using WTF::charactersToIntPtr;
    522 using WTF::charactersToDouble;
    523 using WTF::charactersToFloat;
    524 using WTF::equal;
    525 using WTF::equalIgnoringCase;
    526 using WTF::find;
    527 using WTF::isAllSpecialCharacters;
    528 using WTF::isSpaceOrNewline;
    529 using WTF::reverseFind;
    530 
    531 #endif
    532