Home | History | Annotate | Download | only in text
      1 /*
      2  * (C) 1999 Lars Knoll (knoll (at) kde.org)
      3  * Copyright (C) 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved.
      4  *
      5  * This library is free software; you can redistribute it and/or
      6  * modify it under the terms of the GNU Library General Public
      7  * License as published by the Free Software Foundation; either
      8  * version 2 of the License, or (at your option) any later version.
      9  *
     10  * This library is distributed in the hope that it will be useful,
     11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     13  * Library General Public License for more details.
     14  *
     15  * You should have received a copy of the GNU Library General Public License
     16  * along with this library; see the file COPYING.LIB.  If not, write to
     17  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
     18  * Boston, MA 02110-1301, USA.
     19  *
     20  */
     21 
     22 #ifndef PlatformString_h
     23 #define PlatformString_h
     24 
     25 // This file would be called String.h, but that conflicts with <string.h>
     26 // on systems without case-sensitive file systems.
     27 
     28 #include "StringImpl.h"
     29 
     30 #ifdef __OBJC__
     31 #include <objc/objc.h>
     32 #endif
     33 
     34 #if PLATFORM(CF)
     35 typedef const struct __CFString * CFStringRef;
     36 #endif
     37 
     38 #if PLATFORM(QT)
     39 QT_BEGIN_NAMESPACE
     40 class QString;
     41 QT_END_NAMESPACE
     42 #include <QDataStream>
     43 #endif
     44 
     45 #if PLATFORM(WX)
     46 class wxString;
     47 #endif
     48 
     49 #if PLATFORM(HAIKU)
     50 class BString;
     51 #endif
     52 
     53 #if USE(JSC)
     54 namespace JSC {
     55 class Identifier;
     56 class UString;
     57 }
     58 #endif
     59 
     60 namespace WebCore {
     61 
     62 class CString;
     63 class SharedBuffer;
     64 struct StringHash;
     65 
     66 class String {
     67 public:
     68     String() { } // gives null string, distinguishable from an empty string
     69     String(const UChar*, unsigned length);
     70     String(const UChar*); // Specifically for null terminated UTF-16
     71 #if USE(JSC)
     72     String(const JSC::Identifier&);
     73     String(const JSC::UString&);
     74 #endif
     75     String(const char*);
     76     String(const char*, unsigned length);
     77     String(StringImpl* i) : m_impl(i) { }
     78     String(PassRefPtr<StringImpl> i) : m_impl(i) { }
     79     String(RefPtr<StringImpl> i) : m_impl(i) { }
     80 
     81     void swap(String& o) { m_impl.swap(o.m_impl); }
     82 
     83     // Hash table deleted values, which are only constructed and never copied or destroyed.
     84     String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
     85     bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
     86 
     87     static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
     88     static String adopt(Vector<UChar>& vector) { return StringImpl::adopt(vector); }
     89 
     90 #if USE(JSC)
     91     operator JSC::UString() const;
     92 #endif
     93 
     94     unsigned length() const;
     95     const UChar* characters() const;
     96     const UChar* charactersWithNullTermination();
     97 
     98     UChar operator[](unsigned i) const; // if i >= length(), returns 0
     99     UChar32 characterStartingAt(unsigned) const; // Ditto.
    100 
    101     bool contains(UChar c) const { return find(c) != -1; }
    102     bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
    103     bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != -1; }
    104 
    105     int find(UChar c, int start = 0) const
    106         { return m_impl ? m_impl->find(c, start) : -1; }
    107     int find(CharacterMatchFunctionPtr matchFunction, int start = 0) const
    108         { return m_impl ? m_impl->find(matchFunction, start) : -1; }
    109     int find(const char* str, int start = 0, bool caseSensitive = true) const
    110         { return m_impl ? m_impl->find(str, start, caseSensitive) : -1; }
    111     int find(const String& str, int start = 0, bool caseSensitive = true) const
    112         { return m_impl ? m_impl->find(str.impl(), start, caseSensitive) : -1; }
    113 
    114     int reverseFind(UChar c, int start = -1) const
    115         { return m_impl ? m_impl->reverseFind(c, start) : -1; }
    116     int reverseFind(const String& str, int start = -1, bool caseSensitive = true) const
    117         { return m_impl ? m_impl->reverseFind(str.impl(), start, caseSensitive) : -1; }
    118 
    119     bool startsWith(const String& s, bool caseSensitive = true) const
    120         { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
    121     bool endsWith(const String& s, bool caseSensitive = true) const
    122         { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
    123 
    124     void append(const String&);
    125     void append(char);
    126     void append(UChar);
    127     void append(const UChar*, unsigned length);
    128     void insert(const String&, unsigned pos);
    129     void insert(const UChar*, unsigned length, unsigned pos);
    130 
    131     String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
    132     String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
    133     String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
    134     String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
    135 
    136     void truncate(unsigned len);
    137     void remove(unsigned pos, int len = 1);
    138 
    139     String substring(unsigned pos, unsigned len = UINT_MAX) const;
    140     String left(unsigned len) const { return substring(0, len); }
    141     String right(unsigned len) const { return substring(length() - len, len); }
    142 
    143     // Returns a lowercase/uppercase version of the string
    144     String lower() const;
    145     String upper() const;
    146 
    147     String stripWhiteSpace() const;
    148     String simplifyWhiteSpace() const;
    149 
    150     String removeCharacters(CharacterMatchFunctionPtr) const;
    151 
    152     // Return the string with case folded for case insensitive comparison.
    153     String foldCase() const;
    154 
    155     static String number(short);
    156     static String number(unsigned short);
    157     static String number(int);
    158     static String number(unsigned);
    159     static String number(long);
    160     static String number(unsigned long);
    161     static String number(long long);
    162     static String number(unsigned long long);
    163     static String number(double);
    164 
    165     static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
    166 
    167     // Returns an uninitialized string. The characters needs to be written
    168     // into the buffer returned in data before the returned string is used.
    169     // Failure to do this will have unpredictable results.
    170     static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
    171 
    172     void split(const String& separator, Vector<String>& result) const;
    173     void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
    174     void split(UChar separator, Vector<String>& result) const;
    175     void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
    176 
    177     int toIntStrict(bool* ok = 0, int base = 10) const;
    178     unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
    179     int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
    180     uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
    181     intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
    182 
    183     int toInt(bool* ok = 0) const;
    184     unsigned toUInt(bool* ok = 0) const;
    185     int64_t toInt64(bool* ok = 0) const;
    186     uint64_t toUInt64(bool* ok = 0) const;
    187     intptr_t toIntPtr(bool* ok = 0) const;
    188     double toDouble(bool* ok = 0) const;
    189     float toFloat(bool* ok = 0) const;
    190 
    191     bool percentage(int& percentage) const;
    192 
    193     // Returns a StringImpl suitable for use on another thread.
    194     String crossThreadString() const;
    195     // Makes a deep copy. Helpful only if you need to use a String on another thread
    196     // (use crossThreadString if the method call doesn't need to be threadsafe).
    197     // Since the underlying StringImpl objects are immutable, there's no other reason
    198     // to ever prefer copy() over plain old assignment.
    199     String threadsafeCopy() const;
    200 
    201     bool isNull() const { return !m_impl; }
    202     bool isEmpty() const;
    203 
    204     StringImpl* impl() const { return m_impl.get(); }
    205 
    206 #if PLATFORM(CF)
    207     String(CFStringRef);
    208     CFStringRef createCFString() const;
    209 #endif
    210 
    211 #ifdef __OBJC__
    212     String(NSString*);
    213 
    214     // This conversion maps NULL to "", which loses the meaning of NULL, but we
    215     // need this mapping because AppKit crashes when passed nil NSStrings.
    216     operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
    217 #endif
    218 
    219 #if PLATFORM(QT)
    220     String(const QString&);
    221     String(const QStringRef&);
    222     operator QString() const;
    223 #endif
    224 
    225 #if PLATFORM(WX)
    226     String(const wxString&);
    227     operator wxString() const;
    228 #endif
    229 
    230 #if PLATFORM(HAIKU)
    231     String(const BString&);
    232     operator BString() const;
    233 #endif
    234 
    235 #ifndef NDEBUG
    236     Vector<char> ascii() const;
    237 #endif
    238 
    239     CString latin1() const;
    240     CString utf8() const;
    241 
    242     static String fromUTF8(const char*, size_t);
    243     static String fromUTF8(const char*);
    244 
    245     // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
    246     static String fromUTF8WithLatin1Fallback(const char*, size_t);
    247 
    248     // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
    249     WTF::Unicode::Direction defaultWritingDirection() const { return m_impl ? m_impl->defaultWritingDirection() : WTF::Unicode::LeftToRight; }
    250 
    251     // Counts the number of grapheme clusters. A surrogate pair or a sequence
    252     // of a non-combining character and following combining characters is
    253     // counted as 1 grapheme cluster.
    254     unsigned numGraphemeClusters() const;
    255     // Returns the number of characters which will be less than or equal to
    256     // the specified grapheme cluster length.
    257     unsigned numCharactersInGraphemeClusters(unsigned) const;
    258 
    259 private:
    260     RefPtr<StringImpl> m_impl;
    261 };
    262 
#if PLATFORM(QT)
// Qt-only stream operators that write a String to, and read a String from, a
// QDataStream. They are only declared in this header.
// NOTE(review): the serialized format is decided by the definitions, which are
// not visible here.
QDataStream& operator<<(QDataStream& stream, const String& str);
QDataStream& operator>>(QDataStream& stream, String& str);
#endif
    267 
// Concatenation operators covering String + String and both mixed
// String / C-string orderings. Each returns its result by value. They are only
// declared in this header.
String operator+(const String&, const String&);
String operator+(const String&, const char*);
String operator+(const char*, const String&);
    271 
    272 inline String& operator+=(String& a, const String& b) { a.append(b); return a; }
    273 
    274 inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); }
    275 inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); }
    276 inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); }
    277 
    278 inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
    279 inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); }
    280 inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); }
    281 
    282 inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
    283 inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
    284 inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
    285 
    286 inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
    287 {
    288     return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
    289 }
    290 
    291 inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
    292 
    293 inline bool operator!(const String& str) { return str.isNull(); }
    294 
    295 inline void swap(String& a, String& b) { a.swap(b); }
    296 
// String Operations
//
// Free functions that operate directly on a UChar buffer passed as a pointer
// plus a length, without needing a String object.

// Defined inline later in this header.
bool charactersAreAllASCII(const UChar*, size_t);

// Integer parsers that take an explicit numeric base (10 by default).
// NOTE(review): |ok|, when non-null, presumably reports whether parsing
// succeeded, and "Strict" presumably means trailing garbage is rejected, in
// contrast to the variants below -- confirm against the definitions.
int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10);

// Lenient integer parsers: no base parameter, and trailing garbage is ignored.
int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage

// Floating-point parsers.
double charactersToDouble(const UChar*, size_t, bool* ok = 0);
float charactersToFloat(const UChar*, size_t, bool* ok = 0);

// These declarations supply the default start positions for the inline
// definitions later in this header: a forward search starts at index 0, and a
// backward search starts at -1, which the definition counts from the end.
int find(const UChar*, size_t, UChar, int startPosition = 0);
int reverseFind(const UChar*, size_t, UChar, int startPosition = -1);
    318 
    319 #ifdef __OBJC__
    320 // This is for situations in WebKit where the long standing behavior has been
    321 // "nil if empty", so we try to maintain longstanding behavior for the sake of
    322 // entrenched clients
    323 inline NSString* nsStringNilIfEmpty(const String& str) {  return str.isEmpty() ? nil : (NSString*)str; }
    324 #endif
    325 
    326 inline bool charactersAreAllASCII(const UChar* characters, size_t length)
    327 {
    328     UChar ored = 0;
    329     for (size_t i = 0; i < length; ++i)
    330         ored |= characters[i];
    331     return !(ored & 0xFF80);
    332 }
    333 
    334 inline int find(const UChar* characters, size_t length, UChar character, int startPosition)
    335 {
    336     if (startPosition >= static_cast<int>(length))
    337         return -1;
    338     for (size_t i = startPosition; i < length; ++i) {
    339         if (characters[i] == character)
    340             return static_cast<int>(i);
    341     }
    342     return -1;
    343 }
    344 
    345 inline int find(const UChar* characters, size_t length, CharacterMatchFunctionPtr matchFunction, int startPosition)
    346 {
    347     if (startPosition >= static_cast<int>(length))
    348         return -1;
    349     for (size_t i = startPosition; i < length; ++i) {
    350         if (matchFunction(characters[i]))
    351             return static_cast<int>(i);
    352     }
    353     return -1;
    354 }
    355 
    356 inline int reverseFind(const UChar* characters, size_t length, UChar character, int startPosition)
    357 {
    358     if (startPosition >= static_cast<int>(length) || !length)
    359         return -1;
    360     if (startPosition < 0)
    361         startPosition += static_cast<int>(length);
    362     while (true) {
    363         if (characters[startPosition] == character)
    364             return startPosition;
    365         if (!startPosition)
    366             return -1;
    367         startPosition--;
    368     }
    369     ASSERT_NOT_REACHED();
    370     return -1;
    371 }
    372 
    373 inline void append(Vector<UChar>& vector, const String& string)
    374 {
    375     vector.append(string.characters(), string.length());
    376 }
    377 
    378 inline void appendNumber(Vector<UChar>& vector, unsigned char number)
    379 {
    380     int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1);
    381     size_t vectorSize = vector.size();
    382     vector.grow(vectorSize + numberLength);
    383 
    384     switch (numberLength) {
    385     case 3:
    386         vector[vectorSize + 2] = number % 10 + '0';
    387         number /= 10;
    388 
    389     case 2:
    390         vector[vectorSize + 1] = number % 10 + '0';
    391         number /= 10;
    392 
    393     case 1:
    394         vector[vectorSize] = number % 10 + '0';
    395     }
    396 }
    397 
    398 
    399 
// Only declared in this header.
// NOTE(review): going by the name, this presumably returns the string's UTF-8
// encoding wrapped in a SharedBuffer -- confirm against the definition.
PassRefPtr<SharedBuffer> utf8Buffer(const String&);
    401 
    402 } // namespace WebCore
    403 
namespace WTF {

    // StringHash is the default hash for String: this explicit specialization
    // makes DefaultHash<WebCore::String>::Hash name WebCore::StringHash.
    // The primary template is only forward-declared here; StringHash itself is
    // only forward-declared in this header as well.
    template<typename T> struct DefaultHash;
    template<> struct DefaultHash<WebCore::String> {
        typedef WebCore::StringHash Hash;
    };

} // namespace WTF
    413 
    414 #endif
    415