1 /* 2 * (C) 1999 Lars Knoll (knoll (at) kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved. 4 * 5 * This library is free software; you can redistribute it and/or 6 * modify it under the terms of the GNU Library General Public 7 * License as published by the Free Software Foundation; either 8 * version 2 of the License, or (at your option) any later version. 9 * 10 * This library is distributed in the hope that it will be useful, 11 * but WITHOUT ANY WARRANTY; without even the implied warranty of 12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 * Library General Public License for more details. 14 * 15 * You should have received a copy of the GNU Library General Public License 16 * along with this library; see the file COPYING.LIB. If not, write to 17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 18 * Boston, MA 02110-1301, USA. 19 * 20 */ 21 22 #ifndef WTFString_h 23 #define WTFString_h 24 25 // This file would be called String.h, but that conflicts with <string.h> 26 // on systems without case-sensitive file systems. 27 28 #include "StringImpl.h" 29 30 #ifdef __OBJC__ 31 #include <objc/objc.h> 32 #endif 33 34 #if USE(CF) 35 typedef const struct __CFString * CFStringRef; 36 #endif 37 38 #if PLATFORM(QT) 39 QT_BEGIN_NAMESPACE 40 class QString; 41 QT_END_NAMESPACE 42 #include <QDataStream> 43 #endif 44 45 #if PLATFORM(WX) 46 class wxString; 47 #endif 48 49 #if PLATFORM(HAIKU) 50 class BString; 51 #endif 52 53 #if PLATFORM(BREWMP) 54 // AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts. 55 #ifndef _AECHAR_DEFINED 56 typedef uint16 AECHAR; 57 #define _AECHAR_DEFINED 58 #endif 59 #endif 60 61 namespace WTF { 62 63 class CString; 64 struct StringHash; 65 66 // Declarations of string operations 67 68 bool charactersAreAllASCII(const UChar*, size_t); 69 bool charactersAreAllLatin1(const UChar*, size_t); 70 int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); 71 unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10); 72 int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); 73 uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10); 74 intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10); 75 76 int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage 77 unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage 78 int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage 79 uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage 80 intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage 81 82 double charactersToDouble(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); 83 float charactersToFloat(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0); 84 85 template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters(const UChar*, size_t); 86 87 class String { 88 public: 89 // Construct a null string, distinguishable from an empty string. 90 String() { } 91 92 // Construct a string with UTF-16 data. 93 String(const UChar* characters, unsigned length); 94 95 // Construct a string by copying the contents of a vector. To avoid 96 // copying, consider using String::adopt instead. 97 template<size_t inlineCapacity> 98 explicit String(const Vector<UChar, inlineCapacity>&); 99 100 // Construct a string with UTF-16 data, from a null-terminated source. 101 String(const UChar*); 102 103 // Construct a string with latin1 data. 104 String(const char* characters, unsigned length); 105 106 // Construct a string with latin1 data, from a null-terminated source. 107 String(const char* characters); 108 109 // Construct a string referencing an existing StringImpl. 110 String(StringImpl* impl) : m_impl(impl) { } 111 String(PassRefPtr<StringImpl> impl) : m_impl(impl) { } 112 String(RefPtr<StringImpl> impl) : m_impl(impl) { } 113 114 // Inline the destructor. 115 ALWAYS_INLINE ~String() { } 116 117 void swap(String& o) { m_impl.swap(o.m_impl); } 118 119 static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); } 120 template<size_t inlineCapacity> 121 static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); } 122 123 bool isNull() const { return !m_impl; } 124 bool isEmpty() const { return !m_impl || !m_impl->length(); } 125 126 StringImpl* impl() const { return m_impl.get(); } 127 128 unsigned length() const 129 { 130 if (!m_impl) 131 return 0; 132 return m_impl->length(); 133 } 134 135 const UChar* characters() const 136 { 137 if (!m_impl) 138 return 0; 139 return m_impl->characters(); 140 } 141 142 CString ascii() const; 143 CString latin1() const; 144 CString utf8(bool strict = false) const; 145 146 UChar operator[](unsigned index) const 147 { 148 if (!m_impl || index >= m_impl->length()) 149 return 0; 150 return m_impl->characters()[index]; 151 } 152 153 static String number(short); 154 static String number(unsigned short); 155 static String number(int); 156 static String number(unsigned); 157 static String number(long); 158 static String number(unsigned long); 159 static String number(long long); 160 static String number(unsigned long long); 161 static String number(double); 162 163 // Find a single character or string, also with match function & latin1 forms. 164 size_t find(UChar c, unsigned start = 0) const 165 { return m_impl ? m_impl->find(c, start) : notFound; } 166 size_t find(const String& str, unsigned start = 0) const 167 { return m_impl ? m_impl->find(str.impl(), start) : notFound; } 168 size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const 169 { return m_impl ? m_impl->find(matchFunction, start) : notFound; } 170 size_t find(const char* str, unsigned start = 0) const 171 { return m_impl ? m_impl->find(str, start) : notFound; } 172 173 // Find the last instance of a single character or string. 174 size_t reverseFind(UChar c, unsigned start = UINT_MAX) const 175 { return m_impl ? m_impl->reverseFind(c, start) : notFound; } 176 size_t reverseFind(const String& str, unsigned start = UINT_MAX) const 177 { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; } 178 179 // Case insensitive string matching. 180 size_t findIgnoringCase(const char* str, unsigned start = 0) const 181 { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; } 182 size_t findIgnoringCase(const String& str, unsigned start = 0) const 183 { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; } 184 size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const 185 { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; } 186 187 // Wrappers for find & reverseFind adding dynamic sensitivity check. 188 size_t find(const char* str, unsigned start, bool caseSensitive) const 189 { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } 190 size_t find(const String& str, unsigned start, bool caseSensitive) const 191 { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); } 192 size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const 193 { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); } 194 195 const UChar* charactersWithNullTermination(); 196 197 UChar32 characterStartingAt(unsigned) const; // Ditto. 198 199 bool contains(UChar c) const { return find(c) != notFound; } 200 bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } 201 bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; } 202 203 bool startsWith(const String& s, bool caseSensitive = true) const 204 { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); } 205 bool endsWith(const String& s, bool caseSensitive = true) const 206 { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); } 207 208 void append(const String&); 209 void append(char); 210 void append(UChar); 211 void append(const UChar*, unsigned length); 212 void insert(const String&, unsigned pos); 213 void insert(const UChar*, unsigned length, unsigned pos); 214 215 String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; } 216 String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; } 217 String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; } 218 String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; } 219 220 void makeLower() { if (m_impl) m_impl = m_impl->lower(); } 221 void makeUpper() { if (m_impl) m_impl = m_impl->upper(); } 222 void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); } 223 224 void truncate(unsigned len); 225 void remove(unsigned pos, int len = 1); 226 227 String substring(unsigned pos, unsigned len = UINT_MAX) const; 228 String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const; 229 String left(unsigned len) const { return substring(0, len); } 230 String right(unsigned len) const { return substring(length() - len, len); } 231 232 // Returns a lowercase/uppercase version of the string 233 String lower() const; 234 String upper() const; 235 236 String stripWhiteSpace() const; 237 String simplifyWhiteSpace() const; 238 239 String removeCharacters(CharacterMatchFunctionPtr) const; 240 template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const; 241 242 // Return the string with case folded for case insensitive comparison. 243 String foldCase() const; 244 245 #if !PLATFORM(QT) 246 static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2); 247 #else 248 static String format(const char *, ...); 249 #endif 250 251 // Returns an uninitialized string. The characters needs to be written 252 // into the buffer returned in data before the returned string is used. 253 // Failure to do this will have unpredictable results. 254 static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); } 255 256 void split(const String& separator, Vector<String>& result) const; 257 void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const; 258 void split(UChar separator, Vector<String>& result) const; 259 void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const; 260 261 int toIntStrict(bool* ok = 0, int base = 10) const; 262 unsigned toUIntStrict(bool* ok = 0, int base = 10) const; 263 int64_t toInt64Strict(bool* ok = 0, int base = 10) const; 264 uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const; 265 intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const; 266 267 int toInt(bool* ok = 0) const; 268 unsigned toUInt(bool* ok = 0) const; 269 int64_t toInt64(bool* ok = 0) const; 270 uint64_t toUInt64(bool* ok = 0) const; 271 intptr_t toIntPtr(bool* ok = 0) const; 272 double toDouble(bool* ok = 0, bool* didReadNumber = 0) const; 273 float toFloat(bool* ok = 0, bool* didReadNumber = 0) const; 274 275 bool percentage(int& percentage) const; 276 277 // Returns a StringImpl suitable for use on another thread. 278 String crossThreadString() const; 279 // Makes a deep copy. Helpful only if you need to use a String on another thread 280 // (use crossThreadString if the method call doesn't need to be threadsafe). 281 // Since the underlying StringImpl objects are immutable, there's no other reason 282 // to ever prefer copy() over plain old assignment. 283 String threadsafeCopy() const; 284 285 // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that 286 // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*). 287 typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA); 288 typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB); 289 operator UnspecifiedBoolTypeA() const; 290 operator UnspecifiedBoolTypeB() const; 291 292 #if USE(CF) 293 String(CFStringRef); 294 CFStringRef createCFString() const; 295 #endif 296 297 #ifdef __OBJC__ 298 String(NSString*); 299 300 // This conversion maps NULL to "", which loses the meaning of NULL, but we 301 // need this mapping because AppKit crashes when passed nil NSStrings. 302 operator NSString*() const { if (!m_impl) return @""; return *m_impl; } 303 #endif 304 305 #if PLATFORM(QT) 306 String(const QString&); 307 String(const QStringRef&); 308 operator QString() const; 309 #endif 310 311 #if PLATFORM(WX) 312 String(const wxString&); 313 operator wxString() const; 314 #endif 315 316 #if PLATFORM(HAIKU) 317 String(const BString&); 318 operator BString() const; 319 #endif 320 321 #if PLATFORM(BREWMP) 322 String(const AECHAR*); 323 #endif 324 325 // String::fromUTF8 will return a null string if 326 // the input data contains invalid UTF-8 sequences. 327 static String fromUTF8(const char*, size_t); 328 static String fromUTF8(const char*); 329 330 // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8. 331 static String fromUTF8WithLatin1Fallback(const char*, size_t); 332 333 // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3. 334 WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0) const 335 { 336 if (m_impl) 337 return m_impl->defaultWritingDirection(hasStrongDirectionality); 338 if (hasStrongDirectionality) 339 *hasStrongDirectionality = false; 340 return WTF::Unicode::LeftToRight; 341 } 342 343 bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); } 344 bool containsOnlyLatin1() const { return charactersAreAllLatin1(characters(), length()); } 345 346 // Hash table deleted values, which are only constructed and never copied or destroyed. 347 String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { } 348 bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); } 349 350 private: 351 RefPtr<StringImpl> m_impl; 352 }; 353 354 #if PLATFORM(QT) 355 QDataStream& operator<<(QDataStream& stream, const String& str); 356 QDataStream& operator>>(QDataStream& stream, String& str); 357 #endif 358 359 String operator+(const String&, const String&); 360 String operator+(const String&, const char*); 361 String operator+(const char*, const String&); 362 363 inline String& operator+=(String& a, const String& b) { a.append(b); return a; } 364 365 inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); } 366 inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); } 367 inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); } 368 369 inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); } 370 inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); } 371 inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); } 372 373 inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); } 374 inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); } 375 inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); } 376 377 inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase) 378 { 379 return ignoreCase ? equalIgnoringCase(a, b) : (a == b); 380 } 381 382 inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); } 383 384 template<size_t inlineCapacity> 385 inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); } 386 387 inline bool operator!(const String& str) { return str.isNull(); } 388 389 inline void swap(String& a, String& b) { a.swap(b); } 390 391 // Definitions of string operations 392 393 template<size_t inlineCapacity> 394 String::String(const Vector<UChar, inlineCapacity>& vector) 395 : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : 0) 396 { 397 } 398 399 #ifdef __OBJC__ 400 // This is for situations in WebKit where the long standing behavior has been 401 // "nil if empty", so we try to maintain longstanding behavior for the sake of 402 // entrenched clients 403 inline NSString* nsStringNilIfEmpty(const String& str) { return str.isEmpty() ? nil : (NSString*)str; } 404 #endif 405 406 inline bool charactersAreAllASCII(const UChar* characters, size_t length) 407 { 408 UChar ored = 0; 409 for (size_t i = 0; i < length; ++i) 410 ored |= characters[i]; 411 return !(ored & 0xFF80); 412 } 413 414 inline bool charactersAreAllLatin1(const UChar* characters, size_t length) 415 { 416 UChar ored = 0; 417 for (size_t i = 0; i < length; ++i) 418 ored |= characters[i]; 419 return !(ored & 0xFF00); 420 } 421 422 int codePointCompare(const String&, const String&); 423 424 inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0) 425 { 426 while (index < length) { 427 if (characters[index] == matchCharacter) 428 return index; 429 ++index; 430 } 431 return notFound; 432 } 433 434 inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0) 435 { 436 while (index < length) { 437 if (matchFunction(characters[index])) 438 return index; 439 ++index; 440 } 441 return notFound; 442 } 443 444 inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX) 445 { 446 if (!length) 447 return notFound; 448 if (index >= length) 449 index = length - 1; 450 while (characters[index] != matchCharacter) { 451 if (!index--) 452 return notFound; 453 } 454 return index; 455 } 456 457 inline void append(Vector<UChar>& vector, const String& string) 458 { 459 vector.append(string.characters(), string.length()); 460 } 461 462 inline void appendNumber(Vector<UChar>& vector, unsigned char number) 463 { 464 int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1); 465 size_t vectorSize = vector.size(); 466 vector.grow(vectorSize + numberLength); 467 468 switch (numberLength) { 469 case 3: 470 vector[vectorSize + 2] = number % 10 + '0'; 471 number /= 10; 472 473 case 2: 474 vector[vectorSize + 1] = number % 10 + '0'; 475 number /= 10; 476 477 case 1: 478 vector[vectorSize] = number % 10 + '0'; 479 } 480 } 481 482 template<bool isSpecialCharacter(UChar)> inline bool isAllSpecialCharacters(const UChar* characters, size_t length) 483 { 484 for (size_t i = 0; i < length; ++i) { 485 if (!isSpecialCharacter(characters[i])) 486 return false; 487 } 488 return true; 489 } 490 491 template<bool isSpecialCharacter(UChar)> inline bool String::isAllSpecialCharacters() const 492 { 493 return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters(), length()); 494 } 495 496 // StringHash is the default hash for String 497 template<typename T> struct DefaultHash; 498 template<> struct DefaultHash<String> { 499 typedef StringHash Hash; 500 }; 501 502 template <> struct VectorTraits<String> : SimpleClassVectorTraits { }; 503 504 } 505 506 using WTF::CString; 507 using WTF::String; 508 using WTF::append; 509 using WTF::appendNumber; 510 using WTF::charactersAreAllASCII; 511 using WTF::charactersAreAllLatin1; 512 using WTF::charactersToIntStrict; 513 using WTF::charactersToUIntStrict; 514 using WTF::charactersToInt64Strict; 515 using WTF::charactersToUInt64Strict; 516 using WTF::charactersToIntPtrStrict; 517 using WTF::charactersToInt; 518 using WTF::charactersToUInt; 519 using WTF::charactersToInt64; 520 using WTF::charactersToUInt64; 521 using WTF::charactersToIntPtr; 522 using WTF::charactersToDouble; 523 using WTF::charactersToFloat; 524 using WTF::equal; 525 using WTF::equalIgnoringCase; 526 using WTF::find; 527 using WTF::isAllSpecialCharacters; 528 using WTF::isSpaceOrNewline; 529 using WTF::reverseFind; 530 531 #endif 532