1 /* 2 * Copyright (C) 2005 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_STRING8_H 18 #define ANDROID_STRING8_H 19 20 #include <utils/Errors.h> 21 22 // Need this for the char16_t type; String8.h should not 23 // be depedent on the String16 class. 24 #include <utils/String16.h> 25 26 #include <stdint.h> 27 #include <string.h> 28 #include <sys/types.h> 29 30 // --------------------------------------------------------------------------- 31 32 extern "C" { 33 34 typedef uint32_t char32_t; 35 36 size_t strlen32(const char32_t *); 37 size_t strnlen32(const char32_t *, size_t); 38 39 /* 40 * Returns the length of "src" when "src" is valid UTF-8 string. 41 * Returns 0 if src is NULL, 0-length string or non UTF-8 string. 42 * This function should be used to determine whether "src" is valid UTF-8 43 * characters with valid unicode codepoints. "src" must be null-terminated. 44 * 45 * If you are going to use other GetUtf... functions defined in this header 46 * with string which may not be valid UTF-8 with valid codepoint (form 0 to 47 * 0x10FFFF), you should use this function before calling others, since the 48 * other functions do not check whether the string is valid UTF-8 or not. 49 * 50 * If you do not care whether "src" is valid UTF-8 or not, you should use 51 * strlen() as usual, which should be much faster. 52 */ 53 size_t utf8_length(const char *src); 54 55 /* 56 * Returns the UTF-32 length of "src". 57 */ 58 size_t utf32_length(const char *src, size_t src_len); 59 60 /* 61 * Returns the UTF-8 length of "src". 62 */ 63 size_t utf8_length_from_utf16(const char16_t *src, size_t src_len); 64 65 /* 66 * Returns the UTF-8 length of "src". 67 */ 68 size_t utf8_length_from_utf32(const char32_t *src, size_t src_len); 69 70 /* 71 * Returns the unicode value at "index". 72 * Returns -1 when the index is invalid (equals to or more than "src_len"). 73 * If returned value is positive, it is able to be converted to char32_t, which 74 * is unsigned. Then, if "next_index" is not NULL, the next index to be used is 75 * stored in "next_index". "next_index" can be NULL. 76 */ 77 int32_t utf32_at(const char *src, size_t src_len, 78 size_t index, size_t *next_index); 79 80 /* 81 * Stores a UTF-32 string converted from "src" in "dst", if "dst_length" is not 82 * large enough to store the string, the part of the "src" string is stored 83 * into "dst". 84 * Returns the size actually used for storing the string. 85 * "dst" is not null-terminated when dst_len is fully used (like strncpy). 86 */ 87 size_t utf8_to_utf32(const char* src, size_t src_len, 88 char32_t* dst, size_t dst_len); 89 90 /* 91 * Stores a UTF-8 string converted from "src" in "dst", if "dst_length" is not 92 * large enough to store the string, the part of the "src" string is stored 93 * into "dst" as much as possible. See the examples for more detail. 94 * Returns the size actually used for storing the string. 95 * dst" is not null-terminated when dst_len is fully used (like strncpy). 96 * 97 * Example 1 98 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 99 * "src_len" == 2 100 * "dst_len" >= 7 101 * -> 102 * Returned value == 6 103 * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0 104 * (note that "dst" is null-terminated) 105 * 106 * Example 2 107 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 108 * "src_len" == 2 109 * "dst_len" == 5 110 * -> 111 * Returned value == 3 112 * "dst" becomes \xE3\x81\x82\0 113 * (note that "dst" is null-terminated, but \u3044 is not stored in "dst" 114 * since "dst" does not have enough size to store the character) 115 * 116 * Example 3 117 * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84) 118 * "src_len" == 2 119 * "dst_len" == 6 120 * -> 121 * Returned value == 6 122 * "dst" becomes \xE3\x81\x82\xE3\x81\x84 123 * (note that "dst" is NOT null-terminated, like strncpy) 124 */ 125 size_t utf32_to_utf8(const char32_t* src, size_t src_len, 126 char* dst, size_t dst_len); 127 128 size_t utf16_to_utf8(const char16_t* src, size_t src_len, 129 char* dst, size_t dst_len); 130 131 } 132 133 // --------------------------------------------------------------------------- 134 135 namespace android { 136 137 class TextOutput; 138 139 //! This is a string holding UTF-8 characters. Does not allow the value more 140 // than 0x10FFFF, which is not valid unicode codepoint. 141 class String8 142 { 143 public: 144 String8(); 145 String8(const String8& o); 146 explicit String8(const char* o); 147 explicit String8(const char* o, size_t numChars); 148 149 explicit String8(const String16& o); 150 explicit String8(const char16_t* o); 151 explicit String8(const char16_t* o, size_t numChars); 152 explicit String8(const char32_t* o); 153 explicit String8(const char32_t* o, size_t numChars); 154 ~String8(); 155 156 inline const char* string() const; 157 inline size_t size() const; 158 inline size_t length() const; 159 inline size_t bytes() const; 160 161 inline const SharedBuffer* sharedBuffer() const; 162 163 void setTo(const String8& other); 164 status_t setTo(const char* other); 165 status_t setTo(const char* other, size_t numChars); 166 status_t setTo(const char16_t* other, size_t numChars); 167 status_t setTo(const char32_t* other, 168 size_t length); 169 170 status_t append(const String8& other); 171 status_t append(const char* other); 172 status_t append(const char* other, size_t numChars); 173 174 status_t appendFormat(const char* fmt, ...) 175 __attribute__((format (printf, 2, 3))); 176 177 // Note that this function takes O(N) time to calculate the value. 178 // No cache value is stored. 179 size_t getUtf32Length() const; 180 int32_t getUtf32At(size_t index, 181 size_t *next_index) const; 182 size_t getUtf32(char32_t* dst, size_t dst_len) const; 183 184 inline String8& operator=(const String8& other); 185 inline String8& operator=(const char* other); 186 187 inline String8& operator+=(const String8& other); 188 inline String8 operator+(const String8& other) const; 189 190 inline String8& operator+=(const char* other); 191 inline String8 operator+(const char* other) const; 192 193 inline int compare(const String8& other) const; 194 195 inline bool operator<(const String8& other) const; 196 inline bool operator<=(const String8& other) const; 197 inline bool operator==(const String8& other) const; 198 inline bool operator!=(const String8& other) const; 199 inline bool operator>=(const String8& other) const; 200 inline bool operator>(const String8& other) const; 201 202 inline bool operator<(const char* other) const; 203 inline bool operator<=(const char* other) const; 204 inline bool operator==(const char* other) const; 205 inline bool operator!=(const char* other) const; 206 inline bool operator>=(const char* other) const; 207 inline bool operator>(const char* other) const; 208 209 inline operator const char*() const; 210 211 char* lockBuffer(size_t size); 212 void unlockBuffer(); 213 status_t unlockBuffer(size_t size); 214 215 // return the index of the first byte of other in this at or after 216 // start, or -1 if not found 217 ssize_t find(const char* other, size_t start = 0) const; 218 219 void toLower(); 220 void toLower(size_t start, size_t numChars); 221 void toUpper(); 222 void toUpper(size_t start, size_t numChars); 223 224 /* 225 * These methods operate on the string as if it were a path name. 226 */ 227 228 /* 229 * Set the filename field to a specific value. 230 * 231 * Normalizes the filename, removing a trailing '/' if present. 232 */ 233 void setPathName(const char* name); 234 void setPathName(const char* name, size_t numChars); 235 236 /* 237 * Get just the filename component. 238 * 239 * "/tmp/foo/bar.c" --> "bar.c" 240 */ 241 String8 getPathLeaf(void) const; 242 243 /* 244 * Remove the last (file name) component, leaving just the directory 245 * name. 246 * 247 * "/tmp/foo/bar.c" --> "/tmp/foo" 248 * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX 249 * "bar.c" --> "" 250 */ 251 String8 getPathDir(void) const; 252 253 /* 254 * Retrieve the front (root dir) component. Optionally also return the 255 * remaining components. 256 * 257 * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c") 258 * "/tmp" --> "tmp" (remain = "") 259 * "bar.c" --> "bar.c" (remain = "") 260 */ 261 String8 walkPath(String8* outRemains = NULL) const; 262 263 /* 264 * Return the filename extension. This is the last '.' and up to 265 * four characters that follow it. The '.' is included in case we 266 * decide to expand our definition of what constitutes an extension. 267 * 268 * "/tmp/foo/bar.c" --> ".c" 269 * "/tmp" --> "" 270 * "/tmp/foo.bar/baz" --> "" 271 * "foo.jpeg" --> ".jpeg" 272 * "foo." --> "" 273 */ 274 String8 getPathExtension(void) const; 275 276 /* 277 * Return the path without the extension. Rules for what constitutes 278 * an extension are described in the comment for getPathExtension(). 279 * 280 * "/tmp/foo/bar.c" --> "/tmp/foo/bar" 281 */ 282 String8 getBasePath(void) const; 283 284 /* 285 * Add a component to the pathname. We guarantee that there is 286 * exactly one path separator between the old path and the new. 287 * If there is no existing name, we just copy the new name in. 288 * 289 * If leaf is a fully qualified path (i.e. starts with '/', it 290 * replaces whatever was there before. 291 */ 292 String8& appendPath(const char* leaf); 293 String8& appendPath(const String8& leaf) { return appendPath(leaf.string()); } 294 295 /* 296 * Like appendPath(), but does not affect this string. Returns a new one instead. 297 */ 298 String8 appendPathCopy(const char* leaf) const 299 { String8 p(*this); p.appendPath(leaf); return p; } 300 String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); } 301 302 /* 303 * Converts all separators in this string to /, the default path separator. 304 * 305 * If the default OS separator is backslash, this converts all 306 * backslashes to slashes, in-place. Otherwise it does nothing. 307 * Returns self. 308 */ 309 String8& convertToResPath(); 310 311 private: 312 status_t real_append(const char* other, size_t numChars); 313 char* find_extension(void) const; 314 315 const char* mString; 316 }; 317 318 TextOutput& operator<<(TextOutput& to, const String16& val); 319 320 // --------------------------------------------------------------------------- 321 // No user servicable parts below. 322 323 inline int compare_type(const String8& lhs, const String8& rhs) 324 { 325 return lhs.compare(rhs); 326 } 327 328 inline int strictly_order_type(const String8& lhs, const String8& rhs) 329 { 330 return compare_type(lhs, rhs) < 0; 331 } 332 333 inline const char* String8::string() const 334 { 335 return mString; 336 } 337 338 inline size_t String8::length() const 339 { 340 return SharedBuffer::sizeFromData(mString)-1; 341 } 342 343 inline size_t String8::size() const 344 { 345 return length(); 346 } 347 348 inline size_t String8::bytes() const 349 { 350 return SharedBuffer::sizeFromData(mString)-1; 351 } 352 353 inline const SharedBuffer* String8::sharedBuffer() const 354 { 355 return SharedBuffer::bufferFromData(mString); 356 } 357 358 inline String8& String8::operator=(const String8& other) 359 { 360 setTo(other); 361 return *this; 362 } 363 364 inline String8& String8::operator=(const char* other) 365 { 366 setTo(other); 367 return *this; 368 } 369 370 inline String8& String8::operator+=(const String8& other) 371 { 372 append(other); 373 return *this; 374 } 375 376 inline String8 String8::operator+(const String8& other) const 377 { 378 String8 tmp(*this); 379 tmp += other; 380 return tmp; 381 } 382 383 inline String8& String8::operator+=(const char* other) 384 { 385 append(other); 386 return *this; 387 } 388 389 inline String8 String8::operator+(const char* other) const 390 { 391 String8 tmp(*this); 392 tmp += other; 393 return tmp; 394 } 395 396 inline int String8::compare(const String8& other) const 397 { 398 return strcmp(mString, other.mString); 399 } 400 401 inline bool String8::operator<(const String8& other) const 402 { 403 return strcmp(mString, other.mString) < 0; 404 } 405 406 inline bool String8::operator<=(const String8& other) const 407 { 408 return strcmp(mString, other.mString) <= 0; 409 } 410 411 inline bool String8::operator==(const String8& other) const 412 { 413 return strcmp(mString, other.mString) == 0; 414 } 415 416 inline bool String8::operator!=(const String8& other) const 417 { 418 return strcmp(mString, other.mString) != 0; 419 } 420 421 inline bool String8::operator>=(const String8& other) const 422 { 423 return strcmp(mString, other.mString) >= 0; 424 } 425 426 inline bool String8::operator>(const String8& other) const 427 { 428 return strcmp(mString, other.mString) > 0; 429 } 430 431 inline bool String8::operator<(const char* other) const 432 { 433 return strcmp(mString, other) < 0; 434 } 435 436 inline bool String8::operator<=(const char* other) const 437 { 438 return strcmp(mString, other) <= 0; 439 } 440 441 inline bool String8::operator==(const char* other) const 442 { 443 return strcmp(mString, other) == 0; 444 } 445 446 inline bool String8::operator!=(const char* other) const 447 { 448 return strcmp(mString, other) != 0; 449 } 450 451 inline bool String8::operator>=(const char* other) const 452 { 453 return strcmp(mString, other) >= 0; 454 } 455 456 inline bool String8::operator>(const char* other) const 457 { 458 return strcmp(mString, other) > 0; 459 } 460 461 inline String8::operator const char*() const 462 { 463 return mString; 464 } 465 466 } // namespace android 467 468 // --------------------------------------------------------------------------- 469 470 #endif // ANDROID_STRING8_H 471