Home | History | Annotate | Download | only in utils
      1 /*
      2  * Copyright (C) 2005 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_STRING8_H
     18 #define ANDROID_STRING8_H
     19 
     20 #include <utils/Errors.h>
     21 
     22 // Need this for the char16_t type; String8.h should not
     23 // be dependent on the String16 class.
     24 #include <utils/String16.h>
     25 
     26 #include <stdint.h>
     27 #include <string.h>
     28 #include <sys/types.h>
     29 
     30 // ---------------------------------------------------------------------------
     31 
     extern "C" {

     /*
      * char32_t is a built-in type starting with C++11, and redefining a
      * built-in type through a typedef is an error there. Only pre-C++11
      * dialects need (and accept) this fallback definition.
      */
     #if __cplusplus < 201103L
     typedef uint32_t char32_t;
     #endif

     /*
      * Length helpers for char32_t strings.
      * NOTE(review): presumably the char32_t counterparts of strlen() and
      * strnlen(); the implementation is not visible from this header -- confirm.
      */
     size_t strlen32(const char32_t *);
     size_t strnlen32(const char32_t *, size_t);

     /*
      * Returns the length of "src" when "src" is a valid UTF-8 string.
      * Returns 0 if "src" is NULL, a 0-length string, or not valid UTF-8.
      * Use this function to determine whether "src" consists of valid UTF-8
      * characters with valid unicode codepoints. "src" must be null-terminated.
      *
      * If you are going to use the other utf functions declared in this header
      * with a string which may not be valid UTF-8 with valid codepoints (from 0
      * to 0x10FFFF), you should call this function first, since the other
      * functions do not check whether the string is valid UTF-8 or not.
      *
      * If you do not care whether "src" is valid UTF-8 or not, you should use
      * strlen() as usual, which should be much faster.
      */
     size_t utf8_length(const char *src);

     /*
      * Returns the UTF-32 length of "src".
      */
     size_t utf32_length(const char *src, size_t src_len);

     /*
      * Returns the UTF-8 length of the UTF-16 string "src".
      */
     size_t utf8_length_from_utf16(const char16_t *src, size_t src_len);

     /*
      * Returns the UTF-8 length of the UTF-32 string "src".
      */
     size_t utf8_length_from_utf32(const char32_t *src, size_t src_len);

     /*
      * Returns the unicode value at "index".
      * Returns -1 when the index is invalid (equal to or greater than "src_len").
      * If the returned value is positive, it can be converted to char32_t, which
      * is unsigned. Then, if "next_index" is not NULL, the next index to be used
      * is stored in "next_index". "next_index" can be NULL.
      */
     int32_t utf32_at(const char *src, size_t src_len,
                      size_t index, size_t *next_index);

     /*
      * Stores a UTF-32 string converted from "src" in "dst". If "dst_len" is not
      * large enough to hold the whole string, only part of "src" is stored into
      * "dst".
      * Returns the size actually used for storing the string.
      * "dst" is not null-terminated when dst_len is fully used (like strncpy).
      */
     size_t utf8_to_utf32(const char* src, size_t src_len,
                          char32_t* dst, size_t dst_len);

     /*
      * Stores a UTF-8 string converted from "src" in "dst". If "dst_len" is not
      * large enough to hold the whole string, as much of "src" as possible is
      * stored into "dst". See the examples for more detail.
      * Returns the size actually used for storing the string.
      * "dst" is not null-terminated when dst_len is fully used (like strncpy).
      *
      * Example 1
      * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
      * "src_len" == 2
      * "dst_len" >= 7
      * ->
      * Returned value == 6
      * "dst" becomes \xE3\x81\x82\xE3\x81\x84\0
      * (note that "dst" is null-terminated)
      *
      * Example 2
      * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
      * "src_len" == 2
      * "dst_len" == 5
      * ->
      * Returned value == 3
      * "dst" becomes \xE3\x81\x82\0
      * (note that "dst" is null-terminated, but \u3044 is not stored in "dst"
      * since "dst" does not have enough size to store the character)
      *
      * Example 3
      * "src" == \u3042\u3044 (\xE3\x81\x82\xE3\x81\x84)
      * "src_len" == 2
      * "dst_len" == 6
      * ->
      * Returned value == 6
      * "dst" becomes \xE3\x81\x82\xE3\x81\x84
      * (note that "dst" is NOT null-terminated, like strncpy)
      */
     size_t utf32_to_utf8(const char32_t* src, size_t src_len,
                          char* dst, size_t dst_len);

     /*
      * NOTE(review): undocumented in the original header; presumably the UTF-16
      * counterpart of utf32_to_utf8() with the same truncation and termination
      * rules -- confirm against the implementation.
      */
     size_t utf16_to_utf8(const char16_t* src, size_t src_len,
                          char* dst, size_t dst_len);

     }
    132 
    133 // ---------------------------------------------------------------------------
    134 
    135 namespace android {
    136 
    137 class TextOutput;
    138 
    139 //! This is a string holding UTF-8 characters. Does not allow the value more
    140 // than 0x10FFFF, which is not valid unicode codepoint.
    141 class String8
    142 {
    143 public:
    144                                 String8();
    145                                 String8(const String8& o);
    146     explicit                    String8(const char* o);
    147     explicit                    String8(const char* o, size_t numChars);
    148 
    149     explicit                    String8(const String16& o);
    150     explicit                    String8(const char16_t* o);
    151     explicit                    String8(const char16_t* o, size_t numChars);
    152     explicit                    String8(const char32_t* o);
    153     explicit                    String8(const char32_t* o, size_t numChars);
    154                                 ~String8();
    155 
    156     inline  const char*         string() const;
    157     inline  size_t              size() const;
    158     inline  size_t              length() const;
    159     inline  size_t              bytes() const;
    160 
    161     inline  const SharedBuffer* sharedBuffer() const;
    162 
    163             void                setTo(const String8& other);
    164             status_t            setTo(const char* other);
    165             status_t            setTo(const char* other, size_t numChars);
    166             status_t            setTo(const char16_t* other, size_t numChars);
    167             status_t            setTo(const char32_t* other,
    168                                       size_t length);
    169 
    170             status_t            append(const String8& other);
    171             status_t            append(const char* other);
    172             status_t            append(const char* other, size_t numChars);
    173 
    174             status_t            appendFormat(const char* fmt, ...)
    175                     __attribute__((format (printf, 2, 3)));
    176 
    177             // Note that this function takes O(N) time to calculate the value.
    178             // No cache value is stored.
    179             size_t              getUtf32Length() const;
    180             int32_t             getUtf32At(size_t index,
    181                                            size_t *next_index) const;
    182             size_t              getUtf32(char32_t* dst, size_t dst_len) const;
    183 
    184     inline  String8&            operator=(const String8& other);
    185     inline  String8&            operator=(const char* other);
    186 
    187     inline  String8&            operator+=(const String8& other);
    188     inline  String8             operator+(const String8& other) const;
    189 
    190     inline  String8&            operator+=(const char* other);
    191     inline  String8             operator+(const char* other) const;
    192 
    193     inline  int                 compare(const String8& other) const;
    194 
    195     inline  bool                operator<(const String8& other) const;
    196     inline  bool                operator<=(const String8& other) const;
    197     inline  bool                operator==(const String8& other) const;
    198     inline  bool                operator!=(const String8& other) const;
    199     inline  bool                operator>=(const String8& other) const;
    200     inline  bool                operator>(const String8& other) const;
    201 
    202     inline  bool                operator<(const char* other) const;
    203     inline  bool                operator<=(const char* other) const;
    204     inline  bool                operator==(const char* other) const;
    205     inline  bool                operator!=(const char* other) const;
    206     inline  bool                operator>=(const char* other) const;
    207     inline  bool                operator>(const char* other) const;
    208 
    209     inline                      operator const char*() const;
    210 
    211             char*               lockBuffer(size_t size);
    212             void                unlockBuffer();
    213             status_t            unlockBuffer(size_t size);
    214 
    215             // return the index of the first byte of other in this at or after
    216             // start, or -1 if not found
    217             ssize_t             find(const char* other, size_t start = 0) const;
    218 
    219             void                toLower();
    220             void                toLower(size_t start, size_t numChars);
    221             void                toUpper();
    222             void                toUpper(size_t start, size_t numChars);
    223 
    224     /*
    225      * These methods operate on the string as if it were a path name.
    226      */
    227 
    228     /*
    229      * Set the filename field to a specific value.
    230      *
    231      * Normalizes the filename, removing a trailing '/' if present.
    232      */
    233     void setPathName(const char* name);
    234     void setPathName(const char* name, size_t numChars);
    235 
    236     /*
    237      * Get just the filename component.
    238      *
    239      * "/tmp/foo/bar.c" --> "bar.c"
    240      */
    241     String8 getPathLeaf(void) const;
    242 
    243     /*
    244      * Remove the last (file name) component, leaving just the directory
    245      * name.
    246      *
    247      * "/tmp/foo/bar.c" --> "/tmp/foo"
    248      * "/tmp" --> "" // ????? shouldn't this be "/" ???? XXX
    249      * "bar.c" --> ""
    250      */
    251     String8 getPathDir(void) const;
    252 
    253     /*
    254      * Retrieve the front (root dir) component.  Optionally also return the
    255      * remaining components.
    256      *
    257      * "/tmp/foo/bar.c" --> "tmp" (remain = "foo/bar.c")
    258      * "/tmp" --> "tmp" (remain = "")
    259      * "bar.c" --> "bar.c" (remain = "")
    260      */
    261     String8 walkPath(String8* outRemains = NULL) const;
    262 
    263     /*
    264      * Return the filename extension.  This is the last '.' and up to
    265      * four characters that follow it.  The '.' is included in case we
    266      * decide to expand our definition of what constitutes an extension.
    267      *
    268      * "/tmp/foo/bar.c" --> ".c"
    269      * "/tmp" --> ""
    270      * "/tmp/foo.bar/baz" --> ""
    271      * "foo.jpeg" --> ".jpeg"
    272      * "foo." --> ""
    273      */
    274     String8 getPathExtension(void) const;
    275 
    276     /*
    277      * Return the path without the extension.  Rules for what constitutes
    278      * an extension are described in the comment for getPathExtension().
    279      *
    280      * "/tmp/foo/bar.c" --> "/tmp/foo/bar"
    281      */
    282     String8 getBasePath(void) const;
    283 
    284     /*
    285      * Add a component to the pathname.  We guarantee that there is
    286      * exactly one path separator between the old path and the new.
    287      * If there is no existing name, we just copy the new name in.
    288      *
    289      * If leaf is a fully qualified path (i.e. starts with '/', it
    290      * replaces whatever was there before.
    291      */
    292     String8& appendPath(const char* leaf);
    293     String8& appendPath(const String8& leaf)  { return appendPath(leaf.string()); }
    294 
    295     /*
    296      * Like appendPath(), but does not affect this string.  Returns a new one instead.
    297      */
    298     String8 appendPathCopy(const char* leaf) const
    299                                              { String8 p(*this); p.appendPath(leaf); return p; }
    300     String8 appendPathCopy(const String8& leaf) const { return appendPathCopy(leaf.string()); }
    301 
    302     /*
    303      * Converts all separators in this string to /, the default path separator.
    304      *
    305      * If the default OS separator is backslash, this converts all
    306      * backslashes to slashes, in-place. Otherwise it does nothing.
    307      * Returns self.
    308      */
    309     String8& convertToResPath();
    310 
    311 private:
    312             status_t            real_append(const char* other, size_t numChars);
    313             char*               find_extension(void) const;
    314 
    315             const char* mString;
    316 };
    317 
    318 TextOutput& operator<<(TextOutput& to, const String16& val);
    319 
    320 // ---------------------------------------------------------------------------
    321 // No user serviceable parts below.
    322 
    323 inline int compare_type(const String8& lhs, const String8& rhs)
    324 {
    325     return lhs.compare(rhs);
    326 }
    327 
    328 inline int strictly_order_type(const String8& lhs, const String8& rhs)
    329 {
    330     return compare_type(lhs, rhs) < 0;
    331 }
    332 
    333 inline const char* String8::string() const
    334 {
    335     return mString;
    336 }
    337 
    338 inline size_t String8::length() const
    339 {
    340     return SharedBuffer::sizeFromData(mString)-1;
    341 }
    342 
    343 inline size_t String8::size() const
    344 {
    345     return length();
    346 }
    347 
    348 inline size_t String8::bytes() const
    349 {
    350     return SharedBuffer::sizeFromData(mString)-1;
    351 }
    352 
    353 inline const SharedBuffer* String8::sharedBuffer() const
    354 {
    355     return SharedBuffer::bufferFromData(mString);
    356 }
    357 
    358 inline String8& String8::operator=(const String8& other)
    359 {
    360     setTo(other);
    361     return *this;
    362 }
    363 
    364 inline String8& String8::operator=(const char* other)
    365 {
    366     setTo(other);
    367     return *this;
    368 }
    369 
    370 inline String8& String8::operator+=(const String8& other)
    371 {
    372     append(other);
    373     return *this;
    374 }
    375 
    376 inline String8 String8::operator+(const String8& other) const
    377 {
    378     String8 tmp(*this);
    379     tmp += other;
    380     return tmp;
    381 }
    382 
    383 inline String8& String8::operator+=(const char* other)
    384 {
    385     append(other);
    386     return *this;
    387 }
    388 
    389 inline String8 String8::operator+(const char* other) const
    390 {
    391     String8 tmp(*this);
    392     tmp += other;
    393     return tmp;
    394 }
    395 
    396 inline int String8::compare(const String8& other) const
    397 {
    398     return strcmp(mString, other.mString);
    399 }
    400 
    401 inline bool String8::operator<(const String8& other) const
    402 {
    403     return strcmp(mString, other.mString) < 0;
    404 }
    405 
    406 inline bool String8::operator<=(const String8& other) const
    407 {
    408     return strcmp(mString, other.mString) <= 0;
    409 }
    410 
    411 inline bool String8::operator==(const String8& other) const
    412 {
    413     return strcmp(mString, other.mString) == 0;
    414 }
    415 
    416 inline bool String8::operator!=(const String8& other) const
    417 {
    418     return strcmp(mString, other.mString) != 0;
    419 }
    420 
    421 inline bool String8::operator>=(const String8& other) const
    422 {
    423     return strcmp(mString, other.mString) >= 0;
    424 }
    425 
    426 inline bool String8::operator>(const String8& other) const
    427 {
    428     return strcmp(mString, other.mString) > 0;
    429 }
    430 
    431 inline bool String8::operator<(const char* other) const
    432 {
    433     return strcmp(mString, other) < 0;
    434 }
    435 
    436 inline bool String8::operator<=(const char* other) const
    437 {
    438     return strcmp(mString, other) <= 0;
    439 }
    440 
    441 inline bool String8::operator==(const char* other) const
    442 {
    443     return strcmp(mString, other) == 0;
    444 }
    445 
    446 inline bool String8::operator!=(const char* other) const
    447 {
    448     return strcmp(mString, other) != 0;
    449 }
    450 
    451 inline bool String8::operator>=(const char* other) const
    452 {
    453     return strcmp(mString, other) >= 0;
    454 }
    455 
    456 inline bool String8::operator>(const char* other) const
    457 {
    458     return strcmp(mString, other) > 0;
    459 }
    460 
    461 inline String8::operator const char*() const
    462 {
    463     return mString;
    464 }
    465 
    466 }  // namespace android
    467 
    468 // ---------------------------------------------------------------------------
    469 
    470 #endif // ANDROID_STRING8_H
    471