// Copyright 2018 Google LLC.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
#ifndef SkUTF_DEFINED
#define SkUTF_DEFINED

#include <cstddef>
#include <cstdint>

/** A single unicode codepoint, held in a signed 32-bit integer.  The decoding
    functions below return -1 in this type to report invalid input.
*/
typedef int32_t SkUnichar;

/** Helpers for counting, decoding, and encoding unicode text in the UTF-8,
    UTF-16, and UTF-32 encoding forms.  This header only declares the
    functions; the definitions live elsewhere.
*/
namespace SkUTF {

/** Count the unicode codepoints in a sequence of UTF-8 bytes.

    @param utf8        start of the UTF-8 byte sequence
    @param byteLength  length of the sequence, in bytes
    @return            the number of codepoints, or -1 if the sequence is
                       invalid UTF-8
*/
int CountUTF8(const char* utf8, size_t byteLength);

/** Count the unicode codepoints in a sequence of aligned UTF-16 characters in
    machine-endian form.

    @param utf16       start of the UTF-16 sequence
    @param byteLength  length of the sequence (the name suggests this is
                       measured in bytes rather than in 16-bit code units;
                       confirm against the implementation)
    @return            the number of codepoints, or -1 if the sequence is
                       invalid UTF-16
*/
int CountUTF16(const uint16_t* utf16, size_t byteLength);

/** Count the unicode codepoints in a sequence of aligned UTF-32 characters in
    machine-endian form.

    @param utf32       start of the UTF-32 sequence
    @param byteLength  length of the sequence (the name suggests this is
                       measured in bytes rather than in 32-bit code units;
                       confirm against the implementation)
    @return            the number of codepoints, or -1 if the sequence is
                       invalid UTF-32
*/
int CountUTF32(const int32_t* utf32, size_t byteLength);

/** Decode the first unicode codepoint from a sequence of UTF-8 bytes.

    On success, *ptr is advanced to the start of the next codepoint.  If
    invalid UTF-8 is encountered, *ptr is set to end and -1 is returned.

    @param ptr  in/out cursor into the UTF-8 sequence
    @param end  end of the sequence (presumably one past the last byte;
                confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF8(const char** ptr, const char* end);

/** Decode the first unicode codepoint from a sequence of aligned UTF-16
    characters in machine-endian form.

    On success, *ptr is advanced to the start of the next codepoint.  If
    invalid UTF-16 is encountered, *ptr is set to end and -1 is returned.

    @param ptr  in/out cursor into the UTF-16 sequence
    @param end  end of the sequence (presumably one past the last code unit;
                confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);

/** Decode the first unicode codepoint from a sequence of aligned UTF-32
    characters in machine-endian form.

    On success, *ptr is advanced to the start of the next codepoint.  If
    invalid UTF-32 is encountered, *ptr is set to end and -1 is returned.

    @param ptr  in/out cursor into the UTF-32 sequence
    @param end  end of the sequence (presumably one past the last code unit;
                confirm against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);

/** Size of the output array accepted by ToUTF8(): the largest number of bytes
    a single codepoint occupies in UTF-8.
*/
constexpr unsigned kMaxBytesInUTF8Sequence = 4;

/** Convert a unicode codepoint into UTF-8.

    @param uni   the codepoint to encode
    @param utf8  if non-null, receives the encoded bytes; if null, nothing is
                 written and only the length is computed.  The array bound in
                 the signature documents the required capacity; the parameter
                 itself is a plain pointer.
    @return      the number of bytes in the result (or that would be used when
                 utf8 is null); 0 for invalid unicode codepoints
*/
size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);

/** Convert a unicode codepoint into UTF-16.

    @param uni    the codepoint to encode
    @param utf16  if non-null, receives the encoded code units; if null,
                  nothing is written and only the length is computed.  The
                  array bound in the signature documents the required
                  capacity; the parameter itself is a plain pointer.
    @return       the number of UTF-16 code units in the result, 1 or 2 (or
                  that would be used when utf16 is null); 0 for invalid
                  unicode codepoints
*/
size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);

}  // namespace SkUTF

#endif  // SkUTF_DEFINED