// Copyright 2018 Google LLC.
// Use of this source code is governed by a BSD-style license that can be found in the LICENSE file.
#ifndef SkUTF_DEFINED
#define SkUTF_DEFINED

#include <cstddef>
#include <cstdint>

/** A single Unicode codepoint, stored as a signed 32-bit integer. The
    decoding functions below return -1 in this type to signal an error.
*/
typedef int32_t SkUnichar;

/** Declarations for counting, decoding and encoding Unicode text in the
    UTF-8, UTF-16 and UTF-32 encoding forms.
*/
namespace SkUTF {

/** Given a sequence of UTF-8 bytes, return the number of unicode codepoints.
    If the sequence is invalid UTF-8, return -1.

    @param utf8        start of the UTF-8 byte sequence
    @param byteLength  length of the sequence, in bytes
    @return            number of codepoints, or -1 if the input is invalid
*/
int CountUTF8(const char* utf8, size_t byteLength);

/** Given a sequence of aligned UTF-16 characters in machine-endian form,
    return the number of unicode codepoints.  If the sequence is invalid
    UTF-16, return -1.

    @param utf16       start of the UTF-16 sequence
    @param byteLength  length of the sequence.  NOTE(review): the parameter
                       name says bytes, not 16-bit code units; confirm
                       against the implementation before relying on it.
    @return            number of codepoints, or -1 if the input is invalid
*/
int CountUTF16(const uint16_t* utf16, size_t byteLength);

/** Given a sequence of aligned UTF-32 characters in machine-endian form,
    return the number of unicode codepoints.  If the sequence is invalid
    UTF-32, return -1.

    @param utf32       start of the UTF-32 sequence
    @param byteLength  length of the sequence.  NOTE(review): the parameter
                       name says bytes, not 32-bit units; confirm against
                       the implementation before relying on it.
    @return            number of codepoints, or -1 if the input is invalid
*/
int CountUTF32(const int32_t* utf32, size_t byteLength);

/** Given a sequence of UTF-8 bytes, return the first unicode codepoint.
    The pointer will be incremented to point at the next codepoint's start.  If
    invalid UTF-8 is encountered, set *ptr to end and return -1.

    @param ptr  in/out cursor; on entry *ptr is where decoding starts
    @param end  value stored into *ptr on failure (presumably one past the
                last byte of the input; verify against the implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF8(const char** ptr, const char* end);

/** Given a sequence of aligned UTF-16 characters in machine-endian form,
    return the first unicode codepoint.  The pointer will be incremented to
    point at the next codepoint's start.  If invalid UTF-16 is encountered,
    set *ptr to end and return -1.

    @param ptr  in/out cursor; on entry *ptr is where decoding starts
    @param end  value stored into *ptr on failure (presumably one past the
                last code unit of the input; verify against the
                implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF16(const uint16_t** ptr, const uint16_t* end);

/** Given a sequence of aligned UTF-32 characters in machine-endian form,
    return the first unicode codepoint.  The pointer will be incremented to
    point at the next codepoint's start.  If invalid UTF-32 is encountered,
    set *ptr to end and return -1.

    @param ptr  in/out cursor; on entry *ptr is where decoding starts
    @param end  value stored into *ptr on failure (presumably one past the
                last element of the input; verify against the
                implementation)
    @return     the decoded codepoint, or -1 on invalid input
*/
SkUnichar NextUTF32(const int32_t** ptr, const int32_t* end);

/** Array bound used by ToUTF8() for its output buffer parameter. */
constexpr unsigned kMaxBytesInUTF8Sequence = 4;

/** Convert the unicode codepoint into UTF-8.  If `utf8` is non-null, place the
    result in that array.  Return the number of bytes in the result.  If `utf8`
    is null, simply return the number of bytes that would be used.  For invalid
    unicode codepoints, return 0.

    The array bound in the signature is not enforced by the language: the
    parameter is a plain `char*`.  Callers passing a non-null buffer should
    provide room for kMaxBytesInUTF8Sequence bytes.

    @param uni   codepoint to encode
    @param utf8  optional output buffer; defaults to nullptr (size query only)
    @return      number of bytes in the encoding, or 0 if `uni` is invalid
*/
size_t ToUTF8(SkUnichar uni, char utf8[kMaxBytesInUTF8Sequence] = nullptr);

/** Convert the unicode codepoint into UTF-16.  If `utf16` is non-null, place
    the result in that array.  Return the number of UTF-16 code units in the
    result (1 or 2).  If `utf16` is null, simply return the number of code
    units that would be used.  For invalid unicode codepoints, return 0.

    The array bound in the signature is not enforced by the language: the
    parameter is a plain `uint16_t*`.  Callers passing a non-null buffer
    should provide room for two code units.

    @param uni    codepoint to encode
    @param utf16  optional output buffer; defaults to nullptr (size query only)
    @return       number of code units (1 or 2), or 0 if `uni` is invalid
*/
size_t ToUTF16(SkUnichar uni, uint16_t utf16[2] = nullptr);

}  // namespace SkUTF

#endif  // SkUTF_DEFINED