1 /* 2 * Copyright (C) 2015 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <cstdlib> 18 #include <string> 19 #include <vector> 20 21 #include <cutils/log.h> 22 #include <unicode/utf.h> 23 #include <unicode/utf8.h> 24 25 #include "minikin/U16StringPiece.h" 26 27 namespace minikin { 28 29 // src is of the form "U+1F431 | 'h' 'i'". Position of "|" gets saved to offset if non-null. 30 // Size is returned in an out parameter because gtest needs a void return for ASSERT to work. 31 void ParseUnicode(uint16_t* buf, size_t buf_size, const char* src, size_t* result_size, 32 size_t* offset) { 33 size_t input_ix = 0; 34 size_t output_ix = 0; 35 bool seen_offset = false; 36 37 while (src[input_ix] != 0) { 38 switch (src[input_ix]) { 39 case '\'': 40 // single ASCII char 41 LOG_ALWAYS_FATAL_IF(static_cast<uint8_t>(src[input_ix]) >= 0x80); 42 input_ix++; 43 LOG_ALWAYS_FATAL_IF(src[input_ix] == 0); 44 LOG_ALWAYS_FATAL_IF(output_ix >= buf_size); 45 buf[output_ix++] = (uint16_t)src[input_ix++]; 46 LOG_ALWAYS_FATAL_IF(src[input_ix] != '\''); 47 input_ix++; 48 break; 49 case 'u': 50 case 'U': { 51 // Unicode codepoint in hex syntax 52 input_ix++; 53 LOG_ALWAYS_FATAL_IF(src[input_ix] != '+'); 54 input_ix++; 55 char* endptr = (char*)src + input_ix; 56 unsigned long int codepoint = strtoul(src + input_ix, &endptr, 16); 57 size_t num_hex_digits = endptr - (src + input_ix); 58 59 // also triggers on invalid number syntax, digits = 0 60 LOG_ALWAYS_FATAL_IF(num_hex_digits < 4u); 61 LOG_ALWAYS_FATAL_IF(num_hex_digits > 6u); 62 LOG_ALWAYS_FATAL_IF(codepoint > 0x10FFFFu); 63 input_ix += num_hex_digits; 64 if (U16_LENGTH(codepoint) == 1) { 65 LOG_ALWAYS_FATAL_IF(output_ix + 1 > buf_size); 66 buf[output_ix++] = codepoint; 67 } else { 68 // UTF-16 encoding 69 LOG_ALWAYS_FATAL_IF(output_ix + 2 > buf_size); 70 buf[output_ix++] = U16_LEAD(codepoint); 71 buf[output_ix++] = U16_TRAIL(codepoint); 72 } 73 break; 74 } 75 case ' ': 76 input_ix++; 77 break; 78 case '|': 79 LOG_ALWAYS_FATAL_IF(seen_offset); 80 LOG_ALWAYS_FATAL_IF(offset == nullptr); 81 *offset = output_ix; 82 seen_offset = true; 83 input_ix++; 84 break; 85 default: 86 LOG_ALWAYS_FATAL("Unexpected Character"); 87 } 88 } 89 LOG_ALWAYS_FATAL_IF(result_size == nullptr); 90 *result_size = output_ix; 91 LOG_ALWAYS_FATAL_IF(!seen_offset && offset != nullptr); 92 } 93 94 std::vector<uint16_t> parseUnicodeStringWithOffset(const std::string& in, size_t* offset) { 95 std::unique_ptr<uint16_t[]> buffer(new uint16_t[in.size()]); 96 size_t result_size = 0; 97 ParseUnicode(buffer.get(), in.size(), in.c_str(), &result_size, offset); 98 return std::vector<uint16_t>(buffer.get(), buffer.get() + result_size); 99 } 100 101 std::vector<uint16_t> parseUnicodeString(const std::string& in) { 102 return parseUnicodeStringWithOffset(in, nullptr); 103 } 104 105 std::vector<uint16_t> utf8ToUtf16(const std::string& text) { 106 std::vector<uint16_t> result; 107 int32_t i = 0; 108 const int32_t textLength = static_cast<int32_t>(text.size()); 109 uint32_t c = 0; 110 while (i < textLength) { 111 U8_NEXT(text.c_str(), i, textLength, c); 112 if (U16_LENGTH(c) == 1) { 113 result.push_back(c); 114 } else { 115 result.push_back(U16_LEAD(c)); 116 result.push_back(U16_TRAIL(c)); 117 } 118 } 119 return result; 120 } 121 122 std::string utf16ToUtf8(const U16StringPiece& u16String) { 123 const uint32_t textLength = u16String.size(); 124 uint32_t i = 0; 125 uint32_t c = 0; 126 127 std::string out; 128 out.reserve(textLength * 4); 129 130 while (i < textLength) { 131 U16_NEXT(u16String.data(), i, textLength, c); 132 133 char buf[U8_MAX_LENGTH] = {}; 134 uint32_t outIndex = 0; 135 U8_APPEND_UNSAFE(buf, outIndex, c); 136 out.append(buf, outIndex); 137 } 138 return out; 139 } 140 141 } // namespace minikin 142