1 /* 2 * Copyright (C) 2011 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "utf.h" 18 19 #include "base/logging.h" 20 #include "mirror/array.h" 21 #include "mirror/object-inl.h" 22 23 namespace art { 24 25 size_t CountModifiedUtf8Chars(const char* utf8) { 26 size_t len = 0; 27 int ic; 28 while ((ic = *utf8++) != '\0') { 29 len++; 30 if ((ic & 0x80) == 0) { 31 // one-byte encoding 32 continue; 33 } 34 // two- or three-byte encoding 35 utf8++; 36 if ((ic & 0x20) == 0) { 37 // two-byte encoding 38 continue; 39 } 40 // three-byte encoding 41 utf8++; 42 } 43 return len; 44 } 45 46 void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) { 47 while (*utf8_data_in != '\0') { 48 *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in); 49 } 50 } 51 52 void ConvertUtf16ToModifiedUtf8(char* utf8_out, const uint16_t* utf16_in, size_t char_count) { 53 while (char_count--) { 54 uint16_t ch = *utf16_in++; 55 if (ch > 0 && ch <= 0x7f) { 56 *utf8_out++ = ch; 57 } else { 58 if (ch > 0x07ff) { 59 *utf8_out++ = (ch >> 12) | 0xe0; 60 *utf8_out++ = ((ch >> 6) & 0x3f) | 0x80; 61 *utf8_out++ = (ch & 0x3f) | 0x80; 62 } else /*(ch > 0x7f || ch == 0)*/ { 63 *utf8_out++ = (ch >> 6) | 0xc0; 64 *utf8_out++ = (ch & 0x3f) | 0x80; 65 } 66 } 67 } 68 } 69 70 int32_t ComputeUtf16Hash(const mirror::CharArray* chars, int32_t offset, 71 size_t char_count) { 72 int32_t hash = 0; 73 for (size_t i = 0; i < char_count; i++) { 74 hash = hash * 31 + chars->Get(offset + i); 75 } 76 return hash; 77 } 78 79 int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) { 80 int32_t hash = 0; 81 while (char_count--) { 82 hash = hash * 31 + *chars++; 83 } 84 return hash; 85 } 86 87 88 uint16_t GetUtf16FromUtf8(const char** utf8_data_in) { 89 uint8_t one = *(*utf8_data_in)++; 90 if ((one & 0x80) == 0) { 91 // one-byte encoding 92 return one; 93 } 94 // two- or three-byte encoding 95 uint8_t two = *(*utf8_data_in)++; 96 if ((one & 0x20) == 0) { 97 // two-byte encoding 98 return ((one & 0x1f) << 6) | (two & 0x3f); 99 } 100 // three-byte encoding 101 uint8_t three = *(*utf8_data_in)++; 102 return ((one & 0x0f) << 12) | ((two & 0x3f) << 6) | (three & 0x3f); 103 } 104 105 int CompareModifiedUtf8ToModifiedUtf8AsUtf16CodePointValues(const char* utf8_1, const char* utf8_2) { 106 for (;;) { 107 if (*utf8_1 == '\0') { 108 return (*utf8_2 == '\0') ? 0 : -1; 109 } else if (*utf8_2 == '\0') { 110 return 1; 111 } 112 113 int c1 = GetUtf16FromUtf8(&utf8_1); 114 int c2 = GetUtf16FromUtf8(&utf8_2); 115 116 if (c1 != c2) { 117 return c1 > c2 ? 1 : -1; 118 } 119 } 120 } 121 122 int CompareModifiedUtf8ToUtf16AsCodePointValues(const char* utf8_1, const uint16_t* utf8_2) { 123 for (;;) { 124 if (*utf8_1 == '\0') { 125 return (*utf8_2 == '\0') ? 0 : -1; 126 } else if (*utf8_2 == '\0') { 127 return 1; 128 } 129 130 int c1 = GetUtf16FromUtf8(&utf8_1); 131 int c2 = *utf8_2; 132 133 if (c1 != c2) { 134 return c1 > c2 ? 1 : -1; 135 } 136 } 137 } 138 139 size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) { 140 size_t result = 0; 141 while (char_count--) { 142 uint16_t ch = *chars++; 143 if (ch > 0 && ch <= 0x7f) { 144 ++result; 145 } else { 146 if (ch > 0x7ff) { 147 result += 3; 148 } else { 149 result += 2; 150 } 151 } 152 } 153 return result; 154 } 155 156 } // namespace art 157