1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/utf_string_conversions.h" 6 7 #include <stdint.h> 8 9 #include "base/strings/string_piece.h" 10 #include "base/strings/string_util.h" 11 #include "base/strings/utf_string_conversion_utils.h" 12 #include "build/build_config.h" 13 14 namespace base { 15 16 namespace { 17 18 // Generalized Unicode converter ----------------------------------------------- 19 20 // Converts the given source Unicode character type to the given destination 21 // Unicode character type as a STL string. The given input buffer and size 22 // determine the source, and the given output STL string will be replaced by 23 // the result. 24 template<typename SRC_CHAR, typename DEST_STRING> 25 bool ConvertUnicode(const SRC_CHAR* src, 26 size_t src_len, 27 DEST_STRING* output) { 28 // ICU requires 32-bit numbers. 29 bool success = true; 30 int32_t src_len32 = static_cast<int32_t>(src_len); 31 for (int32_t i = 0; i < src_len32; i++) { 32 uint32_t code_point; 33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 34 WriteUnicodeCharacter(code_point, output); 35 } else { 36 WriteUnicodeCharacter(0xFFFD, output); 37 success = false; 38 } 39 } 40 41 return success; 42 } 43 44 } // namespace 45 46 // UTF-8 <-> Wide -------------------------------------------------------------- 47 48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 49 if (IsStringASCII(std::wstring(src, src_len))) { 50 output->assign(src, src + src_len); 51 return true; 52 } else { 53 PrepareForUTF8Output(src, src_len, output); 54 return ConvertUnicode(src, src_len, output); 55 } 56 } 57 58 std::string WideToUTF8(const std::wstring& wide) { 59 if (IsStringASCII(wide)) { 60 return std::string(wide.data(), wide.data() + wide.length()); 61 } 62 63 std::string ret; 64 PrepareForUTF8Output(wide.data(), wide.length(), &ret); 65 ConvertUnicode(wide.data(), wide.length(), &ret); 66 return ret; 67 } 68 69 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 70 if (IsStringASCII(StringPiece(src, src_len))) { 71 output->assign(src, src + src_len); 72 return true; 73 } else { 74 PrepareForUTF16Or32Output(src, src_len, output); 75 return ConvertUnicode(src, src_len, output); 76 } 77 } 78 79 std::wstring UTF8ToWide(StringPiece utf8) { 80 if (IsStringASCII(utf8)) { 81 return std::wstring(utf8.begin(), utf8.end()); 82 } 83 84 std::wstring ret; 85 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 86 ConvertUnicode(utf8.data(), utf8.length(), &ret); 87 return ret; 88 } 89 90 // UTF-16 <-> Wide ------------------------------------------------------------- 91 92 #if defined(WCHAR_T_IS_UTF16) 93 94 // When wide == UTF-16, then conversions are a NOP. 95 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 96 output->assign(src, src_len); 97 return true; 98 } 99 100 string16 WideToUTF16(const std::wstring& wide) { 101 return wide; 102 } 103 104 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 105 output->assign(src, src_len); 106 return true; 107 } 108 109 std::wstring UTF16ToWide(const string16& utf16) { 110 return utf16; 111 } 112 113 #elif defined(WCHAR_T_IS_UTF32) 114 115 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 116 output->clear(); 117 // Assume that normally we won't have any non-BMP characters so the counts 118 // will be the same. 119 output->reserve(src_len); 120 return ConvertUnicode(src, src_len, output); 121 } 122 123 string16 WideToUTF16(const std::wstring& wide) { 124 string16 ret; 125 WideToUTF16(wide.data(), wide.length(), &ret); 126 return ret; 127 } 128 129 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 130 output->clear(); 131 // Assume that normally we won't have any non-BMP characters so the counts 132 // will be the same. 133 output->reserve(src_len); 134 return ConvertUnicode(src, src_len, output); 135 } 136 137 std::wstring UTF16ToWide(const string16& utf16) { 138 std::wstring ret; 139 UTF16ToWide(utf16.data(), utf16.length(), &ret); 140 return ret; 141 } 142 143 #endif // defined(WCHAR_T_IS_UTF32) 144 145 // UTF16 <-> UTF8 -------------------------------------------------------------- 146 147 #if defined(WCHAR_T_IS_UTF32) 148 149 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 150 if (IsStringASCII(StringPiece(src, src_len))) { 151 output->assign(src, src + src_len); 152 return true; 153 } else { 154 PrepareForUTF16Or32Output(src, src_len, output); 155 return ConvertUnicode(src, src_len, output); 156 } 157 } 158 159 string16 UTF8ToUTF16(StringPiece utf8) { 160 if (IsStringASCII(utf8)) { 161 return string16(utf8.begin(), utf8.end()); 162 } 163 164 string16 ret; 165 PrepareForUTF16Or32Output(utf8.data(), utf8.length(), &ret); 166 // Ignore the success flag of this call, it will do the best it can for 167 // invalid input, which is what we want here. 168 ConvertUnicode(utf8.data(), utf8.length(), &ret); 169 return ret; 170 } 171 172 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 173 if (IsStringASCII(StringPiece16(src, src_len))) { 174 output->assign(src, src + src_len); 175 return true; 176 } else { 177 PrepareForUTF8Output(src, src_len, output); 178 return ConvertUnicode(src, src_len, output); 179 } 180 } 181 182 std::string UTF16ToUTF8(StringPiece16 utf16) { 183 if (IsStringASCII(utf16)) { 184 return std::string(utf16.begin(), utf16.end()); 185 } 186 187 std::string ret; 188 // Ignore the success flag of this call, it will do the best it can for 189 // invalid input, which is what we want here. 190 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 191 return ret; 192 } 193 194 #elif defined(WCHAR_T_IS_UTF16) 195 // Easy case since we can use the "wide" versions we already wrote above. 196 197 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 198 return UTF8ToWide(src, src_len, output); 199 } 200 201 string16 UTF8ToUTF16(StringPiece utf8) { 202 return UTF8ToWide(utf8); 203 } 204 205 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 206 return WideToUTF8(src, src_len, output); 207 } 208 209 std::string UTF16ToUTF8(StringPiece16 utf16) { 210 if (IsStringASCII(utf16)) 211 return std::string(utf16.data(), utf16.data() + utf16.length()); 212 213 std::string ret; 214 PrepareForUTF8Output(utf16.data(), utf16.length(), &ret); 215 ConvertUnicode(utf16.data(), utf16.length(), &ret); 216 return ret; 217 } 218 219 #endif 220 221 string16 ASCIIToUTF16(StringPiece ascii) { 222 DCHECK(IsStringASCII(ascii)) << ascii; 223 return string16(ascii.begin(), ascii.end()); 224 } 225 226 std::string UTF16ToASCII(StringPiece16 utf16) { 227 DCHECK(IsStringASCII(utf16)) << UTF16ToUTF8(utf16); 228 return std::string(utf16.begin(), utf16.end()); 229 } 230 231 } // namespace base 232