1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/utf_string_conversions.h" 6 7 #include "base/string_piece.h" 8 #include "base/utf_string_conversion_utils.h" 9 10 using base::PrepareForUTF8Output; 11 using base::PrepareForUTF16Or32Output; 12 using base::ReadUnicodeCharacter; 13 using base::WriteUnicodeCharacter; 14 15 namespace { 16 17 // Generalized Unicode converter ----------------------------------------------- 18 19 // Converts the given source Unicode character type to the given destination 20 // Unicode character type as a STL string. The given input buffer and size 21 // determine the source, and the given output STL string will be replaced by 22 // the result. 23 template<typename SRC_CHAR, typename DEST_STRING> 24 bool ConvertUnicode(const SRC_CHAR* src, 25 size_t src_len, 26 DEST_STRING* output) { 27 // ICU requires 32-bit numbers. 28 bool success = true; 29 int32 src_len32 = static_cast<int32>(src_len); 30 for (int32 i = 0; i < src_len32; i++) { 31 uint32 code_point; 32 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 33 WriteUnicodeCharacter(code_point, output); 34 } else { 35 WriteUnicodeCharacter(0xFFFD, output); 36 success = false; 37 } 38 } 39 40 return success; 41 } 42 43 } // namespace 44 45 // UTF-8 <-> Wide -------------------------------------------------------------- 46 47 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 48 PrepareForUTF8Output(src, src_len, output); 49 return ConvertUnicode(src, src_len, output); 50 } 51 52 std::string WideToUTF8(const std::wstring& wide) { 53 std::string ret; 54 // Ignore the success flag of this call, it will do the best it can for 55 // invalid input, which is what we want here. 56 WideToUTF8(wide.data(), wide.length(), &ret); 57 return ret; 58 } 59 60 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 61 PrepareForUTF16Or32Output(src, src_len, output); 62 return ConvertUnicode(src, src_len, output); 63 } 64 65 std::wstring UTF8ToWide(const base::StringPiece& utf8) { 66 std::wstring ret; 67 UTF8ToWide(utf8.data(), utf8.length(), &ret); 68 return ret; 69 } 70 71 // UTF-16 <-> Wide ------------------------------------------------------------- 72 73 #if defined(WCHAR_T_IS_UTF16) 74 75 // When wide == UTF-16, then conversions are a NOP. 76 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 77 output->assign(src, src_len); 78 return true; 79 } 80 81 string16 WideToUTF16(const std::wstring& wide) { 82 return wide; 83 } 84 85 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 86 output->assign(src, src_len); 87 return true; 88 } 89 90 std::wstring UTF16ToWide(const string16& utf16) { 91 return utf16; 92 } 93 94 #elif defined(WCHAR_T_IS_UTF32) 95 96 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 97 output->clear(); 98 // Assume that normally we won't have any non-BMP characters so the counts 99 // will be the same. 100 output->reserve(src_len); 101 return ConvertUnicode(src, src_len, output); 102 } 103 104 string16 WideToUTF16(const std::wstring& wide) { 105 string16 ret; 106 WideToUTF16(wide.data(), wide.length(), &ret); 107 return ret; 108 } 109 110 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 111 output->clear(); 112 // Assume that normally we won't have any non-BMP characters so the counts 113 // will be the same. 114 output->reserve(src_len); 115 return ConvertUnicode(src, src_len, output); 116 } 117 118 std::wstring UTF16ToWide(const string16& utf16) { 119 std::wstring ret; 120 UTF16ToWide(utf16.data(), utf16.length(), &ret); 121 return ret; 122 } 123 124 #endif // defined(WCHAR_T_IS_UTF32) 125 126 // UTF16 <-> UTF8 -------------------------------------------------------------- 127 128 #if defined(WCHAR_T_IS_UTF32) 129 130 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 131 PrepareForUTF16Or32Output(src, src_len, output); 132 return ConvertUnicode(src, src_len, output); 133 } 134 135 string16 UTF8ToUTF16(const std::string& utf8) { 136 string16 ret; 137 // Ignore the success flag of this call, it will do the best it can for 138 // invalid input, which is what we want here. 139 UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 140 return ret; 141 } 142 143 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 144 PrepareForUTF8Output(src, src_len, output); 145 return ConvertUnicode(src, src_len, output); 146 } 147 148 std::string UTF16ToUTF8(const string16& utf16) { 149 std::string ret; 150 // Ignore the success flag of this call, it will do the best it can for 151 // invalid input, which is what we want here. 152 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 153 return ret; 154 } 155 156 #elif defined(WCHAR_T_IS_UTF16) 157 // Easy case since we can use the "wide" versions we already wrote above. 158 159 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 160 return UTF8ToWide(src, src_len, output); 161 } 162 163 string16 UTF8ToUTF16(const std::string& utf8) { 164 return UTF8ToWide(utf8); 165 } 166 167 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 168 return WideToUTF8(src, src_len, output); 169 } 170 171 std::string UTF16ToUTF8(const string16& utf16) { 172 return WideToUTF8(utf16); 173 } 174 175 #endif 176