1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/utf_string_conversions.h" 6 7 #include "base/string_piece.h" 8 #include "base/string_util.h" 9 #include "base/utf_string_conversion_utils.h" 10 11 using base::PrepareForUTF8Output; 12 using base::PrepareForUTF16Or32Output; 13 using base::ReadUnicodeCharacter; 14 using base::WriteUnicodeCharacter; 15 16 namespace { 17 18 // Generalized Unicode converter ----------------------------------------------- 19 20 // Converts the given source Unicode character type to the given destination 21 // Unicode character type as a STL string. The given input buffer and size 22 // determine the source, and the given output STL string will be replaced by 23 // the result. 24 template<typename SRC_CHAR, typename DEST_STRING> 25 bool ConvertUnicode(const SRC_CHAR* src, 26 size_t src_len, 27 DEST_STRING* output) { 28 // ICU requires 32-bit numbers. 29 bool success = true; 30 int32 src_len32 = static_cast<int32>(src_len); 31 for (int32 i = 0; i < src_len32; i++) { 32 uint32 code_point; 33 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 34 WriteUnicodeCharacter(code_point, output); 35 } else { 36 WriteUnicodeCharacter(0xFFFD, output); 37 success = false; 38 } 39 } 40 41 return success; 42 } 43 44 } // namespace 45 46 // UTF-8 <-> Wide -------------------------------------------------------------- 47 48 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 49 PrepareForUTF8Output(src, src_len, output); 50 return ConvertUnicode(src, src_len, output); 51 } 52 53 std::string WideToUTF8(const std::wstring& wide) { 54 std::string ret; 55 // Ignore the success flag of this call, it will do the best it can for 56 // invalid input, which is what we want here. 57 WideToUTF8(wide.data(), wide.length(), &ret); 58 return ret; 59 } 60 61 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 62 PrepareForUTF16Or32Output(src, src_len, output); 63 return ConvertUnicode(src, src_len, output); 64 } 65 66 std::wstring UTF8ToWide(const base::StringPiece& utf8) { 67 std::wstring ret; 68 UTF8ToWide(utf8.data(), utf8.length(), &ret); 69 return ret; 70 } 71 72 // UTF-16 <-> Wide ------------------------------------------------------------- 73 74 #if defined(WCHAR_T_IS_UTF16) 75 76 // When wide == UTF-16, then conversions are a NOP. 77 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 78 output->assign(src, src_len); 79 return true; 80 } 81 82 string16 WideToUTF16(const std::wstring& wide) { 83 return wide; 84 } 85 86 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 87 output->assign(src, src_len); 88 return true; 89 } 90 91 std::wstring UTF16ToWide(const string16& utf16) { 92 return utf16; 93 } 94 95 #elif defined(WCHAR_T_IS_UTF32) 96 97 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 98 output->clear(); 99 // Assume that normally we won't have any non-BMP characters so the counts 100 // will be the same. 101 output->reserve(src_len); 102 return ConvertUnicode(src, src_len, output); 103 } 104 105 string16 WideToUTF16(const std::wstring& wide) { 106 string16 ret; 107 WideToUTF16(wide.data(), wide.length(), &ret); 108 return ret; 109 } 110 111 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 112 output->clear(); 113 // Assume that normally we won't have any non-BMP characters so the counts 114 // will be the same. 115 output->reserve(src_len); 116 return ConvertUnicode(src, src_len, output); 117 } 118 119 std::wstring UTF16ToWide(const string16& utf16) { 120 std::wstring ret; 121 UTF16ToWide(utf16.data(), utf16.length(), &ret); 122 return ret; 123 } 124 125 #endif // defined(WCHAR_T_IS_UTF32) 126 127 // UTF16 <-> UTF8 -------------------------------------------------------------- 128 129 #if defined(WCHAR_T_IS_UTF32) 130 131 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 132 PrepareForUTF16Or32Output(src, src_len, output); 133 return ConvertUnicode(src, src_len, output); 134 } 135 136 string16 UTF8ToUTF16(const base::StringPiece& utf8) { 137 string16 ret; 138 // Ignore the success flag of this call, it will do the best it can for 139 // invalid input, which is what we want here. 140 UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 141 return ret; 142 } 143 144 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 145 PrepareForUTF8Output(src, src_len, output); 146 return ConvertUnicode(src, src_len, output); 147 } 148 149 std::string UTF16ToUTF8(const string16& utf16) { 150 std::string ret; 151 // Ignore the success flag of this call, it will do the best it can for 152 // invalid input, which is what we want here. 153 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 154 return ret; 155 } 156 157 #elif defined(WCHAR_T_IS_UTF16) 158 // Easy case since we can use the "wide" versions we already wrote above. 159 160 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 161 return UTF8ToWide(src, src_len, output); 162 } 163 164 string16 UTF8ToUTF16(const base::StringPiece& utf8) { 165 return UTF8ToWide(utf8); 166 } 167 168 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 169 return WideToUTF8(src, src_len, output); 170 } 171 172 std::string UTF16ToUTF8(const string16& utf16) { 173 return WideToUTF8(utf16); 174 } 175 176 #endif 177 178 std::wstring ASCIIToWide(const base::StringPiece& ascii) { 179 DCHECK(IsStringASCII(ascii)) << ascii; 180 return std::wstring(ascii.begin(), ascii.end()); 181 } 182 183 string16 ASCIIToUTF16(const base::StringPiece& ascii) { 184 DCHECK(IsStringASCII(ascii)) << ascii; 185 return string16(ascii.begin(), ascii.end()); 186 } 187