1 // Copyright (c) 2010 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/utf_string_conversions.h" 6 7 #include "base/strings/string_piece.h" 8 #include "base/strings/string_util.h" 9 #include "base/strings/utf_string_conversion_utils.h" 10 11 namespace base { 12 13 namespace { 14 15 // Generalized Unicode converter ----------------------------------------------- 16 17 // Converts the given source Unicode character type to the given destination 18 // Unicode character type as a STL string. The given input buffer and size 19 // determine the source, and the given output STL string will be replaced by 20 // the result. 21 template<typename SRC_CHAR, typename DEST_STRING> 22 bool ConvertUnicode(const SRC_CHAR* src, 23 size_t src_len, 24 DEST_STRING* output) { 25 // ICU requires 32-bit numbers. 26 bool success = true; 27 int32 src_len32 = static_cast<int32>(src_len); 28 for (int32 i = 0; i < src_len32; i++) { 29 uint32 code_point; 30 if (ReadUnicodeCharacter(src, src_len32, &i, &code_point)) { 31 WriteUnicodeCharacter(code_point, output); 32 } else { 33 WriteUnicodeCharacter(0xFFFD, output); 34 success = false; 35 } 36 } 37 38 return success; 39 } 40 41 } // namespace 42 43 // UTF-8 <-> Wide -------------------------------------------------------------- 44 45 bool WideToUTF8(const wchar_t* src, size_t src_len, std::string* output) { 46 PrepareForUTF8Output(src, src_len, output); 47 return ConvertUnicode(src, src_len, output); 48 } 49 50 std::string WideToUTF8(const std::wstring& wide) { 51 std::string ret; 52 // Ignore the success flag of this call, it will do the best it can for 53 // invalid input, which is what we want here. 54 WideToUTF8(wide.data(), wide.length(), &ret); 55 return ret; 56 } 57 58 bool UTF8ToWide(const char* src, size_t src_len, std::wstring* output) { 59 PrepareForUTF16Or32Output(src, src_len, output); 60 return ConvertUnicode(src, src_len, output); 61 } 62 63 std::wstring UTF8ToWide(const StringPiece& utf8) { 64 std::wstring ret; 65 UTF8ToWide(utf8.data(), utf8.length(), &ret); 66 return ret; 67 } 68 69 // UTF-16 <-> Wide ------------------------------------------------------------- 70 71 #if defined(WCHAR_T_IS_UTF16) 72 73 // When wide == UTF-16, then conversions are a NOP. 74 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 75 output->assign(src, src_len); 76 return true; 77 } 78 79 string16 WideToUTF16(const std::wstring& wide) { 80 return wide; 81 } 82 83 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 84 output->assign(src, src_len); 85 return true; 86 } 87 88 std::wstring UTF16ToWide(const string16& utf16) { 89 return utf16; 90 } 91 92 #elif defined(WCHAR_T_IS_UTF32) 93 94 bool WideToUTF16(const wchar_t* src, size_t src_len, string16* output) { 95 output->clear(); 96 // Assume that normally we won't have any non-BMP characters so the counts 97 // will be the same. 98 output->reserve(src_len); 99 return ConvertUnicode(src, src_len, output); 100 } 101 102 string16 WideToUTF16(const std::wstring& wide) { 103 string16 ret; 104 WideToUTF16(wide.data(), wide.length(), &ret); 105 return ret; 106 } 107 108 bool UTF16ToWide(const char16* src, size_t src_len, std::wstring* output) { 109 output->clear(); 110 // Assume that normally we won't have any non-BMP characters so the counts 111 // will be the same. 112 output->reserve(src_len); 113 return ConvertUnicode(src, src_len, output); 114 } 115 116 std::wstring UTF16ToWide(const string16& utf16) { 117 std::wstring ret; 118 UTF16ToWide(utf16.data(), utf16.length(), &ret); 119 return ret; 120 } 121 122 #endif // defined(WCHAR_T_IS_UTF32) 123 124 // UTF16 <-> UTF8 -------------------------------------------------------------- 125 126 #if defined(WCHAR_T_IS_UTF32) 127 128 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 129 PrepareForUTF16Or32Output(src, src_len, output); 130 return ConvertUnicode(src, src_len, output); 131 } 132 133 string16 UTF8ToUTF16(const StringPiece& utf8) { 134 string16 ret; 135 // Ignore the success flag of this call, it will do the best it can for 136 // invalid input, which is what we want here. 137 UTF8ToUTF16(utf8.data(), utf8.length(), &ret); 138 return ret; 139 } 140 141 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 142 PrepareForUTF8Output(src, src_len, output); 143 return ConvertUnicode(src, src_len, output); 144 } 145 146 std::string UTF16ToUTF8(const string16& utf16) { 147 std::string ret; 148 // Ignore the success flag of this call, it will do the best it can for 149 // invalid input, which is what we want here. 150 UTF16ToUTF8(utf16.data(), utf16.length(), &ret); 151 return ret; 152 } 153 154 #elif defined(WCHAR_T_IS_UTF16) 155 // Easy case since we can use the "wide" versions we already wrote above. 156 157 bool UTF8ToUTF16(const char* src, size_t src_len, string16* output) { 158 return UTF8ToWide(src, src_len, output); 159 } 160 161 string16 UTF8ToUTF16(const StringPiece& utf8) { 162 return UTF8ToWide(utf8); 163 } 164 165 bool UTF16ToUTF8(const char16* src, size_t src_len, std::string* output) { 166 return WideToUTF8(src, src_len, output); 167 } 168 169 std::string UTF16ToUTF8(const string16& utf16) { 170 return WideToUTF8(utf16); 171 } 172 173 #endif 174 175 std::wstring ASCIIToWide(const StringPiece& ascii) { 176 DCHECK(IsStringASCII(ascii)) << ascii; 177 return std::wstring(ascii.begin(), ascii.end()); 178 } 179 180 string16 ASCIIToUTF16(const StringPiece& ascii) { 181 DCHECK(IsStringASCII(ascii)) << ascii; 182 return string16(ascii.begin(), ascii.end()); 183 } 184 185 } // namespace base 186