1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/sys_string_conversions.h" 6 7 #include <stddef.h> 8 #include <wchar.h> 9 10 #include "base/strings/string_piece.h" 11 #include "base/strings/utf_string_conversions.h" 12 #include "build/build_config.h" 13 14 namespace base { 15 16 std::string SysWideToUTF8(const std::wstring& wide) { 17 // In theory this should be using the system-provided conversion rather 18 // than our ICU, but this will do for now. 19 return WideToUTF8(wide); 20 } 21 std::wstring SysUTF8ToWide(const StringPiece& utf8) { 22 // In theory this should be using the system-provided conversion rather 23 // than our ICU, but this will do for now. 24 std::wstring out; 25 UTF8ToWide(utf8.data(), utf8.size(), &out); 26 return out; 27 } 28 29 #if defined(SYSTEM_NATIVE_UTF8) || defined(OS_ANDROID) 30 // TODO(port): Consider reverting the OS_ANDROID when we have wcrtomb() 31 // support and a better understanding of what calls these routines. 32 33 std::string SysWideToNativeMB(const std::wstring& wide) { 34 return WideToUTF8(wide); 35 } 36 37 std::wstring SysNativeMBToWide(const StringPiece& native_mb) { 38 return SysUTF8ToWide(native_mb); 39 } 40 41 #else 42 43 std::string SysWideToNativeMB(const std::wstring& wide) { 44 mbstate_t ps; 45 46 // Calculate the number of multi-byte characters. We walk through the string 47 // without writing the output, counting the number of multi-byte characters. 48 size_t num_out_chars = 0; 49 memset(&ps, 0, sizeof(ps)); 50 for (size_t i = 0; i < wide.size(); ++i) { 51 const wchar_t src = wide[i]; 52 // Use a temp buffer since calling wcrtomb with an output of NULL does not 53 // calculate the output length. 54 char buf[16]; 55 // Skip NULLs to avoid wcrtomb's special handling of them. 56 size_t res = src ? wcrtomb(buf, src, &ps) : 0; 57 switch (res) { 58 // Handle any errors and return an empty string. 59 case static_cast<size_t>(-1): 60 return std::string(); 61 break; 62 case 0: 63 // We hit an embedded null byte, keep going. 64 ++num_out_chars; 65 break; 66 default: 67 num_out_chars += res; 68 break; 69 } 70 } 71 72 if (num_out_chars == 0) 73 return std::string(); 74 75 std::string out; 76 out.resize(num_out_chars); 77 78 // We walk the input string again, with |i| tracking the index of the 79 // wide input, and |j| tracking the multi-byte output. 80 memset(&ps, 0, sizeof(ps)); 81 for (size_t i = 0, j = 0; i < wide.size(); ++i) { 82 const wchar_t src = wide[i]; 83 // We don't want wcrtomb to do its funkiness for embedded NULLs. 84 size_t res = src ? wcrtomb(&out[j], src, &ps) : 0; 85 switch (res) { 86 // Handle any errors and return an empty string. 87 case static_cast<size_t>(-1): 88 return std::string(); 89 break; 90 case 0: 91 // We hit an embedded null byte, keep going. 92 ++j; // Output is already zeroed. 93 break; 94 default: 95 j += res; 96 break; 97 } 98 } 99 100 return out; 101 } 102 103 std::wstring SysNativeMBToWide(const StringPiece& native_mb) { 104 mbstate_t ps; 105 106 // Calculate the number of wide characters. We walk through the string 107 // without writing the output, counting the number of wide characters. 108 size_t num_out_chars = 0; 109 memset(&ps, 0, sizeof(ps)); 110 for (size_t i = 0; i < native_mb.size(); ) { 111 const char* src = native_mb.data() + i; 112 size_t res = mbrtowc(NULL, src, native_mb.size() - i, &ps); 113 switch (res) { 114 // Handle any errors and return an empty string. 115 case static_cast<size_t>(-2): 116 case static_cast<size_t>(-1): 117 return std::wstring(); 118 break; 119 case 0: 120 // We hit an embedded null byte, keep going. 121 i += 1; // Fall through. 122 default: 123 i += res; 124 ++num_out_chars; 125 break; 126 } 127 } 128 129 if (num_out_chars == 0) 130 return std::wstring(); 131 132 std::wstring out; 133 out.resize(num_out_chars); 134 135 memset(&ps, 0, sizeof(ps)); // Clear the shift state. 136 // We walk the input string again, with |i| tracking the index of the 137 // multi-byte input, and |j| tracking the wide output. 138 for (size_t i = 0, j = 0; i < native_mb.size(); ++j) { 139 const char* src = native_mb.data() + i; 140 wchar_t* dst = &out[j]; 141 size_t res = mbrtowc(dst, src, native_mb.size() - i, &ps); 142 switch (res) { 143 // Handle any errors and return an empty string. 144 case static_cast<size_t>(-2): 145 case static_cast<size_t>(-1): 146 return std::wstring(); 147 break; 148 case 0: 149 i += 1; // Skip null byte. 150 break; 151 default: 152 i += res; 153 break; 154 } 155 } 156 157 return out; 158 } 159 160 #endif // OS_CHROMEOS 161 162 } // namespace base 163