1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <locale.h> 6 7 #include <string> 8 9 #include "base/basictypes.h" 10 #include "base/string_piece.h" 11 #include "base/utf_string_conversions.h" 12 #include "base/sys_string_conversions.h" 13 #include "testing/gtest/include/gtest/gtest.h" 14 15 #ifdef WCHAR_T_IS_UTF32 16 static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; 17 #else 18 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; 19 #endif 20 21 TEST(SysStrings, SysWideToUTF8) { 22 using base::SysWideToUTF8; 23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); 24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); 25 26 // >16 bits 27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); 28 29 // Error case. When Windows finds a UTF-16 character going off the end of 30 // a string, it just converts that literal value to UTF-8, even though this 31 // is invalid. 32 // 33 // This is what XP does, but Vista has different behavior, so we don't bother 34 // verifying it: 35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 36 // SysWideToUTF8(L"\x4f60\xd800zyxw")); 37 38 // Test embedded NULLs. 39 std::wstring wide_null(L"a"); 40 wide_null.push_back(0); 41 wide_null.push_back('b'); 42 43 std::string expected_null("a"); 44 expected_null.push_back(0); 45 expected_null.push_back('b'); 46 47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); 48 } 49 50 TEST(SysStrings, SysUTF8ToWide) { 51 using base::SysUTF8ToWide; 52 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); 53 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 54 // >16 bits 55 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); 56 57 // Error case. When Windows finds an invalid UTF-8 character, it just skips 58 // it. This seems weird because it's inconsistent with the reverse conversion. 59 // 60 // This is what XP does, but Vista has different behavior, so we don't bother 61 // verifying it: 62 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 63 64 // Test embedded NULLs. 65 std::string utf8_null("a"); 66 utf8_null.push_back(0); 67 utf8_null.push_back('b'); 68 69 std::wstring expected_null(L"a"); 70 expected_null.push_back(0); 71 expected_null.push_back('b'); 72 73 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); 74 } 75 76 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. 77 namespace { 78 79 class ScopedSetLocale { 80 public: 81 explicit ScopedSetLocale(const char* locale) { 82 old_locale_ = setlocale(LC_ALL, NULL); 83 setlocale(LC_ALL, locale); 84 } 85 ~ScopedSetLocale() { 86 setlocale(LC_ALL, old_locale_.c_str()); 87 } 88 89 private: 90 std::string old_locale_; 91 }; 92 93 } // namespace 94 95 TEST(SysStrings, SysWideToNativeMB) { 96 using base::SysWideToNativeMB; 97 ScopedSetLocale locale("en_US.utf-8"); 98 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); 99 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); 100 101 // >16 bits 102 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); 103 104 // Error case. When Windows finds a UTF-16 character going off the end of 105 // a string, it just converts that literal value to UTF-8, even though this 106 // is invalid. 107 // 108 // This is what XP does, but Vista has different behavior, so we don't bother 109 // verifying it: 110 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 111 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); 112 113 // Test embedded NULLs. 114 std::wstring wide_null(L"a"); 115 wide_null.push_back(0); 116 wide_null.push_back('b'); 117 118 std::string expected_null("a"); 119 expected_null.push_back(0); 120 expected_null.push_back('b'); 121 122 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); 123 } 124 125 // We assume the test is running in a UTF8 locale. 126 TEST(SysStrings, SysNativeMBToWide) { 127 using base::SysNativeMBToWide; 128 ScopedSetLocale locale("en_US.utf-8"); 129 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); 130 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 131 // >16 bits 132 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); 133 134 // Error case. When Windows finds an invalid UTF-8 character, it just skips 135 // it. This seems weird because it's inconsistent with the reverse conversion. 136 // 137 // This is what XP does, but Vista has different behavior, so we don't bother 138 // verifying it: 139 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 140 141 // Test embedded NULLs. 142 std::string utf8_null("a"); 143 utf8_null.push_back(0); 144 utf8_null.push_back('b'); 145 146 std::wstring expected_null(L"a"); 147 expected_null.push_back(0); 148 expected_null.push_back('b'); 149 150 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); 151 } 152 153 static const wchar_t* const kConvertRoundtripCases[] = { 154 L"Google Video", 155 // " " 156 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", 157 // " " 158 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 159 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", 160 // " " 161 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" 162 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" 163 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", 164 // "" 165 L"\xc804\xccb4\xc11c\xbe44\xc2a4", 166 167 // Test characters that take more than 16 bits. This will depend on whether 168 // wchar_t is 16 or 32 bits. 169 #if defined(WCHAR_T_IS_UTF16) 170 L"\xd800\xdf00", 171 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 172 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", 173 #elif defined(WCHAR_T_IS_UTF32) 174 L"\x10300", 175 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 176 L"\x11d40\x11d41\x11d42\x11d43\x11d44", 177 #endif 178 }; 179 180 181 TEST(SysStrings, SysNativeMBAndWide) { 182 ScopedSetLocale locale("en_US.utf-8"); 183 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 184 std::wstring wide = kConvertRoundtripCases[i]; 185 std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide)); 186 EXPECT_EQ(wide.size(), trip.size()); 187 EXPECT_EQ(wide, trip); 188 } 189 190 // We assume our test is running in UTF-8, so double check through ICU. 191 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 192 std::wstring wide = kConvertRoundtripCases[i]; 193 std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide)); 194 EXPECT_EQ(wide.size(), trip.size()); 195 EXPECT_EQ(wide, trip); 196 } 197 198 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 199 std::wstring wide = kConvertRoundtripCases[i]; 200 std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide)); 201 EXPECT_EQ(wide.size(), trip.size()); 202 EXPECT_EQ(wide, trip); 203 } 204 } 205 #endif // OS_LINUX 206