1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <string> 6 7 #include "base/basictypes.h" 8 #include "base/strings/string_piece.h" 9 #include "base/strings/sys_string_conversions.h" 10 #include "base/strings/utf_string_conversions.h" 11 #include "base/test/scoped_locale.h" 12 #include "testing/gtest/include/gtest/gtest.h" 13 14 #ifdef WCHAR_T_IS_UTF32 15 static const std::wstring kSysWideOldItalicLetterA = L"\x10300"; 16 #else 17 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00"; 18 #endif 19 20 namespace base { 21 22 TEST(SysStrings, SysWideToUTF8) { 23 EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world")); 24 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d")); 25 26 // >16 bits 27 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA)); 28 29 // Error case. When Windows finds a UTF-16 character going off the end of 30 // a string, it just converts that literal value to UTF-8, even though this 31 // is invalid. 32 // 33 // This is what XP does, but Vista has different behavior, so we don't bother 34 // verifying it: 35 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 36 // SysWideToUTF8(L"\x4f60\xd800zyxw")); 37 38 // Test embedded NULLs. 39 std::wstring wide_null(L"a"); 40 wide_null.push_back(0); 41 wide_null.push_back('b'); 42 43 std::string expected_null("a"); 44 expected_null.push_back(0); 45 expected_null.push_back('b'); 46 47 EXPECT_EQ(expected_null, SysWideToUTF8(wide_null)); 48 } 49 50 TEST(SysStrings, SysUTF8ToWide) { 51 EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world")); 52 EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 53 // >16 bits 54 EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80")); 55 56 // Error case. When Windows finds an invalid UTF-8 character, it just skips 57 // it. This seems weird because it's inconsistent with the reverse conversion. 58 // 59 // This is what XP does, but Vista has different behavior, so we don't bother 60 // verifying it: 61 // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 62 63 // Test embedded NULLs. 64 std::string utf8_null("a"); 65 utf8_null.push_back(0); 66 utf8_null.push_back('b'); 67 68 std::wstring expected_null(L"a"); 69 expected_null.push_back(0); 70 expected_null.push_back('b'); 71 72 EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null)); 73 } 74 75 #if defined(OS_LINUX) // Tests depend on setting a specific Linux locale. 76 77 TEST(SysStrings, SysWideToNativeMB) { 78 ScopedLocale locale("en_US.utf-8"); 79 EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world")); 80 EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d")); 81 82 // >16 bits 83 EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA)); 84 85 // Error case. When Windows finds a UTF-16 character going off the end of 86 // a string, it just converts that literal value to UTF-8, even though this 87 // is invalid. 88 // 89 // This is what XP does, but Vista has different behavior, so we don't bother 90 // verifying it: 91 // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw", 92 // SysWideToNativeMB(L"\x4f60\xd800zyxw")); 93 94 // Test embedded NULLs. 95 std::wstring wide_null(L"a"); 96 wide_null.push_back(0); 97 wide_null.push_back('b'); 98 99 std::string expected_null("a"); 100 expected_null.push_back(0); 101 expected_null.push_back('b'); 102 103 EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null)); 104 } 105 106 // We assume the test is running in a UTF8 locale. 107 TEST(SysStrings, SysNativeMBToWide) { 108 ScopedLocale locale("en_US.utf-8"); 109 EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world")); 110 EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd")); 111 // >16 bits 112 EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80")); 113 114 // Error case. When Windows finds an invalid UTF-8 character, it just skips 115 // it. This seems weird because it's inconsistent with the reverse conversion. 116 // 117 // This is what XP does, but Vista has different behavior, so we don't bother 118 // verifying it: 119 // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw")); 120 121 // Test embedded NULLs. 122 std::string utf8_null("a"); 123 utf8_null.push_back(0); 124 utf8_null.push_back('b'); 125 126 std::wstring expected_null(L"a"); 127 expected_null.push_back(0); 128 expected_null.push_back('b'); 129 130 EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null)); 131 } 132 133 static const wchar_t* const kConvertRoundtripCases[] = { 134 L"Google Video", 135 // " " 136 L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb", 137 // " " 138 L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9" 139 L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2", 140 // " " 141 L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442" 142 L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430" 143 L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c", 144 // "" 145 L"\xc804\xccb4\xc11c\xbe44\xc2a4", 146 147 // Test characters that take more than 16 bits. This will depend on whether 148 // wchar_t is 16 or 32 bits. 149 #if defined(WCHAR_T_IS_UTF16) 150 L"\xd800\xdf00", 151 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 152 L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44", 153 #elif defined(WCHAR_T_IS_UTF32) 154 L"\x10300", 155 // ????? (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E) 156 L"\x11d40\x11d41\x11d42\x11d43\x11d44", 157 #endif 158 }; 159 160 161 TEST(SysStrings, SysNativeMBAndWide) { 162 ScopedLocale locale("en_US.utf-8"); 163 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 164 std::wstring wide = kConvertRoundtripCases[i]; 165 std::wstring trip = SysNativeMBToWide(SysWideToNativeMB(wide)); 166 EXPECT_EQ(wide.size(), trip.size()); 167 EXPECT_EQ(wide, trip); 168 } 169 170 // We assume our test is running in UTF-8, so double check through ICU. 171 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 172 std::wstring wide = kConvertRoundtripCases[i]; 173 std::wstring trip = SysNativeMBToWide(WideToUTF8(wide)); 174 EXPECT_EQ(wide.size(), trip.size()); 175 EXPECT_EQ(wide, trip); 176 } 177 178 for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) { 179 std::wstring wide = kConvertRoundtripCases[i]; 180 std::wstring trip = UTF8ToWide(SysWideToNativeMB(wide)); 181 EXPECT_EQ(wide.size(), trip.size()); 182 EXPECT_EQ(wide, trip); 183 } 184 } 185 #endif // OS_LINUX 186 187 } // namespace base 188