Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <locale.h>
      6 
      7 #include <string>
      8 
      9 #include "base/basictypes.h"
     10 #include "base/string_piece.h"
     11 #include "base/utf_string_conversions.h"
     12 #include "base/sys_string_conversions.h"
     13 #include "testing/gtest/include/gtest/gtest.h"
     14 
     15 #ifdef WCHAR_T_IS_UTF32
     16 static const std::wstring kSysWideOldItalicLetterA = L"\x10300";
     17 #else
     18 static const std::wstring kSysWideOldItalicLetterA = L"\xd800\xdf00";
     19 #endif
     20 
     21 TEST(SysStrings, SysWideToUTF8) {
     22   using base::SysWideToUTF8;
     23   EXPECT_EQ("Hello, world", SysWideToUTF8(L"Hello, world"));
     24   EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToUTF8(L"\x4f60\x597d"));
     25 
     26   // >16 bits
     27   EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToUTF8(kSysWideOldItalicLetterA));
     28 
     29   // Error case. When Windows finds a UTF-16 character going off the end of
     30   // a string, it just converts that literal value to UTF-8, even though this
     31   // is invalid.
     32   //
     33   // This is what XP does, but Vista has different behavior, so we don't bother
     34   // verifying it:
     35   // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
     36   //           SysWideToUTF8(L"\x4f60\xd800zyxw"));
     37 
     38   // Test embedded NULLs.
     39   std::wstring wide_null(L"a");
     40   wide_null.push_back(0);
     41   wide_null.push_back('b');
     42 
     43   std::string expected_null("a");
     44   expected_null.push_back(0);
     45   expected_null.push_back('b');
     46 
     47   EXPECT_EQ(expected_null, SysWideToUTF8(wide_null));
     48 }
     49 
     50 TEST(SysStrings, SysUTF8ToWide) {
     51   using base::SysUTF8ToWide;
     52   EXPECT_EQ(L"Hello, world", SysUTF8ToWide("Hello, world"));
     53   EXPECT_EQ(L"\x4f60\x597d", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
     54   // >16 bits
     55   EXPECT_EQ(kSysWideOldItalicLetterA, SysUTF8ToWide("\xF0\x90\x8C\x80"));
     56 
     57   // Error case. When Windows finds an invalid UTF-8 character, it just skips
     58   // it. This seems weird because it's inconsistent with the reverse conversion.
     59   //
     60   // This is what XP does, but Vista has different behavior, so we don't bother
     61   // verifying it:
     62   // EXPECT_EQ(L"\x4f60zyxw", SysUTF8ToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));
     63 
     64   // Test embedded NULLs.
     65   std::string utf8_null("a");
     66   utf8_null.push_back(0);
     67   utf8_null.push_back('b');
     68 
     69   std::wstring expected_null(L"a");
     70   expected_null.push_back(0);
     71   expected_null.push_back('b');
     72 
     73   EXPECT_EQ(expected_null, SysUTF8ToWide(utf8_null));
     74 }
     75 
     76 #if defined(OS_LINUX)  // Tests depend on setting a specific Linux locale.
     77 namespace {
     78 
     79 class ScopedSetLocale {
     80  public:
     81   explicit ScopedSetLocale(const char* locale) {
     82     old_locale_ = setlocale(LC_ALL, NULL);
     83     setlocale(LC_ALL, locale);
     84   }
     85   ~ScopedSetLocale() {
     86     setlocale(LC_ALL, old_locale_.c_str());
     87   }
     88 
     89  private:
     90   std::string old_locale_;
     91 };
     92 
     93 }  // namespace
     94 
     95 TEST(SysStrings, SysWideToNativeMB) {
     96   using base::SysWideToNativeMB;
     97   ScopedSetLocale locale("en_US.utf-8");
     98   EXPECT_EQ("Hello, world", SysWideToNativeMB(L"Hello, world"));
     99   EXPECT_EQ("\xe4\xbd\xa0\xe5\xa5\xbd", SysWideToNativeMB(L"\x4f60\x597d"));
    100 
    101   // >16 bits
    102   EXPECT_EQ("\xF0\x90\x8C\x80", SysWideToNativeMB(kSysWideOldItalicLetterA));
    103 
    104   // Error case. When Windows finds a UTF-16 character going off the end of
    105   // a string, it just converts that literal value to UTF-8, even though this
    106   // is invalid.
    107   //
    108   // This is what XP does, but Vista has different behavior, so we don't bother
    109   // verifying it:
    110   // EXPECT_EQ("\xE4\xBD\xA0\xED\xA0\x80zyxw",
    111   //           SysWideToNativeMB(L"\x4f60\xd800zyxw"));
    112 
    113   // Test embedded NULLs.
    114   std::wstring wide_null(L"a");
    115   wide_null.push_back(0);
    116   wide_null.push_back('b');
    117 
    118   std::string expected_null("a");
    119   expected_null.push_back(0);
    120   expected_null.push_back('b');
    121 
    122   EXPECT_EQ(expected_null, SysWideToNativeMB(wide_null));
    123 }
    124 
    125 // We assume the test is running in a UTF8 locale.
    126 TEST(SysStrings, SysNativeMBToWide) {
    127   using base::SysNativeMBToWide;
    128   ScopedSetLocale locale("en_US.utf-8");
    129   EXPECT_EQ(L"Hello, world", SysNativeMBToWide("Hello, world"));
    130   EXPECT_EQ(L"\x4f60\x597d", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5\xbd"));
    131   // >16 bits
    132   EXPECT_EQ(kSysWideOldItalicLetterA, SysNativeMBToWide("\xF0\x90\x8C\x80"));
    133 
    134   // Error case. When Windows finds an invalid UTF-8 character, it just skips
    135   // it. This seems weird because it's inconsistent with the reverse conversion.
    136   //
    137   // This is what XP does, but Vista has different behavior, so we don't bother
    138   // verifying it:
    139   // EXPECT_EQ(L"\x4f60zyxw", SysNativeMBToWide("\xe4\xbd\xa0\xe5\xa5zyxw"));
    140 
    141   // Test embedded NULLs.
    142   std::string utf8_null("a");
    143   utf8_null.push_back(0);
    144   utf8_null.push_back('b');
    145 
    146   std::wstring expected_null(L"a");
    147   expected_null.push_back(0);
    148   expected_null.push_back('b');
    149 
    150   EXPECT_EQ(expected_null, SysNativeMBToWide(utf8_null));
    151 }
    152 
    153 static const wchar_t* const kConvertRoundtripCases[] = {
    154   L"Google Video",
    155   // "   "
    156   L"\x7f51\x9875\x0020\x56fe\x7247\x0020\x8d44\x8baf\x66f4\x591a\x0020\x00bb",
    157   //  " "
    158   L"\x03a0\x03b1\x03b3\x03ba\x03cc\x03c3\x03bc\x03b9"
    159   L"\x03bf\x03c2\x0020\x0399\x03c3\x03c4\x03cc\x03c2",
    160   // "   "
    161   L"\x041f\x043e\x0438\x0441\x043a\x0020\x0441\x0442"
    162   L"\x0440\x0430\x043d\x0438\x0446\x0020\x043d\x0430"
    163   L"\x0020\x0440\x0443\x0441\x0441\x043a\x043e\x043c",
    164   // ""
    165   L"\xc804\xccb4\xc11c\xbe44\xc2a4",
    166 
    167   // Test characters that take more than 16 bits. This will depend on whether
    168   // wchar_t is 16 or 32 bits.
    169 #if defined(WCHAR_T_IS_UTF16)
    170   L"\xd800\xdf00",
    171   // ?????  (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
    172   L"\xd807\xdd40\xd807\xdd41\xd807\xdd42\xd807\xdd43\xd807\xdd44",
    173 #elif defined(WCHAR_T_IS_UTF32)
    174   L"\x10300",
    175   // ?????  (Mathematical Alphanumeric Symbols (U+011d40 - U+011d44 : A,B,C,D,E)
    176   L"\x11d40\x11d41\x11d42\x11d43\x11d44",
    177 #endif
    178 };
    179 
    180 
    181 TEST(SysStrings, SysNativeMBAndWide) {
    182   ScopedSetLocale locale("en_US.utf-8");
    183   for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
    184     std::wstring wide = kConvertRoundtripCases[i];
    185     std::wstring trip = base::SysNativeMBToWide(base::SysWideToNativeMB(wide));
    186     EXPECT_EQ(wide.size(), trip.size());
    187     EXPECT_EQ(wide, trip);
    188   }
    189 
    190   // We assume our test is running in UTF-8, so double check through ICU.
    191   for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
    192     std::wstring wide = kConvertRoundtripCases[i];
    193     std::wstring trip = base::SysNativeMBToWide(WideToUTF8(wide));
    194     EXPECT_EQ(wide.size(), trip.size());
    195     EXPECT_EQ(wide, trip);
    196   }
    197 
    198   for (size_t i = 0; i < arraysize(kConvertRoundtripCases); ++i) {
    199     std::wstring wide = kConvertRoundtripCases[i];
    200     std::wstring trip = UTF8ToWide(base::SysWideToNativeMB(wide));
    201     EXPECT_EQ(wide.size(), trip.size());
    202     EXPECT_EQ(wide, trip);
    203   }
    204 }
    205 #endif  // OS_LINUX
    206