Home | History | Annotate | Download | only in base
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 
      7 #include "base/logging.h"
      8 #include "base/string_piece.h"
      9 #include "base/utf_offset_string_conversions.h"
     10 #include "testing/gtest/include/gtest/gtest.h"
     11 
     12 namespace base {
     13 
     14 namespace {
     15 
     16 static const size_t kNpos = std::wstring::npos;
     17 
     18 // Given a null-terminated string of wchar_t with each wchar_t representing
     19 // a UTF-16 code unit, returns a string16 made up of wchar_t's in the input.
     20 // Each wchar_t should be <= 0xFFFF and a non-BMP character (> U+FFFF)
     21 // should be represented as a surrogate pair (two UTF-16 units)
     22 // *even* where wchar_t is 32-bit (Linux and Mac).
     23 //
     24 // This is to help write tests for functions with string16 params until
     25 // the C++ 0x UTF-16 literal is well-supported by compilers.
     26 string16 BuildString16(const wchar_t* s) {
     27 #if defined(WCHAR_T_IS_UTF16)
     28   return string16(s);
     29 #elif defined(WCHAR_T_IS_UTF32)
     30   string16 u16;
     31   while (*s != 0) {
     32     DCHECK(static_cast<unsigned int>(*s) <= 0xFFFFu);
     33     u16.push_back(*s++);
     34   }
     35   return u16;
     36 #endif
     37 }
     38 
     39 }  // namespace
     40 
     41 TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     42   struct UTF8ToWideCase {
     43     const char* utf8;
     44     size_t input_offset;
     45     size_t output_offset;
     46   } utf8_to_wide_cases[] = {
     47     {"", 0, kNpos},
     48     {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
     49     {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
     50     {"\xed\xb0\x80z", 3, 1},
     51     {"A\xF0\x90\x8C\x80z", 1, 1},
     52     {"A\xF0\x90\x8C\x80z", 2, kNpos},
     53 #if defined(WCHAR_T_IS_UTF16)
     54     {"A\xF0\x90\x8C\x80z", 5, 3},
     55 #elif defined(WCHAR_T_IS_UTF32)
     56     {"A\xF0\x90\x8C\x80z", 5, 2},
     57 #endif
     58   };
     59   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_wide_cases); ++i) {
     60     size_t offset = utf8_to_wide_cases[i].input_offset;
     61     UTF8ToWideAndAdjustOffset(utf8_to_wide_cases[i].utf8, &offset);
     62     EXPECT_EQ(utf8_to_wide_cases[i].output_offset, offset);
     63   }
     64 
     65 #if defined(WCHAR_T_IS_UTF32)
     66   struct UTF16ToWideCase {
     67     const wchar_t* wide;
     68     size_t input_offset;
     69     size_t output_offset;
     70   } utf16_to_wide_cases[] = {
     71     {L"\xD840\xDC00\x4E00", 0, 0},
     72     {L"\xD840\xDC00\x4E00", 1, kNpos},
     73     {L"\xD840\xDC00\x4E00", 2, 1},
     74   };
     75   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_wide_cases); ++i) {
     76     size_t offset = utf16_to_wide_cases[i].input_offset;
     77     UTF16ToWideAndAdjustOffset(BuildString16(utf16_to_wide_cases[i].wide),
     78                                &offset);
     79     EXPECT_EQ(utf16_to_wide_cases[i].output_offset, offset);
     80   }
     81 #endif
     82 }
     83 
     84 TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
     85   const size_t kLimit = 10;
     86   const size_t kItems = 20;
     87   std::vector<size_t> size_ts;
     88   for (size_t t = 0; t < kItems; ++t)
     89     size_ts.push_back(t);
     90   std::for_each(size_ts.begin(), size_ts.end(),
     91                 LimitOffset<std::wstring>(kLimit));
     92   size_t unlimited_count = 0;
     93   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
     94        ++ti) {
     95     if (*ti < kLimit && *ti != kNpos)
     96       ++unlimited_count;
     97   }
     98   EXPECT_EQ(10U, unlimited_count);
     99 
    100   // Reverse the values in the vector and try again.
    101   size_ts.clear();
    102   for (size_t t = kItems; t > 0; --t)
    103     size_ts.push_back(t - 1);
    104   std::for_each(size_ts.begin(), size_ts.end(),
    105                 LimitOffset<std::wstring>(kLimit));
    106   unlimited_count = 0;
    107   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
    108        ++ti) {
    109     if (*ti < kLimit && *ti != kNpos)
    110       ++unlimited_count;
    111   }
    112   EXPECT_EQ(10U, unlimited_count);
    113 }
    114 
    115 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
    116   // Imagine we have strings as shown in the following cases where the
    117   // X's represent encoded characters.
    118   // 1: abcXXXdef ==> abcXdef
    119   std::vector<size_t> offsets;
    120   for (size_t t = 0; t < 9; ++t)
    121     offsets.push_back(t);
    122   AdjustOffset::Adjustments adjustments;
    123   adjustments.push_back(AdjustOffset::Adjustment(3, 3, 1));
    124   std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
    125   size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6};
    126   EXPECT_EQ(offsets.size(), arraysize(expected_1));
    127   for (size_t i = 0; i < arraysize(expected_1); ++i)
    128     EXPECT_EQ(expected_1[i], offsets[i]);
    129 
    130   // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
    131   offsets.clear();
    132   for (size_t t = 0; t < 23; ++t)
    133     offsets.push_back(t);
    134   adjustments.clear();
    135   adjustments.push_back(AdjustOffset::Adjustment(0, 3, 1));
    136   adjustments.push_back(AdjustOffset::Adjustment(4, 4, 2));
    137   adjustments.push_back(AdjustOffset::Adjustment(10, 7, 4));
    138   adjustments.push_back(AdjustOffset::Adjustment(20, 3, 1));
    139   std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
    140   size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6,
    141                          kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12,
    142                          13, kNpos, kNpos};
    143   EXPECT_EQ(offsets.size(), arraysize(expected_2));
    144   for (size_t i = 0; i < arraysize(expected_2); ++i)
    145     EXPECT_EQ(expected_2[i], offsets[i]);
    146 
    147   // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
    148   offsets.clear();
    149   for (size_t t = 0; t < 17; ++t)
    150     offsets.push_back(t);
    151   adjustments.clear();
    152   adjustments.push_back(AdjustOffset::Adjustment(0, 3, 0));
    153   adjustments.push_back(AdjustOffset::Adjustment(4, 4, 4));
    154   adjustments.push_back(AdjustOffset::Adjustment(11, 3, 3));
    155   adjustments.push_back(AdjustOffset::Adjustment(15, 2, 0));
    156   std::for_each(offsets.begin(), offsets.end(), AdjustOffset(adjustments));
    157   size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6,
    158                          7, 8, kNpos, kNpos, 11, kNpos, kNpos};
    159   EXPECT_EQ(offsets.size(), arraysize(expected_3));
    160   for (size_t i = 0; i < arraysize(expected_3); ++i)
    161     EXPECT_EQ(expected_3[i], offsets[i]);
    162 }
    163 
}  // namespace base
    165