Home | History | Annotate | Download | only in strings
      1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include <algorithm>
      6 
      7 #include "base/logging.h"
      8 #include "base/strings/string_piece.h"
      9 #include "base/strings/utf_offset_string_conversions.h"
     10 #include "testing/gtest/include/gtest/gtest.h"
     11 
     12 namespace base {
     13 
     14 namespace {
     15 
     16 static const size_t kNpos = string16::npos;
     17 
     18 }  // namespace
     19 
     20 TEST(UTFOffsetStringConversionsTest, AdjustOffset) {
     21   struct UTF8ToUTF16Case {
     22     const char* utf8;
     23     size_t input_offset;
     24     size_t output_offset;
     25   } utf8_to_utf16_cases[] = {
     26     {"", 0, 0},
     27     {"", kNpos, kNpos},
     28     {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos},
     29     {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1},
     30     {"\xed\xb0\x80z", 3, 1},
     31     {"A\xF0\x90\x8C\x80z", 1, 1},
     32     {"A\xF0\x90\x8C\x80z", 2, kNpos},
     33     {"A\xF0\x90\x8C\x80z", 5, 3},
     34     {"A\xF0\x90\x8C\x80z", 6, 4},
     35     {"A\xF0\x90\x8C\x80z", kNpos, kNpos},
     36   };
     37   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) {
     38     size_t offset = utf8_to_utf16_cases[i].input_offset;
     39     UTF8ToUTF16AndAdjustOffset(utf8_to_utf16_cases[i].utf8, &offset);
     40     EXPECT_EQ(utf8_to_utf16_cases[i].output_offset, offset);
     41   }
     42 
     43   struct UTF16ToUTF8Case {
     44     char16 utf16[10];
     45     size_t input_offset;
     46     size_t output_offset;
     47   } utf16_to_utf8_cases[] = {
     48       {{}, 0, 0},
     49       // Converted to 3-byte utf-8 sequences
     50       {{0x5909, 0x63DB}, 3, kNpos},
     51       {{0x5909, 0x63DB}, 2, 6},
     52       {{0x5909, 0x63DB}, 1, 3},
     53       {{0x5909, 0x63DB}, 0, 0},
     54       // Converted to 2-byte utf-8 sequences
     55       {{'A', 0x00bc, 0x00be, 'z'}, 1, 1},
     56       {{'A', 0x00bc, 0x00be, 'z'}, 2, 3},
     57       {{'A', 0x00bc, 0x00be, 'z'}, 3, 5},
     58       {{'A', 0x00bc, 0x00be, 'z'}, 4, 6},
     59       // Surrogate pair
     60       {{'A', 0xd800, 0xdf00, 'z'}, 1, 1},
     61       {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos},
     62       {{'A', 0xd800, 0xdf00, 'z'}, 3, 5},
     63       {{'A', 0xd800, 0xdf00, 'z'}, 4, 6},
     64   };
     65   for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) {
     66     size_t offset = utf16_to_utf8_cases[i].input_offset;
     67     UTF16ToUTF8AndAdjustOffset(utf16_to_utf8_cases[i].utf16, &offset);
     68     EXPECT_EQ(utf16_to_utf8_cases[i].output_offset, offset);
     69   }
     70 }
     71 
     72 TEST(UTFOffsetStringConversionsTest, LimitOffsets) {
     73   const size_t kLimit = 10;
     74   const size_t kItems = 20;
     75   std::vector<size_t> size_ts;
     76   for (size_t t = 0; t < kItems; ++t)
     77     size_ts.push_back(t);
     78   std::for_each(size_ts.begin(), size_ts.end(),
     79                 LimitOffset<string16>(kLimit));
     80   size_t unlimited_count = 0;
     81   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
     82        ++ti) {
     83     if (*ti != kNpos)
     84       ++unlimited_count;
     85   }
     86   EXPECT_EQ(11U, unlimited_count);
     87 
     88   // Reverse the values in the vector and try again.
     89   size_ts.clear();
     90   for (size_t t = kItems; t > 0; --t)
     91     size_ts.push_back(t - 1);
     92   std::for_each(size_ts.begin(), size_ts.end(),
     93                 LimitOffset<string16>(kLimit));
     94   unlimited_count = 0;
     95   for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end();
     96        ++ti) {
     97     if (*ti != kNpos)
     98       ++unlimited_count;
     99   }
    100   EXPECT_EQ(11U, unlimited_count);
    101 }
    102 
    103 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) {
    104   // Imagine we have strings as shown in the following cases where the
    105   // X's represent encoded characters.
    106   // 1: abcXXXdef ==> abcXdef
    107   {
    108     std::vector<size_t> offsets;
    109     for (size_t t = 0; t <= 9; ++t)
    110       offsets.push_back(t);
    111     {
    112       OffsetAdjuster offset_adjuster(&offsets);
    113       offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1));
    114     }
    115     size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6, 7};
    116     EXPECT_EQ(offsets.size(), arraysize(expected_1));
    117     for (size_t i = 0; i < arraysize(expected_1); ++i)
    118       EXPECT_EQ(expected_1[i], offsets[i]);
    119   }
    120 
    121   // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX
    122   {
    123     std::vector<size_t> offsets;
    124     for (size_t t = 0; t <= 23; ++t)
    125       offsets.push_back(t);
    126     {
    127       OffsetAdjuster offset_adjuster(&offsets);
    128       offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 1));
    129       offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 2));
    130       offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4));
    131       offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1));
    132     }
    133     size_t expected_2[] = {
    134       0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, kNpos, kNpos, kNpos,
    135       kNpos, kNpos, kNpos, 10, 11, 12, 13, kNpos, kNpos, 14
    136     };
    137     EXPECT_EQ(offsets.size(), arraysize(expected_2));
    138     for (size_t i = 0; i < arraysize(expected_2); ++i)
    139       EXPECT_EQ(expected_2[i], offsets[i]);
    140   }
    141 
    142   // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe
    143   {
    144     std::vector<size_t> offsets;
    145     for (size_t t = 0; t <= 17; ++t)
    146       offsets.push_back(t);
    147     {
    148       OffsetAdjuster offset_adjuster(&offsets);
    149       offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 0));
    150       offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 4));
    151       offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3));
    152       offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0));
    153     }
    154     size_t expected_3[] = {
    155       0, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 7, 8, kNpos, kNpos, 11,
    156       12, kNpos, 12
    157     };
    158     EXPECT_EQ(offsets.size(), arraysize(expected_3));
    159     for (size_t i = 0; i < arraysize(expected_3); ++i)
    160       EXPECT_EQ(expected_3[i], offsets[i]);
    161   }
    162 }
    163 
}  // namespace base
    165