1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <algorithm> 6 7 #include "base/logging.h" 8 #include "base/strings/string_piece.h" 9 #include "base/strings/utf_offset_string_conversions.h" 10 #include "testing/gtest/include/gtest/gtest.h" 11 12 namespace base { 13 14 namespace { 15 16 static const size_t kNpos = string16::npos; 17 18 } // namespace 19 20 TEST(UTFOffsetStringConversionsTest, AdjustOffset) { 21 struct UTF8ToUTF16Case { 22 const char* utf8; 23 size_t input_offset; 24 size_t output_offset; 25 } utf8_to_utf16_cases[] = { 26 {"", 0, kNpos}, 27 {"\xe4\xbd\xa0\xe5\xa5\xbd", 1, kNpos}, 28 {"\xe4\xbd\xa0\xe5\xa5\xbd", 3, 1}, 29 {"\xed\xb0\x80z", 3, 1}, 30 {"A\xF0\x90\x8C\x80z", 1, 1}, 31 {"A\xF0\x90\x8C\x80z", 2, kNpos}, 32 {"A\xF0\x90\x8C\x80z", 5, 3}, 33 }; 34 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf8_to_utf16_cases); ++i) { 35 size_t offset = utf8_to_utf16_cases[i].input_offset; 36 UTF8ToUTF16AndAdjustOffset(utf8_to_utf16_cases[i].utf8, &offset); 37 EXPECT_EQ(utf8_to_utf16_cases[i].output_offset, offset); 38 } 39 40 struct UTF16ToUTF8Case { 41 char16 utf16[10]; 42 size_t input_offset; 43 size_t output_offset; 44 } utf16_to_utf8_cases[] = { 45 {{}, 0, kNpos}, 46 // Converted to 3-byte utf-8 sequences 47 {{0x5909, 0x63DB}, 2, kNpos}, 48 {{0x5909, 0x63DB}, 1, 3}, 49 // Converted to 2-byte utf-8 sequences 50 {{'A', 0x00bc, 0x00be, 'z'}, 1, 1}, 51 {{'A', 0x00bc, 0x00be, 'z'}, 2, 3}, 52 {{'A', 0x00bc, 0x00be, 'z'}, 3, 5}, 53 // Surrogate pair 54 {{'A', 0xd800, 0xdf00, 'z'}, 1, 1}, 55 {{'A', 0xd800, 0xdf00, 'z'}, 2, kNpos}, 56 {{'A', 0xd800, 0xdf00, 'z'}, 3, 5}, 57 }; 58 for (size_t i = 0; i < ARRAYSIZE_UNSAFE(utf16_to_utf8_cases); ++i) { 59 size_t offset = utf16_to_utf8_cases[i].input_offset; 60 UTF16ToUTF8AndAdjustOffset(utf16_to_utf8_cases[i].utf16, &offset); 61 EXPECT_EQ(utf16_to_utf8_cases[i].output_offset, offset); 62 } 63 } 64 65 TEST(UTFOffsetStringConversionsTest, LimitOffsets) { 66 const size_t kLimit = 10; 67 const size_t kItems = 20; 68 std::vector<size_t> size_ts; 69 for (size_t t = 0; t < kItems; ++t) 70 size_ts.push_back(t); 71 std::for_each(size_ts.begin(), size_ts.end(), 72 LimitOffset<string16>(kLimit)); 73 size_t unlimited_count = 0; 74 for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); 75 ++ti) { 76 if (*ti < kLimit && *ti != kNpos) 77 ++unlimited_count; 78 } 79 EXPECT_EQ(10U, unlimited_count); 80 81 // Reverse the values in the vector and try again. 82 size_ts.clear(); 83 for (size_t t = kItems; t > 0; --t) 84 size_ts.push_back(t - 1); 85 std::for_each(size_ts.begin(), size_ts.end(), 86 LimitOffset<string16>(kLimit)); 87 unlimited_count = 0; 88 for (std::vector<size_t>::iterator ti = size_ts.begin(); ti != size_ts.end(); 89 ++ti) { 90 if (*ti < kLimit && *ti != kNpos) 91 ++unlimited_count; 92 } 93 EXPECT_EQ(10U, unlimited_count); 94 } 95 96 TEST(UTFOffsetStringConversionsTest, AdjustOffsets) { 97 // Imagine we have strings as shown in the following cases where the 98 // X's represent encoded characters. 99 // 1: abcXXXdef ==> abcXdef 100 { 101 std::vector<size_t> offsets; 102 for (size_t t = 0; t < 9; ++t) 103 offsets.push_back(t); 104 { 105 OffsetAdjuster offset_adjuster(&offsets); 106 offset_adjuster.Add(OffsetAdjuster::Adjustment(3, 3, 1)); 107 } 108 size_t expected_1[] = {0, 1, 2, 3, kNpos, kNpos, 4, 5, 6}; 109 EXPECT_EQ(offsets.size(), arraysize(expected_1)); 110 for (size_t i = 0; i < arraysize(expected_1); ++i) 111 EXPECT_EQ(expected_1[i], offsets[i]); 112 } 113 114 // 2: XXXaXXXXbcXXXXXXXdefXXX ==> XaXXbcXXXXdefX 115 { 116 std::vector<size_t> offsets; 117 for (size_t t = 0; t < 23; ++t) 118 offsets.push_back(t); 119 { 120 OffsetAdjuster offset_adjuster(&offsets); 121 offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 1)); 122 offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 2)); 123 offset_adjuster.Add(OffsetAdjuster::Adjustment(10, 7, 4)); 124 offset_adjuster.Add(OffsetAdjuster::Adjustment(20, 3, 1)); 125 } 126 size_t expected_2[] = {0, kNpos, kNpos, 1, 2, kNpos, kNpos, kNpos, 4, 5, 6, 127 kNpos, kNpos, kNpos, kNpos, kNpos, kNpos, 10, 11, 12, 128 13, kNpos, kNpos}; 129 EXPECT_EQ(offsets.size(), arraysize(expected_2)); 130 for (size_t i = 0; i < arraysize(expected_2); ++i) 131 EXPECT_EQ(expected_2[i], offsets[i]); 132 } 133 134 // 3: XXXaXXXXbcdXXXeXX ==> aXXXXbcdXXXe 135 { 136 std::vector<size_t> offsets; 137 for (size_t t = 0; t < 17; ++t) 138 offsets.push_back(t); 139 { 140 OffsetAdjuster offset_adjuster(&offsets); 141 offset_adjuster.Add(OffsetAdjuster::Adjustment(0, 3, 0)); 142 offset_adjuster.Add(OffsetAdjuster::Adjustment(4, 4, 4)); 143 offset_adjuster.Add(OffsetAdjuster::Adjustment(11, 3, 3)); 144 offset_adjuster.Add(OffsetAdjuster::Adjustment(15, 2, 0)); 145 } 146 size_t expected_3[] = {kNpos, kNpos, kNpos, 0, 1, kNpos, kNpos, kNpos, 5, 6, 147 7, 8, kNpos, kNpos, 11, kNpos, kNpos}; 148 EXPECT_EQ(offsets.size(), arraysize(expected_3)); 149 for (size_t i = 0; i < arraysize(expected_3); ++i) 150 EXPECT_EQ(expected_3[i], offsets[i]); 151 } 152 } 153 154 } // namaspace base 155