1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "base/strings/string_util.h" 6 7 #include <math.h> 8 #include <stdarg.h> 9 #include <stddef.h> 10 #include <stdint.h> 11 12 #include <algorithm> 13 14 #include "base/macros.h" 15 #include "base/strings/string16.h" 16 #include "base/strings/utf_string_conversions.h" 17 #include "testing/gmock/include/gmock/gmock.h" 18 #include "testing/gtest/include/gtest/gtest.h" 19 20 using ::testing::ElementsAre; 21 22 namespace base { 23 24 static const struct trim_case { 25 const wchar_t* input; 26 const TrimPositions positions; 27 const wchar_t* output; 28 const TrimPositions return_value; 29 } trim_cases[] = { 30 {L" Google Video ", TRIM_LEADING, L"Google Video ", TRIM_LEADING}, 31 {L" Google Video ", TRIM_TRAILING, L" Google Video", TRIM_TRAILING}, 32 {L" Google Video ", TRIM_ALL, L"Google Video", TRIM_ALL}, 33 {L"Google Video", TRIM_ALL, L"Google Video", TRIM_NONE}, 34 {L"", TRIM_ALL, L"", TRIM_NONE}, 35 {L" ", TRIM_LEADING, L"", TRIM_LEADING}, 36 {L" ", TRIM_TRAILING, L"", TRIM_TRAILING}, 37 {L" ", TRIM_ALL, L"", TRIM_ALL}, 38 {L"\t\rTest String\n", TRIM_ALL, L"Test String", TRIM_ALL}, 39 {L"\x2002Test String\x00A0\x3000", TRIM_ALL, L"Test String", TRIM_ALL}, 40 }; 41 42 static const struct trim_case_ascii { 43 const char* input; 44 const TrimPositions positions; 45 const char* output; 46 const TrimPositions return_value; 47 } trim_cases_ascii[] = { 48 {" Google Video ", TRIM_LEADING, "Google Video ", TRIM_LEADING}, 49 {" Google Video ", TRIM_TRAILING, " Google Video", TRIM_TRAILING}, 50 {" Google Video ", TRIM_ALL, "Google Video", TRIM_ALL}, 51 {"Google Video", TRIM_ALL, "Google Video", TRIM_NONE}, 52 {"", TRIM_ALL, "", TRIM_NONE}, 53 {" ", TRIM_LEADING, "", TRIM_LEADING}, 54 {" ", TRIM_TRAILING, "", TRIM_TRAILING}, 55 {" ", TRIM_ALL, "", TRIM_ALL}, 56 {"\t\rTest String\n", TRIM_ALL, "Test String", TRIM_ALL}, 57 }; 58 59 namespace { 60 61 // Helper used to test TruncateUTF8ToByteSize. 62 bool Truncated(const std::string& input, 63 const size_t byte_size, 64 std::string* output) { 65 size_t prev = input.length(); 66 TruncateUTF8ToByteSize(input, byte_size, output); 67 return prev != output->length(); 68 } 69 70 } // namespace 71 72 TEST(StringUtilTest, TruncateUTF8ToByteSize) { 73 std::string output; 74 75 // Empty strings and invalid byte_size arguments 76 EXPECT_FALSE(Truncated(std::string(), 0, &output)); 77 EXPECT_EQ(output, ""); 78 EXPECT_TRUE(Truncated("\xe1\x80\xbf", 0, &output)); 79 EXPECT_EQ(output, ""); 80 EXPECT_FALSE(Truncated("\xe1\x80\xbf", static_cast<size_t>(-1), &output)); 81 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 4, &output)); 82 83 // Testing the truncation of valid UTF8 correctly 84 EXPECT_TRUE(Truncated("abc", 2, &output)); 85 EXPECT_EQ(output, "ab"); 86 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 2, &output)); 87 EXPECT_EQ(output.compare("\xc2\x81"), 0); 88 EXPECT_TRUE(Truncated("\xc2\x81\xc2\x81", 3, &output)); 89 EXPECT_EQ(output.compare("\xc2\x81"), 0); 90 EXPECT_FALSE(Truncated("\xc2\x81\xc2\x81", 4, &output)); 91 EXPECT_EQ(output.compare("\xc2\x81\xc2\x81"), 0); 92 93 { 94 const char array[] = "\x00\x00\xc2\x81\xc2\x81"; 95 const std::string array_string(array, arraysize(array)); 96 EXPECT_TRUE(Truncated(array_string, 4, &output)); 97 EXPECT_EQ(output.compare(std::string("\x00\x00\xc2\x81", 4)), 0); 98 } 99 100 { 101 const char array[] = "\x00\xc2\x81\xc2\x81"; 102 const std::string array_string(array, arraysize(array)); 103 EXPECT_TRUE(Truncated(array_string, 4, &output)); 104 EXPECT_EQ(output.compare(std::string("\x00\xc2\x81", 3)), 0); 105 } 106 107 // Testing invalid UTF8 108 EXPECT_TRUE(Truncated("\xed\xa0\x80\xed\xbf\xbf", 6, &output)); 109 EXPECT_EQ(output.compare(""), 0); 110 EXPECT_TRUE(Truncated("\xed\xa0\x8f", 3, &output)); 111 EXPECT_EQ(output.compare(""), 0); 112 EXPECT_TRUE(Truncated("\xed\xbf\xbf", 3, &output)); 113 EXPECT_EQ(output.compare(""), 0); 114 115 // Testing invalid UTF8 mixed with valid UTF8 116 EXPECT_FALSE(Truncated("\xe1\x80\xbf", 3, &output)); 117 EXPECT_EQ(output.compare("\xe1\x80\xbf"), 0); 118 EXPECT_FALSE(Truncated("\xf1\x80\xa0\xbf", 4, &output)); 119 EXPECT_EQ(output.compare("\xf1\x80\xa0\xbf"), 0); 120 EXPECT_FALSE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 121 10, &output)); 122 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf"), 0); 123 EXPECT_TRUE(Truncated("a\xc2\x81\xe1\x80\xbf\xf1""a""\x80\xa0", 124 10, &output)); 125 EXPECT_EQ(output.compare("a\xc2\x81\xe1\x80\xbf\xf1""a"), 0); 126 EXPECT_FALSE(Truncated("\xef\xbb\xbf" "abc", 6, &output)); 127 EXPECT_EQ(output.compare("\xef\xbb\xbf" "abc"), 0); 128 129 // Overlong sequences 130 EXPECT_TRUE(Truncated("\xc0\x80", 2, &output)); 131 EXPECT_EQ(output.compare(""), 0); 132 EXPECT_TRUE(Truncated("\xc1\x80\xc1\x81", 4, &output)); 133 EXPECT_EQ(output.compare(""), 0); 134 EXPECT_TRUE(Truncated("\xe0\x80\x80", 3, &output)); 135 EXPECT_EQ(output.compare(""), 0); 136 EXPECT_TRUE(Truncated("\xe0\x82\x80", 3, &output)); 137 EXPECT_EQ(output.compare(""), 0); 138 EXPECT_TRUE(Truncated("\xe0\x9f\xbf", 3, &output)); 139 EXPECT_EQ(output.compare(""), 0); 140 EXPECT_TRUE(Truncated("\xf0\x80\x80\x8D", 4, &output)); 141 EXPECT_EQ(output.compare(""), 0); 142 EXPECT_TRUE(Truncated("\xf0\x80\x82\x91", 4, &output)); 143 EXPECT_EQ(output.compare(""), 0); 144 EXPECT_TRUE(Truncated("\xf0\x80\xa0\x80", 4, &output)); 145 EXPECT_EQ(output.compare(""), 0); 146 EXPECT_TRUE(Truncated("\xf0\x8f\xbb\xbf", 4, &output)); 147 EXPECT_EQ(output.compare(""), 0); 148 EXPECT_TRUE(Truncated("\xf8\x80\x80\x80\xbf", 5, &output)); 149 EXPECT_EQ(output.compare(""), 0); 150 EXPECT_TRUE(Truncated("\xfc\x80\x80\x80\xa0\xa5", 6, &output)); 151 EXPECT_EQ(output.compare(""), 0); 152 153 // Beyond U+10FFFF (the upper limit of Unicode codespace) 154 EXPECT_TRUE(Truncated("\xf4\x90\x80\x80", 4, &output)); 155 EXPECT_EQ(output.compare(""), 0); 156 EXPECT_TRUE(Truncated("\xf8\xa0\xbf\x80\xbf", 5, &output)); 157 EXPECT_EQ(output.compare(""), 0); 158 EXPECT_TRUE(Truncated("\xfc\x9c\xbf\x80\xbf\x80", 6, &output)); 159 EXPECT_EQ(output.compare(""), 0); 160 161 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 162 EXPECT_TRUE(Truncated("\xfe\xff", 2, &output)); 163 EXPECT_EQ(output.compare(""), 0); 164 EXPECT_TRUE(Truncated("\xff\xfe", 2, &output)); 165 EXPECT_EQ(output.compare(""), 0); 166 167 { 168 const char array[] = "\x00\x00\xfe\xff"; 169 const std::string array_string(array, arraysize(array)); 170 EXPECT_TRUE(Truncated(array_string, 4, &output)); 171 EXPECT_EQ(output.compare(std::string("\x00\x00", 2)), 0); 172 } 173 174 // Variants on the previous test 175 { 176 const char array[] = "\xff\xfe\x00\x00"; 177 const std::string array_string(array, 4); 178 EXPECT_FALSE(Truncated(array_string, 4, &output)); 179 EXPECT_EQ(output.compare(std::string("\xff\xfe\x00\x00", 4)), 0); 180 } 181 { 182 const char array[] = "\xff\x00\x00\xfe"; 183 const std::string array_string(array, arraysize(array)); 184 EXPECT_TRUE(Truncated(array_string, 4, &output)); 185 EXPECT_EQ(output.compare(std::string("\xff\x00\x00", 3)), 0); 186 } 187 188 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 189 EXPECT_TRUE(Truncated("\xef\xbf\xbe", 3, &output)); 190 EXPECT_EQ(output.compare(""), 0); 191 EXPECT_TRUE(Truncated("\xf0\x8f\xbf\xbe", 4, &output)); 192 EXPECT_EQ(output.compare(""), 0); 193 EXPECT_TRUE(Truncated("\xf3\xbf\xbf\xbf", 4, &output)); 194 EXPECT_EQ(output.compare(""), 0); 195 EXPECT_TRUE(Truncated("\xef\xb7\x90", 3, &output)); 196 EXPECT_EQ(output.compare(""), 0); 197 EXPECT_TRUE(Truncated("\xef\xb7\xaf", 3, &output)); 198 EXPECT_EQ(output.compare(""), 0); 199 200 // Strings in legacy encodings that are valid in UTF-8, but 201 // are invalid as UTF-8 in real data. 202 EXPECT_TRUE(Truncated("caf\xe9", 4, &output)); 203 EXPECT_EQ(output.compare("caf"), 0); 204 EXPECT_TRUE(Truncated("\xb0\xa1\xb0\xa2", 4, &output)); 205 EXPECT_EQ(output.compare(""), 0); 206 EXPECT_FALSE(Truncated("\xa7\x41\xa6\x6e", 4, &output)); 207 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 208 EXPECT_TRUE(Truncated("\xa7\x41\xa6\x6e\xd9\xee\xe4\xee", 7, 209 &output)); 210 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 211 212 // Testing using the same string as input and output. 213 EXPECT_FALSE(Truncated(output, 4, &output)); 214 EXPECT_EQ(output.compare("\xa7\x41\xa6\x6e"), 0); 215 EXPECT_TRUE(Truncated(output, 3, &output)); 216 EXPECT_EQ(output.compare("\xa7\x41"), 0); 217 218 // "abc" with U+201[CD] in windows-125[0-8] 219 EXPECT_TRUE(Truncated("\x93" "abc\x94", 5, &output)); 220 EXPECT_EQ(output.compare("\x93" "abc"), 0); 221 222 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 223 EXPECT_TRUE(Truncated("\xd9\xee\xe4\xee", 4, &output)); 224 EXPECT_EQ(output.compare(""), 0); 225 226 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 227 EXPECT_TRUE(Truncated("\xe3\xe5\xe9\xdC", 4, &output)); 228 EXPECT_EQ(output.compare(""), 0); 229 } 230 231 TEST(StringUtilTest, TrimWhitespace) { 232 string16 output; // Allow contents to carry over to next testcase 233 for (size_t i = 0; i < arraysize(trim_cases); ++i) { 234 const trim_case& value = trim_cases[i]; 235 EXPECT_EQ(value.return_value, 236 TrimWhitespace(WideToUTF16(value.input), value.positions, 237 &output)); 238 EXPECT_EQ(WideToUTF16(value.output), output); 239 } 240 241 // Test that TrimWhitespace() can take the same string for input and output 242 output = ASCIIToUTF16(" This is a test \r\n"); 243 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 244 EXPECT_EQ(ASCIIToUTF16("This is a test"), output); 245 246 // Once more, but with a string of whitespace 247 output = ASCIIToUTF16(" \r\n"); 248 EXPECT_EQ(TRIM_ALL, TrimWhitespace(output, TRIM_ALL, &output)); 249 EXPECT_EQ(string16(), output); 250 251 std::string output_ascii; 252 for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) { 253 const trim_case_ascii& value = trim_cases_ascii[i]; 254 EXPECT_EQ(value.return_value, 255 TrimWhitespaceASCII(value.input, value.positions, &output_ascii)); 256 EXPECT_EQ(value.output, output_ascii); 257 } 258 } 259 260 static const struct collapse_case { 261 const wchar_t* input; 262 const bool trim; 263 const wchar_t* output; 264 } collapse_cases[] = { 265 {L" Google Video ", false, L"Google Video"}, 266 {L"Google Video", false, L"Google Video"}, 267 {L"", false, L""}, 268 {L" ", false, L""}, 269 {L"\t\rTest String\n", false, L"Test String"}, 270 {L"\x2002Test String\x00A0\x3000", false, L"Test String"}, 271 {L" Test \n \t String ", false, L"Test String"}, 272 {L"\x2002Test\x1680 \x2028 \tString\x00A0\x3000", false, L"Test String"}, 273 {L" Test String", false, L"Test String"}, 274 {L"Test String ", false, L"Test String"}, 275 {L"Test String", false, L"Test String"}, 276 {L"", true, L""}, 277 {L"\n", true, L""}, 278 {L" \r ", true, L""}, 279 {L"\nFoo", true, L"Foo"}, 280 {L"\r Foo ", true, L"Foo"}, 281 {L" Foo bar ", true, L"Foo bar"}, 282 {L" \tFoo bar \n", true, L"Foo bar"}, 283 {L" a \r b\n c \r\n d \t\re \t f \n ", true, L"abcde f"}, 284 }; 285 286 TEST(StringUtilTest, CollapseWhitespace) { 287 for (size_t i = 0; i < arraysize(collapse_cases); ++i) { 288 const collapse_case& value = collapse_cases[i]; 289 EXPECT_EQ(WideToUTF16(value.output), 290 CollapseWhitespace(WideToUTF16(value.input), value.trim)); 291 } 292 } 293 294 static const struct collapse_case_ascii { 295 const char* input; 296 const bool trim; 297 const char* output; 298 } collapse_cases_ascii[] = { 299 {" Google Video ", false, "Google Video"}, 300 {"Google Video", false, "Google Video"}, 301 {"", false, ""}, 302 {" ", false, ""}, 303 {"\t\rTest String\n", false, "Test String"}, 304 {" Test \n \t String ", false, "Test String"}, 305 {" Test String", false, "Test String"}, 306 {"Test String ", false, "Test String"}, 307 {"Test String", false, "Test String"}, 308 {"", true, ""}, 309 {"\n", true, ""}, 310 {" \r ", true, ""}, 311 {"\nFoo", true, "Foo"}, 312 {"\r Foo ", true, "Foo"}, 313 {" Foo bar ", true, "Foo bar"}, 314 {" \tFoo bar \n", true, "Foo bar"}, 315 {" a \r b\n c \r\n d \t\re \t f \n ", true, "abcde f"}, 316 }; 317 318 TEST(StringUtilTest, CollapseWhitespaceASCII) { 319 for (size_t i = 0; i < arraysize(collapse_cases_ascii); ++i) { 320 const collapse_case_ascii& value = collapse_cases_ascii[i]; 321 EXPECT_EQ(value.output, CollapseWhitespaceASCII(value.input, value.trim)); 322 } 323 } 324 325 TEST(StringUtilTest, IsStringUTF8) { 326 EXPECT_TRUE(IsStringUTF8("abc")); 327 EXPECT_TRUE(IsStringUTF8("\xc2\x81")); 328 EXPECT_TRUE(IsStringUTF8("\xe1\x80\xbf")); 329 EXPECT_TRUE(IsStringUTF8("\xf1\x80\xa0\xbf")); 330 EXPECT_TRUE(IsStringUTF8("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf")); 331 EXPECT_TRUE(IsStringUTF8("\xef\xbb\xbf" "abc")); // UTF-8 BOM 332 333 // surrogate code points 334 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x80\xed\xbf\xbf")); 335 EXPECT_FALSE(IsStringUTF8("\xed\xa0\x8f")); 336 EXPECT_FALSE(IsStringUTF8("\xed\xbf\xbf")); 337 338 // overlong sequences 339 EXPECT_FALSE(IsStringUTF8("\xc0\x80")); // U+0000 340 EXPECT_FALSE(IsStringUTF8("\xc1\x80\xc1\x81")); // "AB" 341 EXPECT_FALSE(IsStringUTF8("\xe0\x80\x80")); // U+0000 342 EXPECT_FALSE(IsStringUTF8("\xe0\x82\x80")); // U+0080 343 EXPECT_FALSE(IsStringUTF8("\xe0\x9f\xbf")); // U+07ff 344 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x80\x8D")); // U+000D 345 EXPECT_FALSE(IsStringUTF8("\xf0\x80\x82\x91")); // U+0091 346 EXPECT_FALSE(IsStringUTF8("\xf0\x80\xa0\x80")); // U+0800 347 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbb\xbf")); // U+FEFF (BOM) 348 EXPECT_FALSE(IsStringUTF8("\xf8\x80\x80\x80\xbf")); // U+003F 349 EXPECT_FALSE(IsStringUTF8("\xfc\x80\x80\x80\xa0\xa5")); // U+00A5 350 351 // Beyond U+10FFFF (the upper limit of Unicode codespace) 352 EXPECT_FALSE(IsStringUTF8("\xf4\x90\x80\x80")); // U+110000 353 EXPECT_FALSE(IsStringUTF8("\xf8\xa0\xbf\x80\xbf")); // 5 bytes 354 EXPECT_FALSE(IsStringUTF8("\xfc\x9c\xbf\x80\xbf\x80")); // 6 bytes 355 356 // BOMs in UTF-16(BE|LE) and UTF-32(BE|LE) 357 EXPECT_FALSE(IsStringUTF8("\xfe\xff")); 358 EXPECT_FALSE(IsStringUTF8("\xff\xfe")); 359 EXPECT_FALSE(IsStringUTF8(std::string("\x00\x00\xfe\xff", 4))); 360 EXPECT_FALSE(IsStringUTF8("\xff\xfe\x00\x00")); 361 362 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF> 363 EXPECT_FALSE(IsStringUTF8("\xef\xbf\xbe")); // U+FFFE) 364 EXPECT_FALSE(IsStringUTF8("\xf0\x8f\xbf\xbe")); // U+1FFFE 365 EXPECT_FALSE(IsStringUTF8("\xf3\xbf\xbf\xbf")); // U+10FFFF 366 EXPECT_FALSE(IsStringUTF8("\xef\xb7\x90")); // U+FDD0 367 EXPECT_FALSE(IsStringUTF8("\xef\xb7\xaf")); // U+FDEF 368 // Strings in legacy encodings. We can certainly make up strings 369 // in a legacy encoding that are valid in UTF-8, but in real data, 370 // most of them are invalid as UTF-8. 371 EXPECT_FALSE(IsStringUTF8("caf\xe9")); // cafe with U+00E9 in ISO-8859-1 372 EXPECT_FALSE(IsStringUTF8("\xb0\xa1\xb0\xa2")); // U+AC00, U+AC001 in EUC-KR 373 EXPECT_FALSE(IsStringUTF8("\xa7\x41\xa6\x6e")); // U+4F60 U+597D in Big5 374 // "abc" with U+201[CD] in windows-125[0-8] 375 EXPECT_FALSE(IsStringUTF8("\x93" "abc\x94")); 376 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 377 EXPECT_FALSE(IsStringUTF8("\xd9\xee\xe4\xee")); 378 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 379 EXPECT_FALSE(IsStringUTF8("\xe3\xe5\xe9\xdC")); 380 381 // Check that we support Embedded Nulls. The first uses the canonical UTF-8 382 // representation, and the second uses a 2-byte sequence. The second version 383 // is invalid UTF-8 since UTF-8 states that the shortest encoding for a 384 // given codepoint must be used. 385 static const char kEmbeddedNull[] = "embedded\0null"; 386 EXPECT_TRUE(IsStringUTF8( 387 std::string(kEmbeddedNull, sizeof(kEmbeddedNull)))); 388 EXPECT_FALSE(IsStringUTF8("embedded\xc0\x80U+0000")); 389 } 390 391 TEST(StringUtilTest, IsStringASCII) { 392 static char char_ascii[] = 393 "0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; 394 static char16 char16_ascii[] = { 395 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '0', 'A', 396 'B', 'C', 'D', 'E', 'F', '0', '1', '2', '3', '4', '5', '6', 397 '7', '8', '9', '0', 'A', 'B', 'C', 'D', 'E', 'F', 0 }; 398 static std::wstring wchar_ascii( 399 L"0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"); 400 401 // Test a variety of the fragment start positions and lengths in order to make 402 // sure that bit masking in IsStringASCII works correctly. 403 // Also, test that a non-ASCII character will be detected regardless of its 404 // position inside the string. 405 { 406 const size_t string_length = arraysize(char_ascii) - 1; 407 for (size_t offset = 0; offset < 8; ++offset) { 408 for (size_t len = 0, max_len = string_length - offset; len < max_len; 409 ++len) { 410 EXPECT_TRUE(IsStringASCII(StringPiece(char_ascii + offset, len))); 411 for (size_t char_pos = offset; char_pos < len; ++char_pos) { 412 char_ascii[char_pos] |= '\x80'; 413 EXPECT_FALSE(IsStringASCII(StringPiece(char_ascii + offset, len))); 414 char_ascii[char_pos] &= ~'\x80'; 415 } 416 } 417 } 418 } 419 420 { 421 const size_t string_length = arraysize(char16_ascii) - 1; 422 for (size_t offset = 0; offset < 4; ++offset) { 423 for (size_t len = 0, max_len = string_length - offset; len < max_len; 424 ++len) { 425 EXPECT_TRUE(IsStringASCII(StringPiece16(char16_ascii + offset, len))); 426 for (size_t char_pos = offset; char_pos < len; ++char_pos) { 427 char16_ascii[char_pos] |= 0x80; 428 EXPECT_FALSE( 429 IsStringASCII(StringPiece16(char16_ascii + offset, len))); 430 char16_ascii[char_pos] &= ~0x80; 431 // Also test when the upper half is non-zero. 432 char16_ascii[char_pos] |= 0x100; 433 EXPECT_FALSE( 434 IsStringASCII(StringPiece16(char16_ascii + offset, len))); 435 char16_ascii[char_pos] &= ~0x100; 436 } 437 } 438 } 439 } 440 441 { 442 const size_t string_length = wchar_ascii.length(); 443 for (size_t len = 0; len < string_length; ++len) { 444 EXPECT_TRUE(IsStringASCII(wchar_ascii.substr(0, len))); 445 for (size_t char_pos = 0; char_pos < len; ++char_pos) { 446 wchar_ascii[char_pos] |= 0x80; 447 EXPECT_FALSE( 448 IsStringASCII(wchar_ascii.substr(0, len))); 449 wchar_ascii[char_pos] &= ~0x80; 450 wchar_ascii[char_pos] |= 0x100; 451 EXPECT_FALSE( 452 IsStringASCII(wchar_ascii.substr(0, len))); 453 wchar_ascii[char_pos] &= ~0x100; 454 #if defined(WCHAR_T_IS_UTF32) 455 wchar_ascii[char_pos] |= 0x10000; 456 EXPECT_FALSE( 457 IsStringASCII(wchar_ascii.substr(0, len))); 458 wchar_ascii[char_pos] &= ~0x10000; 459 #endif // WCHAR_T_IS_UTF32 460 } 461 } 462 } 463 } 464 465 TEST(StringUtilTest, ConvertASCII) { 466 static const char* const char_cases[] = { 467 "Google Video", 468 "Hello, world\n", 469 "0123ABCDwxyz \a\b\t\r\n!+,.~" 470 }; 471 472 static const wchar_t* const wchar_cases[] = { 473 L"Google Video", 474 L"Hello, world\n", 475 L"0123ABCDwxyz \a\b\t\r\n!+,.~" 476 }; 477 478 for (size_t i = 0; i < arraysize(char_cases); ++i) { 479 EXPECT_TRUE(IsStringASCII(char_cases[i])); 480 string16 utf16 = ASCIIToUTF16(char_cases[i]); 481 EXPECT_EQ(WideToUTF16(wchar_cases[i]), utf16); 482 483 std::string ascii = UTF16ToASCII(WideToUTF16(wchar_cases[i])); 484 EXPECT_EQ(char_cases[i], ascii); 485 } 486 487 EXPECT_FALSE(IsStringASCII("Google \x80Video")); 488 489 // Convert empty strings. 490 string16 empty16; 491 std::string empty; 492 EXPECT_EQ(empty, UTF16ToASCII(empty16)); 493 EXPECT_EQ(empty16, ASCIIToUTF16(empty)); 494 495 // Convert strings with an embedded NUL character. 496 const char chars_with_nul[] = "test\0string"; 497 const int length_with_nul = arraysize(chars_with_nul) - 1; 498 std::string string_with_nul(chars_with_nul, length_with_nul); 499 string16 string16_with_nul = ASCIIToUTF16(string_with_nul); 500 EXPECT_EQ(static_cast<string16::size_type>(length_with_nul), 501 string16_with_nul.length()); 502 std::string narrow_with_nul = UTF16ToASCII(string16_with_nul); 503 EXPECT_EQ(static_cast<std::string::size_type>(length_with_nul), 504 narrow_with_nul.length()); 505 EXPECT_EQ(0, string_with_nul.compare(narrow_with_nul)); 506 } 507 508 TEST(StringUtilTest, ToLowerASCII) { 509 EXPECT_EQ('c', ToLowerASCII('C')); 510 EXPECT_EQ('c', ToLowerASCII('c')); 511 EXPECT_EQ('2', ToLowerASCII('2')); 512 513 EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('C'))); 514 EXPECT_EQ(static_cast<char16>('c'), ToLowerASCII(static_cast<char16>('c'))); 515 EXPECT_EQ(static_cast<char16>('2'), ToLowerASCII(static_cast<char16>('2'))); 516 517 EXPECT_EQ("cc2", ToLowerASCII("Cc2")); 518 EXPECT_EQ(ASCIIToUTF16("cc2"), ToLowerASCII(ASCIIToUTF16("Cc2"))); 519 } 520 521 TEST(StringUtilTest, ToUpperASCII) { 522 EXPECT_EQ('C', ToUpperASCII('C')); 523 EXPECT_EQ('C', ToUpperASCII('c')); 524 EXPECT_EQ('2', ToUpperASCII('2')); 525 526 EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('C'))); 527 EXPECT_EQ(static_cast<char16>('C'), ToUpperASCII(static_cast<char16>('c'))); 528 EXPECT_EQ(static_cast<char16>('2'), ToUpperASCII(static_cast<char16>('2'))); 529 530 EXPECT_EQ("CC2", ToUpperASCII("Cc2")); 531 EXPECT_EQ(ASCIIToUTF16("CC2"), ToUpperASCII(ASCIIToUTF16("Cc2"))); 532 } 533 534 TEST(StringUtilTest, LowerCaseEqualsASCII) { 535 static const struct { 536 const char* src_a; 537 const char* dst; 538 } lowercase_cases[] = { 539 { "FoO", "foo" }, 540 { "foo", "foo" }, 541 { "FOO", "foo" }, 542 }; 543 544 for (size_t i = 0; i < arraysize(lowercase_cases); ++i) { 545 EXPECT_TRUE(LowerCaseEqualsASCII(ASCIIToUTF16(lowercase_cases[i].src_a), 546 lowercase_cases[i].dst)); 547 EXPECT_TRUE(LowerCaseEqualsASCII(lowercase_cases[i].src_a, 548 lowercase_cases[i].dst)); 549 } 550 } 551 552 TEST(StringUtilTest, FormatBytesUnlocalized) { 553 static const struct { 554 int64_t bytes; 555 const char* expected; 556 } cases[] = { 557 // Expected behavior: we show one post-decimal digit when we have 558 // under two pre-decimal digits, except in cases where it makes no 559 // sense (zero or bytes). 560 // Since we switch units once we cross the 1000 mark, this keeps 561 // the display of file sizes or bytes consistently around three 562 // digits. 563 {0, "0 B"}, 564 {512, "512 B"}, 565 {1024*1024, "1.0 MB"}, 566 {1024*1024*1024, "1.0 GB"}, 567 {10LL*1024*1024*1024, "10.0 GB"}, 568 {99LL*1024*1024*1024, "99.0 GB"}, 569 {105LL*1024*1024*1024, "105 GB"}, 570 {105LL*1024*1024*1024 + 500LL*1024*1024, "105 GB"}, 571 {~(1LL << 63), "8192 PB"}, 572 573 {99*1024 + 103, "99.1 kB"}, 574 {1024*1024 + 103, "1.0 MB"}, 575 {1024*1024 + 205 * 1024, "1.2 MB"}, 576 {1024*1024*1024 + (927 * 1024*1024), "1.9 GB"}, 577 {10LL*1024*1024*1024, "10.0 GB"}, 578 {100LL*1024*1024*1024, "100 GB"}, 579 }; 580 581 for (size_t i = 0; i < arraysize(cases); ++i) { 582 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), 583 FormatBytesUnlocalized(cases[i].bytes)); 584 } 585 } 586 TEST(StringUtilTest, ReplaceSubstringsAfterOffset) { 587 static const struct { 588 const char* str; 589 string16::size_type start_offset; 590 const char* find_this; 591 const char* replace_with; 592 const char* expected; 593 } cases[] = { 594 {"aaa", 0, "a", "b", "bbb"}, 595 {"abb", 0, "ab", "a", "ab"}, 596 {"Removing some substrings inging", 0, "ing", "", "Remov some substrs "}, 597 {"Not found", 0, "x", "0", "Not found"}, 598 {"Not found again", 5, "x", "0", "Not found again"}, 599 {" Making it much longer ", 0, " ", "Four score and seven years ago", 600 "Four score and seven years agoMakingFour score and seven years agoit" 601 "Four score and seven years agomuchFour score and seven years agolonger" 602 "Four score and seven years ago"}, 603 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 604 {"Replace me only me once", 9, "me ", "", "Replace me only once"}, 605 {"abababab", 2, "ab", "c", "abccc"}, 606 }; 607 608 for (size_t i = 0; i < arraysize(cases); i++) { 609 string16 str = ASCIIToUTF16(cases[i].str); 610 ReplaceSubstringsAfterOffset(&str, cases[i].start_offset, 611 ASCIIToUTF16(cases[i].find_this), 612 ASCIIToUTF16(cases[i].replace_with)); 613 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 614 } 615 } 616 617 TEST(StringUtilTest, ReplaceFirstSubstringAfterOffset) { 618 static const struct { 619 const char* str; 620 string16::size_type start_offset; 621 const char* find_this; 622 const char* replace_with; 623 const char* expected; 624 } cases[] = { 625 {"aaa", 0, "a", "b", "baa"}, 626 {"abb", 0, "ab", "a", "ab"}, 627 {"Removing some substrings inging", 0, "ing", "", 628 "Remov some substrings inging"}, 629 {"Not found", 0, "x", "0", "Not found"}, 630 {"Not found again", 5, "x", "0", "Not found again"}, 631 {" Making it much longer ", 0, " ", "Four score and seven years ago", 632 "Four score and seven years agoMaking it much longer "}, 633 {"Invalid offset", 9999, "t", "foobar", "Invalid offset"}, 634 {"Replace me only me once", 4, "me ", "", "Replace only me once"}, 635 {"abababab", 2, "ab", "c", "abcabab"}, 636 }; 637 638 for (size_t i = 0; i < arraysize(cases); i++) { 639 string16 str = ASCIIToUTF16(cases[i].str); 640 ReplaceFirstSubstringAfterOffset(&str, cases[i].start_offset, 641 ASCIIToUTF16(cases[i].find_this), 642 ASCIIToUTF16(cases[i].replace_with)); 643 EXPECT_EQ(ASCIIToUTF16(cases[i].expected), str); 644 } 645 } 646 647 TEST(StringUtilTest, HexDigitToInt) { 648 EXPECT_EQ(0, HexDigitToInt('0')); 649 EXPECT_EQ(1, HexDigitToInt('1')); 650 EXPECT_EQ(2, HexDigitToInt('2')); 651 EXPECT_EQ(3, HexDigitToInt('3')); 652 EXPECT_EQ(4, HexDigitToInt('4')); 653 EXPECT_EQ(5, HexDigitToInt('5')); 654 EXPECT_EQ(6, HexDigitToInt('6')); 655 EXPECT_EQ(7, HexDigitToInt('7')); 656 EXPECT_EQ(8, HexDigitToInt('8')); 657 EXPECT_EQ(9, HexDigitToInt('9')); 658 EXPECT_EQ(10, HexDigitToInt('A')); 659 EXPECT_EQ(11, HexDigitToInt('B')); 660 EXPECT_EQ(12, HexDigitToInt('C')); 661 EXPECT_EQ(13, HexDigitToInt('D')); 662 EXPECT_EQ(14, HexDigitToInt('E')); 663 EXPECT_EQ(15, HexDigitToInt('F')); 664 665 // Verify the lower case as well. 666 EXPECT_EQ(10, HexDigitToInt('a')); 667 EXPECT_EQ(11, HexDigitToInt('b')); 668 EXPECT_EQ(12, HexDigitToInt('c')); 669 EXPECT_EQ(13, HexDigitToInt('d')); 670 EXPECT_EQ(14, HexDigitToInt('e')); 671 EXPECT_EQ(15, HexDigitToInt('f')); 672 } 673 674 TEST(StringUtilTest, JoinString) { 675 std::string separator(", "); 676 std::vector<std::string> parts; 677 EXPECT_EQ(std::string(), JoinString(parts, separator)); 678 679 parts.push_back("a"); 680 EXPECT_EQ("a", JoinString(parts, separator)); 681 682 parts.push_back("b"); 683 parts.push_back("c"); 684 EXPECT_EQ("a, b, c", JoinString(parts, separator)); 685 686 parts.push_back(std::string()); 687 EXPECT_EQ("a, b, c, ", JoinString(parts, separator)); 688 parts.push_back(" "); 689 EXPECT_EQ("a|b|c|| ", JoinString(parts, "|")); 690 } 691 692 TEST(StringUtilTest, JoinString16) { 693 string16 separator = ASCIIToUTF16(", "); 694 std::vector<string16> parts; 695 EXPECT_EQ(string16(), JoinString(parts, separator)); 696 697 parts.push_back(ASCIIToUTF16("a")); 698 EXPECT_EQ(ASCIIToUTF16("a"), JoinString(parts, separator)); 699 700 parts.push_back(ASCIIToUTF16("b")); 701 parts.push_back(ASCIIToUTF16("c")); 702 EXPECT_EQ(ASCIIToUTF16("a, b, c"), JoinString(parts, separator)); 703 704 parts.push_back(ASCIIToUTF16("")); 705 EXPECT_EQ(ASCIIToUTF16("a, b, c, "), JoinString(parts, separator)); 706 parts.push_back(ASCIIToUTF16(" ")); 707 EXPECT_EQ(ASCIIToUTF16("a|b|c|| "), JoinString(parts, ASCIIToUTF16("|"))); 708 } 709 710 TEST(StringUtilTest, StartsWith) { 711 EXPECT_TRUE(StartsWith("javascript:url", "javascript", 712 base::CompareCase::SENSITIVE)); 713 EXPECT_FALSE(StartsWith("JavaScript:url", "javascript", 714 base::CompareCase::SENSITIVE)); 715 EXPECT_TRUE(StartsWith("javascript:url", "javascript", 716 base::CompareCase::INSENSITIVE_ASCII)); 717 EXPECT_TRUE(StartsWith("JavaScript:url", "javascript", 718 base::CompareCase::INSENSITIVE_ASCII)); 719 EXPECT_FALSE(StartsWith("java", "javascript", base::CompareCase::SENSITIVE)); 720 EXPECT_FALSE(StartsWith("java", "javascript", 721 base::CompareCase::INSENSITIVE_ASCII)); 722 EXPECT_FALSE(StartsWith(std::string(), "javascript", 723 base::CompareCase::INSENSITIVE_ASCII)); 724 EXPECT_FALSE(StartsWith(std::string(), "javascript", 725 base::CompareCase::SENSITIVE)); 726 EXPECT_TRUE(StartsWith("java", std::string(), 727 base::CompareCase::INSENSITIVE_ASCII)); 728 EXPECT_TRUE(StartsWith("java", std::string(), base::CompareCase::SENSITIVE)); 729 730 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), 731 ASCIIToUTF16("javascript"), 732 base::CompareCase::SENSITIVE)); 733 EXPECT_FALSE(StartsWith(ASCIIToUTF16("JavaScript:url"), 734 ASCIIToUTF16("javascript"), 735 base::CompareCase::SENSITIVE)); 736 EXPECT_TRUE(StartsWith(ASCIIToUTF16("javascript:url"), 737 ASCIIToUTF16("javascript"), 738 base::CompareCase::INSENSITIVE_ASCII)); 739 EXPECT_TRUE(StartsWith(ASCIIToUTF16("JavaScript:url"), 740 ASCIIToUTF16("javascript"), 741 base::CompareCase::INSENSITIVE_ASCII)); 742 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), 743 base::CompareCase::SENSITIVE)); 744 EXPECT_FALSE(StartsWith(ASCIIToUTF16("java"), ASCIIToUTF16("javascript"), 745 base::CompareCase::INSENSITIVE_ASCII)); 746 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), 747 base::CompareCase::INSENSITIVE_ASCII)); 748 EXPECT_FALSE(StartsWith(string16(), ASCIIToUTF16("javascript"), 749 base::CompareCase::SENSITIVE)); 750 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), 751 base::CompareCase::INSENSITIVE_ASCII)); 752 EXPECT_TRUE(StartsWith(ASCIIToUTF16("java"), string16(), 753 base::CompareCase::SENSITIVE)); 754 } 755 756 TEST(StringUtilTest, EndsWith) { 757 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), 758 base::CompareCase::SENSITIVE)); 759 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), 760 base::CompareCase::SENSITIVE)); 761 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), ASCIIToUTF16(".plugin"), 762 base::CompareCase::INSENSITIVE_ASCII)); 763 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.Plugin"), ASCIIToUTF16(".plugin"), 764 base::CompareCase::INSENSITIVE_ASCII)); 765 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), 766 base::CompareCase::SENSITIVE)); 767 EXPECT_FALSE(EndsWith(ASCIIToUTF16(".plug"), ASCIIToUTF16(".plugin"), 768 base::CompareCase::INSENSITIVE_ASCII)); 769 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), 770 base::CompareCase::SENSITIVE)); 771 EXPECT_FALSE(EndsWith(ASCIIToUTF16("Foo.plugin Bar"), ASCIIToUTF16(".plugin"), 772 base::CompareCase::INSENSITIVE_ASCII)); 773 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), 774 base::CompareCase::INSENSITIVE_ASCII)); 775 EXPECT_FALSE(EndsWith(string16(), ASCIIToUTF16(".plugin"), 776 base::CompareCase::SENSITIVE)); 777 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), 778 base::CompareCase::INSENSITIVE_ASCII)); 779 EXPECT_TRUE(EndsWith(ASCIIToUTF16("Foo.plugin"), string16(), 780 base::CompareCase::SENSITIVE)); 781 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), 782 base::CompareCase::INSENSITIVE_ASCII)); 783 EXPECT_TRUE(EndsWith(ASCIIToUTF16(".plugin"), ASCIIToUTF16(".plugin"), 784 base::CompareCase::SENSITIVE)); 785 EXPECT_TRUE( 786 EndsWith(string16(), string16(), base::CompareCase::INSENSITIVE_ASCII)); 787 EXPECT_TRUE(EndsWith(string16(), string16(), base::CompareCase::SENSITIVE)); 788 } 789 790 TEST(StringUtilTest, GetStringFWithOffsets) { 791 std::vector<string16> subst; 792 subst.push_back(ASCIIToUTF16("1")); 793 subst.push_back(ASCIIToUTF16("2")); 794 std::vector<size_t> offsets; 795 796 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $1. Your number is $2."), 797 subst, 798 &offsets); 799 EXPECT_EQ(2U, offsets.size()); 800 EXPECT_EQ(7U, offsets[0]); 801 EXPECT_EQ(25U, offsets[1]); 802 offsets.clear(); 803 804 ReplaceStringPlaceholders(ASCIIToUTF16("Hello, $2. Your number is $1."), 805 subst, 806 &offsets); 807 EXPECT_EQ(2U, offsets.size()); 808 EXPECT_EQ(25U, offsets[0]); 809 EXPECT_EQ(7U, offsets[1]); 810 offsets.clear(); 811 } 812 813 TEST(StringUtilTest, ReplaceStringPlaceholdersTooFew) { 814 // Test whether replacestringplaceholders works as expected when there 815 // are fewer inputs than outputs. 816 std::vector<string16> subst; 817 subst.push_back(ASCIIToUTF16("9a")); 818 subst.push_back(ASCIIToUTF16("8b")); 819 subst.push_back(ASCIIToUTF16("7c")); 820 821 string16 formatted = 822 ReplaceStringPlaceholders( 823 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$1g,$2h,$3i"), subst, nullptr); 824 825 EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,d,e,f,9ag,8bh,7ci"), formatted); 826 } 827 828 TEST(StringUtilTest, ReplaceStringPlaceholders) { 829 std::vector<string16> subst; 830 subst.push_back(ASCIIToUTF16("9a")); 831 subst.push_back(ASCIIToUTF16("8b")); 832 subst.push_back(ASCIIToUTF16("7c")); 833 subst.push_back(ASCIIToUTF16("6d")); 834 subst.push_back(ASCIIToUTF16("5e")); 835 subst.push_back(ASCIIToUTF16("4f")); 836 subst.push_back(ASCIIToUTF16("3g")); 837 subst.push_back(ASCIIToUTF16("2h")); 838 subst.push_back(ASCIIToUTF16("1i")); 839 840 string16 formatted = 841 ReplaceStringPlaceholders( 842 ASCIIToUTF16("$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i"), subst, nullptr); 843 844 EXPECT_EQ(ASCIIToUTF16("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii"), formatted); 845 } 846 847 TEST(StringUtilTest, ReplaceStringPlaceholdersOneDigit) { 848 std::vector<string16> subst; 849 subst.push_back(ASCIIToUTF16("1a")); 850 string16 formatted = 851 ReplaceStringPlaceholders(ASCIIToUTF16(" $16 "), subst, nullptr); 852 EXPECT_EQ(ASCIIToUTF16(" 1a6 "), formatted); 853 } 854 855 TEST(StringUtilTest, ReplaceStringPlaceholdersInvalidPlaceholder) { 856 std::vector<string16> subst; 857 subst.push_back(ASCIIToUTF16("1a")); 858 string16 formatted = 859 ReplaceStringPlaceholders(ASCIIToUTF16("+$-+$A+$1+"), subst, nullptr); 860 EXPECT_EQ(ASCIIToUTF16("+++1a+"), formatted); 861 } 862 863 TEST(StringUtilTest, StdStringReplaceStringPlaceholders) { 864 std::vector<std::string> subst; 865 subst.push_back("9a"); 866 subst.push_back("8b"); 867 subst.push_back("7c"); 868 subst.push_back("6d"); 869 subst.push_back("5e"); 870 subst.push_back("4f"); 871 subst.push_back("3g"); 872 subst.push_back("2h"); 873 subst.push_back("1i"); 874 875 std::string formatted = 876 ReplaceStringPlaceholders( 877 "$1a,$2b,$3c,$4d,$5e,$6f,$7g,$8h,$9i", subst, nullptr); 878 879 EXPECT_EQ("9aa,8bb,7cc,6dd,5ee,4ff,3gg,2hh,1ii", formatted); 880 } 881 882 TEST(StringUtilTest, ReplaceStringPlaceholdersConsecutiveDollarSigns) { 883 std::vector<std::string> subst; 884 subst.push_back("a"); 885 subst.push_back("b"); 886 subst.push_back("c"); 887 EXPECT_EQ(ReplaceStringPlaceholders("$$1 $$$2 $$$$3", subst, nullptr), 888 "$1 $$2 $$$3"); 889 } 890 891 TEST(StringUtilTest, LcpyTest) { 892 // Test the normal case where we fit in our buffer. 893 { 894 char dst[10]; 895 wchar_t wdst[10]; 896 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); 897 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 898 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 899 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 900 } 901 902 // Test dst_size == 0, nothing should be written to |dst| and we should 903 // have the equivalent of strlen(src). 904 { 905 char dst[2] = {1, 2}; 906 wchar_t wdst[2] = {1, 2}; 907 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", 0)); 908 EXPECT_EQ(1, dst[0]); 909 EXPECT_EQ(2, dst[1]); 910 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", 0)); 911 EXPECT_EQ(static_cast<wchar_t>(1), wdst[0]); 912 EXPECT_EQ(static_cast<wchar_t>(2), wdst[1]); 913 } 914 915 // Test the case were we _just_ competely fit including the null. 916 { 917 char dst[8]; 918 wchar_t wdst[8]; 919 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); 920 EXPECT_EQ(0, memcmp(dst, "abcdefg", 8)); 921 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 922 EXPECT_EQ(0, memcmp(wdst, L"abcdefg", sizeof(wchar_t) * 8)); 923 } 924 925 // Test the case were we we are one smaller, so we can't fit the null. 926 { 927 char dst[7]; 928 wchar_t wdst[7]; 929 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); 930 EXPECT_EQ(0, memcmp(dst, "abcdef", 7)); 931 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 932 EXPECT_EQ(0, memcmp(wdst, L"abcdef", sizeof(wchar_t) * 7)); 933 } 934 935 // Test the case were we are just too small. 936 { 937 char dst[3]; 938 wchar_t wdst[3]; 939 EXPECT_EQ(7U, strlcpy(dst, "abcdefg", arraysize(dst))); 940 EXPECT_EQ(0, memcmp(dst, "ab", 3)); 941 EXPECT_EQ(7U, wcslcpy(wdst, L"abcdefg", arraysize(wdst))); 942 EXPECT_EQ(0, memcmp(wdst, L"ab", sizeof(wchar_t) * 3)); 943 } 944 } 945 946 TEST(StringUtilTest, WprintfFormatPortabilityTest) { 947 static const struct { 948 const wchar_t* input; 949 bool portable; 950 } cases[] = { 951 { L"%ls", true }, 952 { L"%s", false }, 953 { L"%S", false }, 954 { L"%lS", false }, 955 { L"Hello, %s", false }, 956 { L"%lc", true }, 957 { L"%c", false }, 958 { L"%C", false }, 959 { L"%lC", false }, 960 { L"%ls %s", false }, 961 { L"%s %ls", false }, 962 { L"%s %ls %s", false }, 963 { L"%f", true }, 964 { L"%f %F", false }, 965 { L"%d %D", false }, 966 { L"%o %O", false }, 967 { L"%u %U", false }, 968 { L"%f %d %o %u", true }, 969 { L"%-8d (%02.1f%)", true }, 970 { L"% 10s", false }, 971 { L"% 10ls", true } 972 }; 973 for (size_t i = 0; i < arraysize(cases); ++i) 974 EXPECT_EQ(cases[i].portable, IsWprintfFormatPortable(cases[i].input)); 975 } 976 977 TEST(StringUtilTest, RemoveChars) { 978 const char kRemoveChars[] = "-/+*"; 979 std::string input = "A-+bc/d!*"; 980 EXPECT_TRUE(RemoveChars(input, kRemoveChars, &input)); 981 EXPECT_EQ("Abcd!", input); 982 983 // No characters match kRemoveChars. 984 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 985 EXPECT_EQ("Abcd!", input); 986 987 // Empty string. 988 input.clear(); 989 EXPECT_FALSE(RemoveChars(input, kRemoveChars, &input)); 990 EXPECT_EQ(std::string(), input); 991 } 992 993 TEST(StringUtilTest, ReplaceChars) { 994 struct TestData { 995 const char* input; 996 const char* replace_chars; 997 const char* replace_with; 998 const char* output; 999 bool result; 1000 } cases[] = { 1001 { "", "", "", "", false }, 1002 { "test", "", "", "test", false }, 1003 { "test", "", "!", "test", false }, 1004 { "test", "z", "!", "test", false }, 1005 { "test", "e", "!", "t!st", true }, 1006 { "test", "e", "!?", "t!?st", true }, 1007 { "test", "ez", "!", "t!st", true }, 1008 { "test", "zed", "!?", "t!?st", true }, 1009 { "test", "t", "!?", "!?es!?", true }, 1010 { "test", "et", "!>", "!>!>s!>", true }, 1011 { "test", "zest", "!", "!!!!", true }, 1012 { "test", "szt", "!", "!e!!", true }, 1013 { "test", "t", "test", "testestest", true }, 1014 }; 1015 1016 for (size_t i = 0; i < arraysize(cases); ++i) { 1017 std::string output; 1018 bool result = ReplaceChars(cases[i].input, 1019 cases[i].replace_chars, 1020 cases[i].replace_with, 1021 &output); 1022 EXPECT_EQ(cases[i].result, result); 1023 EXPECT_EQ(cases[i].output, output); 1024 } 1025 } 1026 1027 TEST(StringUtilTest, ContainsOnlyChars) { 1028 // Providing an empty list of characters should return false but for the empty 1029 // string. 1030 EXPECT_TRUE(ContainsOnlyChars(std::string(), std::string())); 1031 EXPECT_FALSE(ContainsOnlyChars("Hello", std::string())); 1032 1033 EXPECT_TRUE(ContainsOnlyChars(std::string(), "1234")); 1034 EXPECT_TRUE(ContainsOnlyChars("1", "1234")); 1035 EXPECT_TRUE(ContainsOnlyChars("1", "4321")); 1036 EXPECT_TRUE(ContainsOnlyChars("123", "4321")); 1037 EXPECT_FALSE(ContainsOnlyChars("123a", "4321")); 1038 1039 EXPECT_TRUE(ContainsOnlyChars(std::string(), kWhitespaceASCII)); 1040 EXPECT_TRUE(ContainsOnlyChars(" ", kWhitespaceASCII)); 1041 EXPECT_TRUE(ContainsOnlyChars("\t", kWhitespaceASCII)); 1042 EXPECT_TRUE(ContainsOnlyChars("\t \r \n ", kWhitespaceASCII)); 1043 EXPECT_FALSE(ContainsOnlyChars("a", kWhitespaceASCII)); 1044 EXPECT_FALSE(ContainsOnlyChars("\thello\r \n ", kWhitespaceASCII)); 1045 1046 EXPECT_TRUE(ContainsOnlyChars(string16(), kWhitespaceUTF16)); 1047 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16(" "), kWhitespaceUTF16)); 1048 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t"), kWhitespaceUTF16)); 1049 EXPECT_TRUE(ContainsOnlyChars(ASCIIToUTF16("\t \r \n "), kWhitespaceUTF16)); 1050 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("a"), kWhitespaceUTF16)); 1051 EXPECT_FALSE(ContainsOnlyChars(ASCIIToUTF16("\thello\r \n "), 1052 kWhitespaceUTF16)); 1053 } 1054 1055 TEST(StringUtilTest, CompareCaseInsensitiveASCII) { 1056 EXPECT_EQ(0, CompareCaseInsensitiveASCII("", "")); 1057 EXPECT_EQ(0, CompareCaseInsensitiveASCII("Asdf", "aSDf")); 1058 1059 // Differing lengths. 1060 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("Asdf", "aSDfA")); 1061 EXPECT_EQ(1, CompareCaseInsensitiveASCII("AsdfA", "aSDf")); 1062 1063 // Differing values. 1064 EXPECT_EQ(-1, CompareCaseInsensitiveASCII("AsdfA", "aSDfb")); 1065 EXPECT_EQ(1, CompareCaseInsensitiveASCII("Asdfb", "aSDfA")); 1066 } 1067 1068 TEST(StringUtilTest, EqualsCaseInsensitiveASCII) { 1069 EXPECT_TRUE(EqualsCaseInsensitiveASCII("", "")); 1070 EXPECT_TRUE(EqualsCaseInsensitiveASCII("Asdf", "aSDF")); 1071 EXPECT_FALSE(EqualsCaseInsensitiveASCII("bsdf", "aSDF")); 1072 EXPECT_FALSE(EqualsCaseInsensitiveASCII("Asdf", "aSDFz")); 1073 } 1074 1075 TEST(StringUtilTest, IsUnicodeWhitespace) { 1076 // NOT unicode white space. 1077 EXPECT_FALSE(IsUnicodeWhitespace(L'\0')); 1078 EXPECT_FALSE(IsUnicodeWhitespace(L'A')); 1079 EXPECT_FALSE(IsUnicodeWhitespace(L'0')); 1080 EXPECT_FALSE(IsUnicodeWhitespace(L'.')); 1081 EXPECT_FALSE(IsUnicodeWhitespace(L';')); 1082 EXPECT_FALSE(IsUnicodeWhitespace(L'\x4100')); 1083 1084 // Actual unicode whitespace. 1085 EXPECT_TRUE(IsUnicodeWhitespace(L' ')); 1086 EXPECT_TRUE(IsUnicodeWhitespace(L'\xa0')); 1087 EXPECT_TRUE(IsUnicodeWhitespace(L'\x3000')); 1088 EXPECT_TRUE(IsUnicodeWhitespace(L'\t')); 1089 EXPECT_TRUE(IsUnicodeWhitespace(L'\r')); 1090 EXPECT_TRUE(IsUnicodeWhitespace(L'\v')); 1091 EXPECT_TRUE(IsUnicodeWhitespace(L'\f')); 1092 EXPECT_TRUE(IsUnicodeWhitespace(L'\n')); 1093 } 1094 1095 class WriteIntoTest : public testing::Test { 1096 protected: 1097 static void WritesCorrectly(size_t num_chars) { 1098 std::string buffer; 1099 char kOriginal[] = "supercali"; 1100 strncpy(WriteInto(&buffer, num_chars + 1), kOriginal, num_chars); 1101 // Using std::string(buffer.c_str()) instead of |buffer| truncates the 1102 // string at the first \0. 1103 EXPECT_EQ(std::string(kOriginal, 1104 std::min(num_chars, arraysize(kOriginal) - 1)), 1105 std::string(buffer.c_str())); 1106 EXPECT_EQ(num_chars, buffer.size()); 1107 } 1108 }; 1109 1110 TEST_F(WriteIntoTest, WriteInto) { 1111 // Validate that WriteInto reserves enough space and 1112 // sizes a string correctly. 1113 WritesCorrectly(1); 1114 WritesCorrectly(2); 1115 WritesCorrectly(5000); 1116 1117 // Validate that WriteInto doesn't modify other strings 1118 // when using a Copy-on-Write implementation. 1119 const char kLive[] = "live"; 1120 const char kDead[] = "dead"; 1121 const std::string live = kLive; 1122 std::string dead = live; 1123 strncpy(WriteInto(&dead, 5), kDead, 4); 1124 EXPECT_EQ(kDead, dead); 1125 EXPECT_EQ(4u, dead.size()); 1126 EXPECT_EQ(kLive, live); 1127 EXPECT_EQ(4u, live.size()); 1128 } 1129 1130 } // namespace base 1131