1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ppapi/tests/test_char_set.h" 6 7 #include "ppapi/c/dev/ppb_char_set_dev.h" 8 #include "ppapi/c/trusted/ppb_char_set_trusted.h" 9 #include "ppapi/cpp/dev/memory_dev.h" 10 #include "ppapi/cpp/module.h" 11 #include "ppapi/tests/testing_instance.h" 12 13 REGISTER_TEST_CASE(CharSet); 14 15 TestCharSet::TestCharSet(TestingInstance* instance) 16 : TestCase(instance), 17 char_set_interface_(NULL) { 18 } 19 20 bool TestCharSet::Init() { 21 char_set_interface_ = static_cast<const PPB_CharSet_Dev*>( 22 pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE)); 23 char_set_trusted_interface_ = static_cast<const PPB_CharSet_Trusted*>( 24 pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE)); 25 return char_set_interface_ && char_set_trusted_interface_; 26 } 27 28 void TestCharSet::RunTests(const std::string& filter) { 29 RUN_TEST(UTF16ToCharSetDeprecated, filter); 30 RUN_TEST(UTF16ToCharSet, filter); 31 RUN_TEST(CharSetToUTF16Deprecated, filter); 32 RUN_TEST(CharSetToUTF16, filter); 33 RUN_TEST(GetDefaultCharSet, filter); 34 } 35 36 // TODO(brettw) remove this when the old interface is removed. 37 std::string TestCharSet::TestUTF16ToCharSetDeprecated() { 38 // Empty string. 39 std::vector<uint16_t> utf16; 40 utf16.push_back(0); 41 uint32_t utf8result_len = 0; 42 pp::Memory_Dev memory; 43 char* utf8result = char_set_interface_->UTF16ToCharSet( 44 instance_->pp_instance(), &utf16[0], 0, "latin1", 45 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 46 ASSERT_TRUE(utf8result); 47 ASSERT_TRUE(utf8result[0] == 0); 48 ASSERT_TRUE(utf8result_len == 0); 49 memory.MemFree(utf8result); 50 51 // Try round-tripping some English & Chinese from UTF-8 through UTF-16 52 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd"); 53 utf16 = UTF8ToUTF16(utf8source); 54 utf8result = char_set_interface_->UTF16ToCharSet( 55 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 56 "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len); 57 ASSERT_TRUE(utf8source == std::string(utf8result, utf8result_len)); 58 memory.MemFree(utf8result); 59 60 // Test an un-encodable character with various modes. 61 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i"); 62 63 // Fail mode. 64 utf8result_len = 1234; // Test that this gets 0'ed on failure. 65 utf8result = char_set_interface_->UTF16ToCharSet( 66 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 67 "latin1", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len); 68 ASSERT_TRUE(utf8result_len == 0); 69 ASSERT_TRUE(utf8result == NULL); 70 71 // Skip mode. 72 utf8result = char_set_interface_->UTF16ToCharSet( 73 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 74 "latin1", PP_CHARSET_CONVERSIONERROR_SKIP, &utf8result_len); 75 ASSERT_TRUE(utf8result_len == 2); 76 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == 'i' && 77 utf8result[2] == 0); 78 memory.MemFree(utf8result); 79 80 // Substitute mode. 81 utf8result = char_set_interface_->UTF16ToCharSet( 82 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 83 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 84 ASSERT_TRUE(utf8result_len == 3); 85 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == '?' && 86 utf8result[2] == 'i' && utf8result[3] == 0); 87 memory.MemFree(utf8result); 88 89 // Try some invalid input encoding. 90 utf16.clear(); 91 utf16.push_back(0xD800); // High surrogate. 92 utf16.push_back('A'); // Not a low surrogate. 93 utf8result = char_set_interface_->UTF16ToCharSet( 94 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 95 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 96 ASSERT_TRUE(utf8result_len == 2); 97 ASSERT_TRUE(utf8result[0] == '?' && utf8result[1] == 'A' && 98 utf8result[2] == 0); 99 memory.MemFree(utf8result); 100 101 // Invalid encoding name. 102 utf8result = char_set_interface_->UTF16ToCharSet( 103 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 104 "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 105 ASSERT_TRUE(!utf8result); 106 ASSERT_TRUE(utf8result_len == 0); 107 108 PASS(); 109 } 110 111 std::string TestCharSet::TestUTF16ToCharSet() { 112 // Empty string. 113 std::vector<uint16_t> utf16; 114 utf16.push_back(0); 115 std::string output_buffer; 116 uint32_t utf8result_len = static_cast<uint32_t>(output_buffer.size()); 117 PP_Bool result = char_set_trusted_interface_->UTF16ToCharSet( 118 &utf16[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 119 &output_buffer[0], &utf8result_len); 120 ASSERT_TRUE(result == PP_TRUE); 121 ASSERT_TRUE(utf8result_len == 0); 122 123 // No output buffer returns length of string. 124 utf16 = UTF8ToUTF16("hello"); 125 utf8result_len = 0; 126 result = char_set_trusted_interface_->UTF16ToCharSet( 127 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1", 128 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, NULL, &utf8result_len); 129 ASSERT_TRUE(result == PP_TRUE); 130 ASSERT_TRUE(utf8result_len == 5); 131 132 // Giving too small of a buffer just fills in that many items and gives us 133 // the desired size. 134 output_buffer.resize(100); 135 utf8result_len = 2; 136 output_buffer[utf8result_len] = '$'; // Barrier character. 137 result = char_set_trusted_interface_->UTF16ToCharSet( 138 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1", 139 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 140 &output_buffer[0], &utf8result_len); 141 ASSERT_TRUE(result == PP_TRUE); 142 ASSERT_TRUE(utf8result_len == 5); 143 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'e' && 144 output_buffer[2] == '$'); 145 146 // Try round-tripping some English & Chinese from UTF-8 through UTF-16 147 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd"); 148 utf16 = UTF8ToUTF16(utf8source); 149 output_buffer.resize(100); 150 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 151 result = char_set_trusted_interface_->UTF16ToCharSet( 152 &utf16[0], static_cast<uint32_t>(utf16.size()), 153 "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 154 &output_buffer[0], &utf8result_len); 155 ASSERT_TRUE(result == PP_TRUE); 156 output_buffer.resize(utf8result_len); 157 ASSERT_TRUE(utf8source == output_buffer); 158 159 // Test an un-encodable character with various modes. 160 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i"); 161 162 // Fail mode, size should get 0'ed on failure. 163 output_buffer.resize(100); 164 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 165 result = char_set_trusted_interface_->UTF16ToCharSet( 166 &utf16[0], static_cast<uint32_t>(utf16.size()), 167 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 168 &output_buffer[0], &utf8result_len); 169 ASSERT_TRUE(result == PP_FALSE); 170 ASSERT_TRUE(utf8result_len == 0); 171 172 // Skip mode. 173 output_buffer.resize(100); 174 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 175 result = char_set_trusted_interface_->UTF16ToCharSet( 176 &utf16[0], static_cast<uint32_t>(utf16.size()), 177 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP, 178 &output_buffer[0], &utf8result_len); 179 ASSERT_TRUE(result == PP_TRUE); 180 ASSERT_TRUE(utf8result_len == 2); 181 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'i'); 182 183 // Substitute mode. 184 output_buffer.resize(100); 185 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 186 result = char_set_trusted_interface_->UTF16ToCharSet( 187 &utf16[0], static_cast<uint32_t>(utf16.size()), 188 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 189 &output_buffer[0], &utf8result_len); 190 ASSERT_TRUE(utf8result_len == 3); 191 output_buffer.resize(utf8result_len); 192 ASSERT_TRUE(output_buffer == "h?i"); 193 194 // Try some invalid input encoding. 195 output_buffer.resize(100); 196 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 197 utf16.clear(); 198 utf16.push_back(0xD800); // High surrogate. 199 utf16.push_back('A'); // Not a low surrogate. 200 result = char_set_trusted_interface_->UTF16ToCharSet( 201 &utf16[0], static_cast<uint32_t>(utf16.size()), 202 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 203 &output_buffer[0], &utf8result_len); 204 ASSERT_TRUE(utf8result_len == 2); 205 ASSERT_TRUE(output_buffer[0] == '?' && output_buffer[1] == 'A'); 206 207 // Invalid encoding name. 208 output_buffer.resize(100); 209 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 210 result = char_set_trusted_interface_->UTF16ToCharSet( 211 &utf16[0], static_cast<uint32_t>(utf16.size()), 212 "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 213 &output_buffer[0], &utf8result_len); 214 ASSERT_TRUE(result == PP_FALSE); 215 ASSERT_TRUE(utf8result_len == 0); 216 217 PASS(); 218 } 219 220 // TODO(brettw) remove this when the old interface is removed. 221 std::string TestCharSet::TestCharSetToUTF16Deprecated() { 222 pp::Memory_Dev memory; 223 224 // Empty string. 225 uint32_t utf16result_len; 226 uint16_t* utf16result = char_set_interface_->CharSetToUTF16( 227 instance_->pp_instance(), "", 0, "latin1", 228 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 229 ASSERT_TRUE(utf16result); 230 ASSERT_TRUE(utf16result_len == 0); 231 ASSERT_TRUE(utf16result[0] == 0); 232 memory.MemFree(utf16result); 233 234 // Basic Latin1. 235 char latin1[] = "H\xef"; 236 utf16result = char_set_interface_->CharSetToUTF16( 237 instance_->pp_instance(), latin1, 2, "latin1", 238 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 239 ASSERT_TRUE(utf16result); 240 ASSERT_TRUE(utf16result_len == 2); 241 ASSERT_TRUE(utf16result[0] == 'H' && utf16result[1] == 0xef && 242 utf16result[2] == 0); 243 memory.MemFree(utf16result); 244 245 // Invalid input encoding with FAIL. 246 char badutf8[] = "A\xe4Z"; 247 utf16result = char_set_interface_->CharSetToUTF16( 248 instance_->pp_instance(), badutf8, 3, "utf8", 249 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 250 ASSERT_TRUE(!utf16result); 251 ASSERT_TRUE(utf16result_len == 0); 252 memory.MemFree(utf16result); 253 254 // Invalid input with SKIP. 255 utf16result = char_set_interface_->CharSetToUTF16( 256 instance_->pp_instance(), badutf8, 3, "utf8", 257 PP_CHARSET_CONVERSIONERROR_SKIP, &utf16result_len); 258 ASSERT_TRUE(utf16result); 259 ASSERT_TRUE(utf16result_len == 2); 260 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 'Z' && 261 utf16result[2] == 0); 262 memory.MemFree(utf16result); 263 264 // Invalid input with SUBSTITUTE. 265 utf16result = char_set_interface_->CharSetToUTF16( 266 instance_->pp_instance(), badutf8, 3, "utf8", 267 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len); 268 ASSERT_TRUE(utf16result); 269 ASSERT_TRUE(utf16result_len == 3); 270 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 0xFFFD && 271 utf16result[2] == 'Z' && utf16result[3] == 0); 272 memory.MemFree(utf16result); 273 274 // Invalid encoding name. 275 utf16result = char_set_interface_->CharSetToUTF16( 276 instance_->pp_instance(), badutf8, 3, "poopiepants", 277 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len); 278 ASSERT_TRUE(!utf16result); 279 ASSERT_TRUE(utf16result_len == 0); 280 memory.MemFree(utf16result); 281 282 PASS(); 283 } 284 285 std::string TestCharSet::TestCharSetToUTF16() { 286 std::vector<uint16_t> output_buffer; 287 output_buffer.resize(100); 288 289 // Empty string. 290 output_buffer.resize(100); 291 uint32_t utf16result_len = static_cast<uint32_t>(output_buffer.size()); 292 PP_Bool result = char_set_trusted_interface_->CharSetToUTF16( 293 "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 294 &output_buffer[0], &utf16result_len); 295 ASSERT_TRUE(result); 296 ASSERT_TRUE(utf16result_len == 0); 297 ASSERT_TRUE(output_buffer[0] == 0); 298 299 // Basic Latin1. 300 output_buffer.resize(100); 301 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 302 char latin1[] = "H\xef"; 303 result = char_set_trusted_interface_->CharSetToUTF16( 304 latin1, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 305 &output_buffer[0], &utf16result_len); 306 ASSERT_TRUE(result); 307 ASSERT_TRUE(utf16result_len == 2); 308 ASSERT_TRUE(output_buffer[0] == 'H' && output_buffer[1] == 0xef); 309 310 // Invalid input encoding with FAIL. 311 output_buffer.resize(100); 312 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 313 char badutf8[] = "A\xe4Z"; 314 result = char_set_trusted_interface_->CharSetToUTF16( 315 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 316 &output_buffer[0], &utf16result_len); 317 ASSERT_TRUE(!result); 318 ASSERT_TRUE(utf16result_len == 0); 319 320 // Invalid input with SKIP. 321 output_buffer.resize(100); 322 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 323 result = char_set_trusted_interface_->CharSetToUTF16( 324 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP, 325 &output_buffer[0], &utf16result_len); 326 ASSERT_TRUE(result); 327 ASSERT_TRUE(utf16result_len == 2); 328 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 'Z'); 329 330 // Invalid input with SUBSTITUTE. 331 output_buffer.resize(100); 332 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 333 result = char_set_trusted_interface_->CharSetToUTF16( 334 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 335 &output_buffer[0], &utf16result_len); 336 ASSERT_TRUE(result); 337 ASSERT_TRUE(utf16result_len == 3); 338 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 0xFFFD && 339 output_buffer[2] == 'Z'); 340 341 // Invalid encoding name. 342 output_buffer.resize(100); 343 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 344 result = char_set_trusted_interface_->CharSetToUTF16( 345 badutf8, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 346 &output_buffer[0], &utf16result_len); 347 ASSERT_TRUE(!result); 348 ASSERT_TRUE(utf16result_len == 0); 349 350 PASS(); 351 } 352 353 std::string TestCharSet::TestGetDefaultCharSet() { 354 // Test invalid instance. 355 pp::Var result(pp::PASS_REF, char_set_interface_->GetDefaultCharSet(0)); 356 ASSERT_TRUE(result.is_undefined()); 357 358 // Just make sure the default char set is a nonempty string. 359 result = pp::Var(pp::PASS_REF, 360 char_set_interface_->GetDefaultCharSet(instance_->pp_instance())); 361 ASSERT_TRUE(result.is_string()); 362 ASSERT_FALSE(result.AsString().empty()); 363 364 PASS(); 365 } 366 367 std::vector<uint16_t> TestCharSet::UTF8ToUTF16(const std::string& utf8) { 368 uint32_t result_len = 0; 369 uint16_t* result = char_set_interface_->CharSetToUTF16( 370 instance_->pp_instance(), utf8.c_str(), 371 static_cast<uint32_t>(utf8.size()), 372 "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &result_len); 373 374 std::vector<uint16_t> result_vector; 375 if (!result) 376 return result_vector; 377 378 result_vector.assign(result, &result[result_len]); 379 pp::Memory_Dev memory; 380 memory.MemFree(result); 381 return result_vector; 382 } 383