1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "ppapi/tests/test_char_set.h" 6 7 #include "ppapi/c/dev/ppb_char_set_dev.h" 8 #include "ppapi/c/trusted/ppb_char_set_trusted.h" 9 #include "ppapi/cpp/dev/memory_dev.h" 10 #include "ppapi/cpp/module.h" 11 #include "ppapi/tests/testing_instance.h" 12 13 REGISTER_TEST_CASE(CharSet); 14 15 TestCharSet::TestCharSet(TestingInstance* instance) 16 : TestCase(instance), 17 char_set_interface_(NULL) { 18 } 19 20 bool TestCharSet::Init() { 21 char_set_interface_ = static_cast<const PPB_CharSet_Dev*>( 22 pp::Module::Get()->GetBrowserInterface(PPB_CHAR_SET_DEV_INTERFACE)); 23 char_set_trusted_interface_ = static_cast<const PPB_CharSet_Trusted*>( 24 pp::Module::Get()->GetBrowserInterface(PPB_CHARSET_TRUSTED_INTERFACE)); 25 return char_set_interface_ && char_set_trusted_interface_; 26 } 27 28 void TestCharSet::RunTests(const std::string& filter) { 29 RUN_TEST(UTF16ToCharSetDeprecated, filter); 30 RUN_TEST(UTF16ToCharSet, filter); 31 RUN_TEST(CharSetToUTF16Deprecated, filter); 32 RUN_TEST(CharSetToUTF16, filter); 33 RUN_TEST(GetDefaultCharSet, filter); 34 } 35 36 // TODO(brettw) remove this when the old interface is removed. 37 std::string TestCharSet::TestUTF16ToCharSetDeprecated() { 38 // Empty string. 39 std::vector<uint16_t> utf16; 40 utf16.push_back(0); 41 uint32_t utf8result_len = 0; 42 pp::Memory_Dev memory; 43 char* utf8result = char_set_interface_->UTF16ToCharSet( 44 instance_->pp_instance(), &utf16[0], 0, "latin1", 45 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 46 ASSERT_TRUE(utf8result); 47 ASSERT_TRUE(utf8result[0] == 0); 48 ASSERT_TRUE(utf8result_len == 0); 49 memory.MemFree(utf8result); 50 51 // Try round-tripping some English & Chinese from UTF-8 through UTF-16 52 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd"); 53 utf16 = UTF8ToUTF16(utf8source); 54 utf8result = char_set_interface_->UTF16ToCharSet( 55 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 56 "Utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len); 57 ASSERT_TRUE(utf8source == std::string(utf8result, utf8result_len)); 58 memory.MemFree(utf8result); 59 60 // Test an un-encodable character with various modes. 61 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i"); 62 63 // Fail mode. 64 utf8result_len = 1234; // Test that this gets 0'ed on failure. 65 utf8result = char_set_interface_->UTF16ToCharSet( 66 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 67 "latin1", PP_CHARSET_CONVERSIONERROR_FAIL, &utf8result_len); 68 ASSERT_TRUE(utf8result_len == 0); 69 ASSERT_TRUE(utf8result == NULL); 70 71 // Skip mode. 72 utf8result = char_set_interface_->UTF16ToCharSet( 73 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 74 "latin1", PP_CHARSET_CONVERSIONERROR_SKIP, &utf8result_len); 75 ASSERT_TRUE(utf8result_len == 2); 76 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == 'i' && 77 utf8result[2] == 0); 78 memory.MemFree(utf8result); 79 80 // Substitute mode. 81 utf8result = char_set_interface_->UTF16ToCharSet( 82 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 83 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 84 ASSERT_TRUE(utf8result_len == 3); 85 ASSERT_TRUE(utf8result[0] == 'h' && utf8result[1] == '?' && 86 utf8result[2] == 'i' && utf8result[3] == 0); 87 memory.MemFree(utf8result); 88 89 // Try some invalid input encoding. 90 utf16.clear(); 91 utf16.push_back(0xD800); // High surrogate. 92 utf16.push_back('A'); // Not a low surrogate. 93 utf8result = char_set_interface_->UTF16ToCharSet( 94 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 95 "latin1", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 96 ASSERT_TRUE(utf8result_len == 2); 97 ASSERT_TRUE(utf8result[0] == '?' && utf8result[1] == 'A' && 98 utf8result[2] == 0); 99 memory.MemFree(utf8result); 100 101 // Invalid encoding name. 102 utf8result = char_set_interface_->UTF16ToCharSet( 103 instance_->pp_instance(), &utf16[0], static_cast<uint32_t>(utf16.size()), 104 "poopiepants", PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf8result_len); 105 ASSERT_TRUE(!utf8result); 106 ASSERT_TRUE(utf8result_len == 0); 107 108 PASS(); 109 } 110 111 std::string TestCharSet::TestUTF16ToCharSet() { 112 // Empty string. 113 std::vector<uint16_t> utf16; 114 utf16.push_back(0); 115 std::string output_buffer; 116 output_buffer.resize(1); 117 uint32_t utf8result_len = 0; 118 PP_Bool result = char_set_trusted_interface_->UTF16ToCharSet( 119 &utf16[0], 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 120 &output_buffer[0], &utf8result_len); 121 ASSERT_TRUE(result == PP_TRUE); 122 ASSERT_TRUE(utf8result_len == 0); 123 124 // No output buffer returns length of string. 125 utf16 = UTF8ToUTF16("hello"); 126 utf8result_len = 0; 127 result = char_set_trusted_interface_->UTF16ToCharSet( 128 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1", 129 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, NULL, &utf8result_len); 130 ASSERT_TRUE(result == PP_TRUE); 131 ASSERT_TRUE(utf8result_len == 5); 132 133 // Giving too small of a buffer just fills in that many items and gives us 134 // the desired size. 135 output_buffer.resize(100); 136 utf8result_len = 2; 137 output_buffer[utf8result_len] = '$'; // Barrier character. 138 result = char_set_trusted_interface_->UTF16ToCharSet( 139 &utf16[0], static_cast<uint32_t>(utf16.size()), "latin1", 140 PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 141 &output_buffer[0], &utf8result_len); 142 ASSERT_TRUE(result == PP_TRUE); 143 ASSERT_TRUE(utf8result_len == 5); 144 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'e' && 145 output_buffer[2] == '$'); 146 147 // Try round-tripping some English & Chinese from UTF-8 through UTF-16 148 std::string utf8source("Hello, world. \xe4\xbd\xa0\xe5\xa5\xbd"); 149 utf16 = UTF8ToUTF16(utf8source); 150 output_buffer.resize(100); 151 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 152 result = char_set_trusted_interface_->UTF16ToCharSet( 153 &utf16[0], static_cast<uint32_t>(utf16.size()), 154 "Utf-8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 155 &output_buffer[0], &utf8result_len); 156 ASSERT_TRUE(result == PP_TRUE); 157 output_buffer.resize(utf8result_len); 158 ASSERT_TRUE(utf8source == output_buffer); 159 160 // Test an un-encodable character with various modes. 161 utf16 = UTF8ToUTF16("h\xe4\xbd\xa0i"); 162 163 // Fail mode, size should get 0'ed on failure. 164 output_buffer.resize(100); 165 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 166 result = char_set_trusted_interface_->UTF16ToCharSet( 167 &utf16[0], static_cast<uint32_t>(utf16.size()), 168 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 169 &output_buffer[0], &utf8result_len); 170 ASSERT_TRUE(result == PP_FALSE); 171 ASSERT_TRUE(utf8result_len == 0); 172 173 // Skip mode. 174 output_buffer.resize(100); 175 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 176 result = char_set_trusted_interface_->UTF16ToCharSet( 177 &utf16[0], static_cast<uint32_t>(utf16.size()), 178 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP, 179 &output_buffer[0], &utf8result_len); 180 ASSERT_TRUE(result == PP_TRUE); 181 ASSERT_TRUE(utf8result_len == 2); 182 ASSERT_TRUE(output_buffer[0] == 'h' && output_buffer[1] == 'i'); 183 184 // Substitute mode. 185 output_buffer.resize(100); 186 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 187 result = char_set_trusted_interface_->UTF16ToCharSet( 188 &utf16[0], static_cast<uint32_t>(utf16.size()), 189 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 190 &output_buffer[0], &utf8result_len); 191 ASSERT_TRUE(result == PP_TRUE); 192 ASSERT_TRUE(utf8result_len == 3); 193 output_buffer.resize(utf8result_len); 194 ASSERT_TRUE(output_buffer == "h?i"); 195 196 // Try some invalid input encoding. 197 output_buffer.resize(100); 198 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 199 utf16.clear(); 200 utf16.push_back(0xD800); // High surrogate. 201 utf16.push_back('A'); // Not a low surrogate. 202 result = char_set_trusted_interface_->UTF16ToCharSet( 203 &utf16[0], static_cast<uint32_t>(utf16.size()), 204 "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 205 &output_buffer[0], &utf8result_len); 206 ASSERT_TRUE(result == PP_TRUE); 207 ASSERT_TRUE(utf8result_len == 2); 208 ASSERT_TRUE(output_buffer[0] == '?' && output_buffer[1] == 'A'); 209 210 // Invalid encoding name. 211 output_buffer.resize(100); 212 utf8result_len = static_cast<uint32_t>(output_buffer.size()); 213 result = char_set_trusted_interface_->UTF16ToCharSet( 214 &utf16[0], static_cast<uint32_t>(utf16.size()), 215 "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 216 &output_buffer[0], &utf8result_len); 217 ASSERT_TRUE(result == PP_FALSE); 218 ASSERT_TRUE(utf8result_len == 0); 219 220 PASS(); 221 } 222 223 // TODO(brettw) remove this when the old interface is removed. 224 std::string TestCharSet::TestCharSetToUTF16Deprecated() { 225 pp::Memory_Dev memory; 226 227 // Empty string. 228 uint32_t utf16result_len; 229 uint16_t* utf16result = char_set_interface_->CharSetToUTF16( 230 instance_->pp_instance(), "", 0, "latin1", 231 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 232 ASSERT_TRUE(utf16result); 233 ASSERT_TRUE(utf16result_len == 0); 234 ASSERT_TRUE(utf16result[0] == 0); 235 memory.MemFree(utf16result); 236 237 // Basic Latin1. 238 char latin1[] = "H\xef"; 239 utf16result = char_set_interface_->CharSetToUTF16( 240 instance_->pp_instance(), latin1, 2, "latin1", 241 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 242 ASSERT_TRUE(utf16result); 243 ASSERT_TRUE(utf16result_len == 2); 244 ASSERT_TRUE(utf16result[0] == 'H' && utf16result[1] == 0xef && 245 utf16result[2] == 0); 246 memory.MemFree(utf16result); 247 248 // Invalid input encoding with FAIL. 249 char badutf8[] = "A\xe4Z"; 250 utf16result = char_set_interface_->CharSetToUTF16( 251 instance_->pp_instance(), badutf8, 3, "utf8", 252 PP_CHARSET_CONVERSIONERROR_FAIL, &utf16result_len); 253 ASSERT_TRUE(!utf16result); 254 ASSERT_TRUE(utf16result_len == 0); 255 memory.MemFree(utf16result); 256 257 // Invalid input with SKIP. 258 utf16result = char_set_interface_->CharSetToUTF16( 259 instance_->pp_instance(), badutf8, 3, "utf8", 260 PP_CHARSET_CONVERSIONERROR_SKIP, &utf16result_len); 261 ASSERT_TRUE(utf16result); 262 ASSERT_TRUE(utf16result_len == 2); 263 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 'Z' && 264 utf16result[2] == 0); 265 memory.MemFree(utf16result); 266 267 // Invalid input with SUBSTITUTE. 268 utf16result = char_set_interface_->CharSetToUTF16( 269 instance_->pp_instance(), badutf8, 3, "utf8", 270 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len); 271 ASSERT_TRUE(utf16result); 272 ASSERT_TRUE(utf16result_len == 3); 273 ASSERT_TRUE(utf16result[0] == 'A' && utf16result[1] == 0xFFFD && 274 utf16result[2] == 'Z' && utf16result[3] == 0); 275 memory.MemFree(utf16result); 276 277 // Invalid encoding name. 278 utf16result = char_set_interface_->CharSetToUTF16( 279 instance_->pp_instance(), badutf8, 3, "poopiepants", 280 PP_CHARSET_CONVERSIONERROR_SUBSTITUTE, &utf16result_len); 281 ASSERT_TRUE(!utf16result); 282 ASSERT_TRUE(utf16result_len == 0); 283 memory.MemFree(utf16result); 284 285 PASS(); 286 } 287 288 std::string TestCharSet::TestCharSetToUTF16() { 289 std::vector<uint16_t> output_buffer; 290 output_buffer.resize(100); 291 292 // Empty string. 293 output_buffer.resize(100); 294 uint32_t utf16result_len = static_cast<uint32_t>(output_buffer.size()); 295 PP_Bool result = char_set_trusted_interface_->CharSetToUTF16( 296 "", 0, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 297 &output_buffer[0], &utf16result_len); 298 ASSERT_TRUE(result); 299 ASSERT_TRUE(utf16result_len == 0); 300 ASSERT_TRUE(output_buffer[0] == 0); 301 302 // Basic Latin1. 303 output_buffer.resize(100); 304 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 305 char latin1[] = "H\xef"; 306 result = char_set_trusted_interface_->CharSetToUTF16( 307 latin1, 2, "latin1", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 308 &output_buffer[0], &utf16result_len); 309 ASSERT_TRUE(result); 310 ASSERT_TRUE(utf16result_len == 2); 311 ASSERT_TRUE(output_buffer[0] == 'H' && output_buffer[1] == 0xef); 312 313 // Invalid input encoding with FAIL. 314 output_buffer.resize(100); 315 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 316 char badutf8[] = "A\xe4Z"; 317 result = char_set_trusted_interface_->CharSetToUTF16( 318 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL, 319 &output_buffer[0], &utf16result_len); 320 ASSERT_TRUE(!result); 321 ASSERT_TRUE(utf16result_len == 0); 322 323 // Invalid input with SKIP. 324 output_buffer.resize(100); 325 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 326 result = char_set_trusted_interface_->CharSetToUTF16( 327 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP, 328 &output_buffer[0], &utf16result_len); 329 ASSERT_TRUE(result); 330 ASSERT_TRUE(utf16result_len == 2); 331 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 'Z'); 332 333 // Invalid input with SUBSTITUTE. 334 output_buffer.resize(100); 335 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 336 result = char_set_trusted_interface_->CharSetToUTF16( 337 badutf8, 3, "utf8", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 338 &output_buffer[0], &utf16result_len); 339 ASSERT_TRUE(result); 340 ASSERT_TRUE(utf16result_len == 3); 341 ASSERT_TRUE(output_buffer[0] == 'A' && output_buffer[1] == 0xFFFD && 342 output_buffer[2] == 'Z'); 343 344 // Invalid encoding name. 345 output_buffer.resize(100); 346 utf16result_len = static_cast<uint32_t>(output_buffer.size()); 347 result = char_set_trusted_interface_->CharSetToUTF16( 348 badutf8, 3, "poopiepants", PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE, 349 &output_buffer[0], &utf16result_len); 350 ASSERT_TRUE(!result); 351 ASSERT_TRUE(utf16result_len == 0); 352 353 PASS(); 354 } 355 356 std::string TestCharSet::TestGetDefaultCharSet() { 357 // Test invalid instance. 358 pp::Var result(pp::PASS_REF, char_set_interface_->GetDefaultCharSet(0)); 359 ASSERT_TRUE(result.is_undefined()); 360 361 // Just make sure the default char set is a nonempty string. 362 result = pp::Var(pp::PASS_REF, 363 char_set_interface_->GetDefaultCharSet(instance_->pp_instance())); 364 ASSERT_TRUE(result.is_string()); 365 ASSERT_FALSE(result.AsString().empty()); 366 367 PASS(); 368 } 369 370 std::vector<uint16_t> TestCharSet::UTF8ToUTF16(const std::string& utf8) { 371 uint32_t result_len = 0; 372 uint16_t* result = char_set_interface_->CharSetToUTF16( 373 instance_->pp_instance(), utf8.c_str(), 374 static_cast<uint32_t>(utf8.size()), 375 "utf-8", PP_CHARSET_CONVERSIONERROR_FAIL, &result_len); 376 377 std::vector<uint16_t> result_vector; 378 if (!result) 379 return result_vector; 380 381 result_vector.assign(result, &result[result_len]); 382 pp::Memory_Dev memory; 383 memory.MemFree(result); 384 return result_vector; 385 } 386