1 // Copyright 2015 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <memory> 6 7 #include "core/fxcrt/fx_memory.h" 8 #include "public/fpdf_text.h" 9 #include "public/fpdfview.h" 10 #include "testing/embedder_test.h" 11 #include "testing/gtest/include/gtest/gtest.h" 12 #include "testing/test_support.h" 13 14 namespace { 15 16 bool check_unsigned_shorts(const char* expected, 17 const unsigned short* actual, 18 size_t length) { 19 if (length > strlen(expected) + 1) 20 return false; 21 22 for (size_t i = 0; i < length; ++i) { 23 if (actual[i] != static_cast<unsigned short>(expected[i])) 24 return false; 25 } 26 return true; 27 } 28 29 } // namespace 30 31 class FPDFTextEmbeddertest : public EmbedderTest {}; 32 33 TEST_F(FPDFTextEmbeddertest, Text) { 34 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 35 FPDF_PAGE page = LoadPage(0); 36 EXPECT_TRUE(page); 37 38 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 39 EXPECT_TRUE(textpage); 40 41 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; 42 unsigned short fixed_buffer[128]; 43 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 44 45 // Check that edge cases are handled gracefully 46 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, 128, nullptr)); 47 EXPECT_EQ(0, FPDFText_GetText(textpage, -1, 128, fixed_buffer)); 48 EXPECT_EQ(0, FPDFText_GetText(textpage, 0, -1, fixed_buffer)); 49 EXPECT_EQ(1, FPDFText_GetText(textpage, 0, 0, fixed_buffer)); 50 EXPECT_EQ(0, fixed_buffer[0]); 51 52 // Keep going and check the next case. 53 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 54 EXPECT_EQ(2, FPDFText_GetText(textpage, 0, 1, fixed_buffer)); 55 EXPECT_EQ(expected[0], fixed_buffer[0]); 56 EXPECT_EQ(0, fixed_buffer[1]); 57 58 // Check includes the terminating NUL that is provided. 59 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); 60 ASSERT_GE(num_chars, 0); 61 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); 62 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); 63 64 // Count does not include the terminating NUL in the string literal. 65 EXPECT_EQ(sizeof(expected) - 1, 66 static_cast<size_t>(FPDFText_CountChars(textpage))); 67 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { 68 EXPECT_EQ(static_cast<unsigned int>(expected[i]), 69 FPDFText_GetUnicode(textpage, i)) 70 << " at " << i; 71 } 72 73 // Extracting using a buffer that will be completely filled. Small buffer is 74 // 12 elements long, since it will need 2 locations per displayed character in 75 // the expected string, plus 2 more for the terminating character. 76 static const char small_expected[] = "Hello"; 77 unsigned short small_buffer[12]; 78 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 79 EXPECT_EQ(6, FPDFText_GetText(textpage, 0, 5, small_buffer)); 80 EXPECT_TRUE(check_unsigned_shorts(small_expected, small_buffer, 81 sizeof(small_expected))); 82 83 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); 84 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); 85 86 double left = 0.0; 87 double right = 0.0; 88 double bottom = 0.0; 89 double top = 0.0; 90 EXPECT_FALSE(FPDFText_GetCharBox(nullptr, 4, &left, &right, &bottom, &top)); 91 EXPECT_DOUBLE_EQ(0.0, left); 92 EXPECT_DOUBLE_EQ(0.0, right); 93 EXPECT_DOUBLE_EQ(0.0, bottom); 94 EXPECT_DOUBLE_EQ(0.0, top); 95 EXPECT_FALSE(FPDFText_GetCharBox(textpage, -1, &left, &right, &bottom, &top)); 96 EXPECT_DOUBLE_EQ(0.0, left); 97 EXPECT_DOUBLE_EQ(0.0, right); 98 EXPECT_DOUBLE_EQ(0.0, bottom); 99 EXPECT_DOUBLE_EQ(0.0, top); 100 EXPECT_FALSE(FPDFText_GetCharBox(textpage, 55, &left, &right, &bottom, &top)); 101 EXPECT_DOUBLE_EQ(0.0, left); 102 EXPECT_DOUBLE_EQ(0.0, right); 103 EXPECT_DOUBLE_EQ(0.0, bottom); 104 EXPECT_DOUBLE_EQ(0.0, top); 105 106 EXPECT_TRUE(FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top)); 107 EXPECT_NEAR(41.071, left, 0.001); 108 EXPECT_NEAR(46.243, right, 0.001); 109 EXPECT_NEAR(49.844, bottom, 0.001); 110 EXPECT_NEAR(55.520, top, 0.001); 111 112 double x = 0.0; 113 double y = 0.0; 114 EXPECT_TRUE(FPDFText_GetCharOrigin(textpage, 4, &x, &y)); 115 EXPECT_NEAR(40.664, x, 0.001); 116 EXPECT_NEAR(50.000, y, 0.001); 117 118 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0)); 119 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0)); 120 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0)); 121 122 // Test out of range indicies. 123 EXPECT_EQ(-1, 124 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0)); 125 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0)); 126 127 // Count does not include the terminating NUL in the string literal. 128 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); 129 130 left = 0.0; 131 right = 0.0; 132 bottom = 0.0; 133 top = 0.0; 134 EXPECT_TRUE(FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom)); 135 EXPECT_NEAR(20.847, left, 0.001); 136 EXPECT_NEAR(135.167, right, 0.001); 137 EXPECT_NEAR(96.655, bottom, 0.001); 138 EXPECT_NEAR(116.000, top, 0.001); 139 140 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). 141 left = -1.0; 142 right = -1.0; 143 bottom = -1.0; 144 top = -1.0; 145 EXPECT_FALSE(FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom)); 146 EXPECT_EQ(0.0, left); 147 EXPECT_EQ(0.0, right); 148 EXPECT_EQ(0.0, bottom); 149 EXPECT_EQ(0.0, top); 150 151 left = -2.0; 152 right = -2.0; 153 bottom = -2.0; 154 top = -2.0; 155 EXPECT_FALSE(FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom)); 156 EXPECT_EQ(0.0, left); 157 EXPECT_EQ(0.0, right); 158 EXPECT_EQ(0.0, bottom); 159 EXPECT_EQ(0.0, top); 160 161 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); 162 163 // Extract starting at character 4 as above. 164 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 165 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 166 fixed_buffer, 1)); 167 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); 168 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 169 170 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 171 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 172 fixed_buffer, 9)); 173 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 174 EXPECT_EQ(0xbdbd, fixed_buffer[9]); 175 176 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 177 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 178 fixed_buffer, 128)); 179 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 180 EXPECT_EQ(0u, fixed_buffer[9]); 181 EXPECT_EQ(0xbdbd, fixed_buffer[10]); 182 183 FPDFText_ClosePage(textpage); 184 UnloadPage(page); 185 } 186 187 TEST_F(FPDFTextEmbeddertest, TextSearch) { 188 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 189 FPDF_PAGE page = LoadPage(0); 190 EXPECT_TRUE(page); 191 192 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 193 EXPECT_TRUE(textpage); 194 195 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope = 196 GetFPDFWideString(L"nope"); 197 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world = 198 GetFPDFWideString(L"world"); 199 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps = 200 GetFPDFWideString(L"WORLD"); 201 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr = 202 GetFPDFWideString(L"orld"); 203 204 // No occurences of "nope" in test page. 205 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0); 206 EXPECT_TRUE(search); 207 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 208 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 209 210 // Advancing finds nothing. 211 EXPECT_FALSE(FPDFText_FindNext(search)); 212 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 213 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 214 215 // Retreating finds nothing. 216 EXPECT_FALSE(FPDFText_FindPrev(search)); 217 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 218 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 219 FPDFText_FindClose(search); 220 221 // Two occurences of "world" in test page. 222 search = FPDFText_FindStart(textpage, world.get(), 0, 2); 223 EXPECT_TRUE(search); 224 225 // Remains not found until advanced. 226 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 227 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 228 229 // First occurence of "world" in this test page. 230 EXPECT_TRUE(FPDFText_FindNext(search)); 231 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 232 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 233 234 // Last occurence of "world" in this test page. 235 EXPECT_TRUE(FPDFText_FindNext(search)); 236 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 237 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 238 239 // Found position unchanged when fails to advance. 240 EXPECT_FALSE(FPDFText_FindNext(search)); 241 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 242 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 243 244 // Back to first occurence. 245 EXPECT_TRUE(FPDFText_FindPrev(search)); 246 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 247 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 248 249 // Found position unchanged when fails to retreat. 250 EXPECT_FALSE(FPDFText_FindPrev(search)); 251 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 252 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 253 FPDFText_FindClose(search); 254 255 // Exact search unaffected by case sensitiity and whole word flags. 256 search = FPDFText_FindStart(textpage, world.get(), 257 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); 258 EXPECT_TRUE(search); 259 EXPECT_TRUE(FPDFText_FindNext(search)); 260 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 261 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 262 FPDFText_FindClose(search); 263 264 // Default is case-insensitive, so matching agaist caps works. 265 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0); 266 EXPECT_TRUE(search); 267 EXPECT_TRUE(FPDFText_FindNext(search)); 268 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 269 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 270 FPDFText_FindClose(search); 271 272 // But can be made case sensitive, in which case this fails. 273 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0); 274 EXPECT_FALSE(FPDFText_FindNext(search)); 275 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 276 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 277 FPDFText_FindClose(search); 278 279 // Default is match anywhere within word, so matching substirng works. 280 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0); 281 EXPECT_TRUE(FPDFText_FindNext(search)); 282 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); 283 EXPECT_EQ(4, FPDFText_GetSchCount(search)); 284 FPDFText_FindClose(search); 285 286 // But can be made to mach word boundaries, in which case this fails. 287 search = 288 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0); 289 EXPECT_FALSE(FPDFText_FindNext(search)); 290 // TODO(tsepez): investigate strange index/count values in this state. 291 FPDFText_FindClose(search); 292 293 FPDFText_ClosePage(textpage); 294 UnloadPage(page); 295 } 296 297 // Test that the page has characters despite a bad stream length. 298 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { 299 EXPECT_TRUE(OpenDocument("bug_57.pdf")); 300 FPDF_PAGE page = LoadPage(0); 301 EXPECT_TRUE(page); 302 303 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 304 EXPECT_TRUE(textpage); 305 EXPECT_EQ(13, FPDFText_CountChars(textpage)); 306 307 FPDFText_ClosePage(textpage); 308 UnloadPage(page); 309 } 310 311 TEST_F(FPDFTextEmbeddertest, WebLinks) { 312 EXPECT_TRUE(OpenDocument("weblinks.pdf")); 313 FPDF_PAGE page = LoadPage(0); 314 EXPECT_TRUE(page); 315 316 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 317 EXPECT_TRUE(textpage); 318 319 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 320 EXPECT_TRUE(pagelink); 321 322 // Page contains two HTTP-style URLs. 323 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); 324 325 // Only a terminating NUL required for bogus links. 326 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); 327 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); 328 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); 329 330 // Query the number of characters required for each link (incl NUL). 331 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); 332 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); 333 334 static const char expected_url[] = "http://example.com?q=foo"; 335 static const size_t expected_len = sizeof(expected_url); 336 unsigned short fixed_buffer[128]; 337 338 // Retrieve a link with too small a buffer. Buffer will not be 339 // NUL-terminated, but must not be modified past indicated length, 340 // so pre-fill with a pattern to check write bounds. 341 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 342 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); 343 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); 344 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 345 346 // Check buffer that doesn't have space for a terminating NUL. 347 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 348 EXPECT_EQ(static_cast<int>(expected_len - 1), 349 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len - 1)); 350 EXPECT_TRUE( 351 check_unsigned_shorts(expected_url, fixed_buffer, expected_len - 1)); 352 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len - 1]); 353 354 // Retreive link with exactly-sized buffer. 355 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 356 EXPECT_EQ(static_cast<int>(expected_len), 357 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len)); 358 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len)); 359 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]); 360 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]); 361 362 // Retreive link with ample-sized-buffer. 363 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 364 EXPECT_EQ(static_cast<int>(expected_len), 365 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); 366 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len)); 367 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]); 368 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]); 369 370 // Each link rendered in a single rect in this test page. 371 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); 372 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); 373 374 // Each link rendered in a single rect in this test page. 375 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); 376 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); 377 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); 378 379 // Check boundary of valid link index with valid rect index. 380 double left = 0.0; 381 double right = 0.0; 382 double top = 0.0; 383 double bottom = 0.0; 384 EXPECT_TRUE(FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom)); 385 EXPECT_NEAR(50.791, left, 0.001); 386 EXPECT_NEAR(187.963, right, 0.001); 387 EXPECT_NEAR(97.624, bottom, 0.001); 388 EXPECT_NEAR(108.736, top, 0.001); 389 390 // Check that valid link with invalid rect index leaves parameters unchanged. 391 left = -1.0; 392 right = -1.0; 393 top = -1.0; 394 bottom = -1.0; 395 EXPECT_FALSE(FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom)); 396 EXPECT_EQ(-1.0, left); 397 EXPECT_EQ(-1.0, right); 398 EXPECT_EQ(-1.0, bottom); 399 EXPECT_EQ(-1.0, top); 400 401 // Check that invalid link index leaves parameters unchanged. 402 left = -2.0; 403 right = -2.0; 404 top = -2.0; 405 bottom = -2.0; 406 EXPECT_FALSE(FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom)); 407 EXPECT_EQ(-2.0, left); 408 EXPECT_EQ(-2.0, right); 409 EXPECT_EQ(-2.0, bottom); 410 EXPECT_EQ(-2.0, top); 411 412 FPDFLink_CloseWebLinks(pagelink); 413 FPDFText_ClosePage(textpage); 414 UnloadPage(page); 415 } 416 417 TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLines) { 418 EXPECT_TRUE(OpenDocument("weblinks_across_lines.pdf")); 419 FPDF_PAGE page = LoadPage(0); 420 EXPECT_TRUE(page); 421 422 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 423 EXPECT_TRUE(textpage); 424 425 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 426 EXPECT_TRUE(pagelink); 427 428 static const char* const kExpectedUrls[] = { 429 "http://example.com", // from "http://www.example.com?\r\nfoo" 430 "http://example.com/", // from "http://www.example.com/\r\nfoo" 431 "http://example.com/test-foo", // from "http://example.com/test-\r\nfoo" 432 "http://abc.com/test-foo", // from "http://abc.com/test-\r\n\r\nfoo" 433 // Next two links from "http://www.example.com/\r\nhttp://www.abc.com/" 434 "http://example.com/", "http://www.abc.com", 435 }; 436 static const int kNumLinks = static_cast<int>(FX_ArraySize(kExpectedUrls)); 437 438 EXPECT_EQ(kNumLinks, FPDFLink_CountWebLinks(pagelink)); 439 440 unsigned short fixed_buffer[128]; 441 for (int i = 0; i < kNumLinks; i++) { 442 const size_t expected_len = strlen(kExpectedUrls[i]) + 1; 443 memset(fixed_buffer, 0, FX_ArraySize(fixed_buffer)); 444 EXPECT_EQ(static_cast<int>(expected_len), 445 FPDFLink_GetURL(pagelink, i, nullptr, 0)); 446 EXPECT_EQ( 447 static_cast<int>(expected_len), 448 FPDFLink_GetURL(pagelink, i, fixed_buffer, FX_ArraySize(fixed_buffer))); 449 EXPECT_TRUE( 450 check_unsigned_shorts(kExpectedUrls[i], fixed_buffer, expected_len)); 451 } 452 453 FPDFLink_CloseWebLinks(pagelink); 454 FPDFText_ClosePage(textpage); 455 UnloadPage(page); 456 } 457 458 TEST_F(FPDFTextEmbeddertest, WebLinksAcrossLinesBug) { 459 EXPECT_TRUE(OpenDocument("bug_650.pdf")); 460 FPDF_PAGE page = LoadPage(0); 461 EXPECT_TRUE(page); 462 463 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 464 EXPECT_TRUE(textpage); 465 466 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 467 EXPECT_TRUE(pagelink); 468 469 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); 470 unsigned short fixed_buffer[128] = {0}; 471 static const char kExpectedUrl[] = 472 "http://tutorial45.com/learn-autocad-basics-day-166/"; 473 static const int kUrlSize = static_cast<int>(sizeof(kExpectedUrl)); 474 475 EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); 476 EXPECT_EQ(kUrlSize, FPDFLink_GetURL(pagelink, 1, fixed_buffer, 477 FX_ArraySize(fixed_buffer))); 478 EXPECT_TRUE(check_unsigned_shorts(kExpectedUrl, fixed_buffer, kUrlSize)); 479 480 FPDFLink_CloseWebLinks(pagelink); 481 FPDFText_ClosePage(textpage); 482 UnloadPage(page); 483 } 484 485 TEST_F(FPDFTextEmbeddertest, GetFontSize) { 486 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 487 FPDF_PAGE page = LoadPage(0); 488 EXPECT_TRUE(page); 489 490 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 491 EXPECT_TRUE(textpage); 492 493 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 494 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, 495 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; 496 497 int count = FPDFText_CountChars(textpage); 498 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), static_cast<size_t>(count)); 499 for (int i = 0; i < count; ++i) 500 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; 501 502 FPDFText_ClosePage(textpage); 503 UnloadPage(page); 504 } 505 506 TEST_F(FPDFTextEmbeddertest, ToUnicode) { 507 EXPECT_TRUE(OpenDocument("bug_583.pdf")); 508 FPDF_PAGE page = LoadPage(0); 509 EXPECT_TRUE(page); 510 511 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 512 EXPECT_TRUE(textpage); 513 514 ASSERT_EQ(1, FPDFText_CountChars(textpage)); 515 EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0)); 516 517 FPDFText_ClosePage(textpage); 518 UnloadPage(page); 519 } 520 521 TEST_F(FPDFTextEmbeddertest, Bug_921) { 522 EXPECT_TRUE(OpenDocument("bug_921.pdf")); 523 FPDF_PAGE page = LoadPage(0); 524 EXPECT_TRUE(page); 525 526 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 527 EXPECT_TRUE(textpage); 528 529 static constexpr unsigned int kData[] = { 530 1095, 1077, 1083, 1086, 1074, 1077, 1095, 1077, 1089, 1082, 1086, 1077, 531 32, 1089, 1090, 1088, 1072, 1076, 1072, 1085, 1080, 1077, 46, 32}; 532 static constexpr int kStartIndex = 238; 533 534 ASSERT_EQ(268, FPDFText_CountChars(textpage)); 535 for (size_t i = 0; i < FX_ArraySize(kData); ++i) 536 EXPECT_EQ(kData[i], FPDFText_GetUnicode(textpage, kStartIndex + i)); 537 538 unsigned short buffer[FX_ArraySize(kData) + 1]; 539 memset(buffer, 0xbd, sizeof(buffer)); 540 int count = 541 FPDFText_GetText(textpage, kStartIndex, FX_ArraySize(kData), buffer); 542 ASSERT_GT(count, 0); 543 ASSERT_EQ(FX_ArraySize(kData) + 1, static_cast<size_t>(count)); 544 for (size_t i = 0; i < FX_ArraySize(kData); ++i) 545 EXPECT_EQ(kData[i], buffer[i]); 546 EXPECT_EQ(0, buffer[FX_ArraySize(kData)]); 547 548 FPDFText_ClosePage(textpage); 549 UnloadPage(page); 550 } 551 552 TEST_F(FPDFTextEmbeddertest, GetTextWithHyphen) { 553 EXPECT_TRUE(OpenDocument("bug_781804.pdf")); 554 FPDF_PAGE page = LoadPage(0); 555 EXPECT_TRUE(page); 556 557 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 558 EXPECT_TRUE(textpage); 559 560 // Check that soft hyphens are not included 561 // Expecting 'Veritaserum', except there is a \uFFFE where the hyphen was in 562 // the original text. This is a weird thing that Adobe does, which we 563 // replicate. 564 constexpr unsigned short soft_expected[] = { 565 0x0056, 0x0065, 0x0072, 0x0069, 0x0074, 0x0061, 0xfffe, 566 0x0073, 0x0065, 0x0072, 0x0075, 0x006D, 0x0000}; 567 { 568 constexpr int count = FX_ArraySize(soft_expected) - 1; 569 unsigned short buffer[FX_ArraySize(soft_expected)]; 570 memset(buffer, 0, sizeof(buffer)); 571 572 EXPECT_EQ(count + 1, FPDFText_GetText(textpage, 0, count, buffer)); 573 for (int i = 0; i < count; i++) 574 EXPECT_EQ(soft_expected[i], buffer[i]); 575 } 576 577 // Check that hard hyphens are included 578 { 579 // There isn't the \0 in the actual doc, but there is a \r\n, so need to 580 // add 1 to get aligned. 581 constexpr size_t offset = FX_ArraySize(soft_expected) + 1; 582 // Expecting 'User-\r\ngenerated', the - is a unicode character, so cannnot 583 // store in a char[]. 584 constexpr unsigned short hard_expected[] = { 585 0x0055, 0x0073, 0x0065, 0x0072, 0x2010, 0x000d, 0x000a, 0x0067, 0x0065, 586 0x006e, 0x0065, 0x0072, 0x0061, 0x0074, 0x0065, 0x0064, 0x0000}; 587 constexpr int count = FX_ArraySize(hard_expected) - 1; 588 unsigned short buffer[FX_ArraySize(hard_expected)]; 589 590 EXPECT_EQ(count + 1, FPDFText_GetText(textpage, offset, count, buffer)); 591 for (int i = 0; i < count; i++) 592 EXPECT_EQ(hard_expected[i], buffer[i]); 593 } 594 595 FPDFText_ClosePage(textpage); 596 UnloadPage(page); 597 } 598 599 TEST_F(FPDFTextEmbeddertest, bug_782596) { 600 // If there is a regression in this test, it will only fail under ASAN 601 EXPECT_TRUE(OpenDocument("bug_782596.pdf")); 602 FPDF_PAGE page = LoadPage(0); 603 EXPECT_TRUE(page); 604 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 605 EXPECT_TRUE(textpage); 606 FPDFText_ClosePage(textpage); 607 UnloadPage(page); 608 } 609 610 TEST_F(FPDFTextEmbeddertest, ControlCharacters) { 611 EXPECT_TRUE(OpenDocument("control_characters.pdf")); 612 FPDF_PAGE page = LoadPage(0); 613 EXPECT_TRUE(page); 614 615 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 616 EXPECT_TRUE(textpage); 617 618 // Should not include the control characters in the output 619 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; 620 unsigned short fixed_buffer[128]; 621 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 622 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); 623 624 ASSERT_GE(num_chars, 0); 625 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); 626 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); 627 628 // Attempting to get a chunk of text after the control characters 629 static const char expected_substring[] = "Goodbye, world!"; 630 // Offset is the length of 'Hello, world!\r\n' + 2 control characters in the 631 // original stream 632 static const int offset = 17; 633 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 634 num_chars = FPDFText_GetText(textpage, offset, 128, fixed_buffer); 635 636 ASSERT_GE(num_chars, 0); 637 EXPECT_EQ(sizeof(expected_substring), static_cast<size_t>(num_chars)); 638 EXPECT_TRUE(check_unsigned_shorts(expected_substring, fixed_buffer, 639 sizeof(expected_substring))); 640 641 FPDFText_ClosePage(textpage); 642 UnloadPage(page); 643 } 644