1 // Copyright 2015 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include <memory> 6 7 #include "core/fxcrt/fx_basic.h" 8 #include "public/fpdf_text.h" 9 #include "public/fpdfview.h" 10 #include "testing/embedder_test.h" 11 #include "testing/gtest/include/gtest/gtest.h" 12 #include "testing/test_support.h" 13 14 namespace { 15 16 bool check_unsigned_shorts(const char* expected, 17 const unsigned short* actual, 18 size_t length) { 19 if (length > strlen(expected) + 1) { 20 return false; 21 } 22 for (size_t i = 0; i < length; ++i) { 23 if (actual[i] != static_cast<unsigned short>(expected[i])) { 24 return false; 25 } 26 } 27 return true; 28 } 29 30 } // namespace 31 32 class FPDFTextEmbeddertest : public EmbedderTest {}; 33 34 TEST_F(FPDFTextEmbeddertest, Text) { 35 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 36 FPDF_PAGE page = LoadPage(0); 37 EXPECT_TRUE(page); 38 39 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 40 EXPECT_TRUE(textpage); 41 42 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; 43 unsigned short fixed_buffer[128]; 44 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 45 46 // Check includes the terminating NUL that is provided. 47 int num_chars = FPDFText_GetText(textpage, 0, 128, fixed_buffer); 48 ASSERT_GE(num_chars, 0); 49 EXPECT_EQ(sizeof(expected), static_cast<size_t>(num_chars)); 50 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); 51 52 // Count does not include the terminating NUL in the string literal. 53 EXPECT_EQ(sizeof(expected) - 1, 54 static_cast<size_t>(FPDFText_CountChars(textpage))); 55 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { 56 EXPECT_EQ(static_cast<unsigned int>(expected[i]), 57 FPDFText_GetUnicode(textpage, i)) 58 << " at " << i; 59 } 60 61 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); 62 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); 63 64 double left = 0.0; 65 double right = 0.0; 66 double bottom = 0.0; 67 double top = 0.0; 68 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); 69 EXPECT_NEAR(41.071, left, 0.001); 70 EXPECT_NEAR(46.243, right, 0.001); 71 EXPECT_NEAR(49.844, bottom, 0.001); 72 EXPECT_NEAR(55.520, top, 0.001); 73 74 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(textpage, 42.0, 50.0, 1.0, 1.0)); 75 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 0.0, 0.0, 1.0, 1.0)); 76 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, 199.0, 199.0, 1.0, 1.0)); 77 78 // Test out of range indicies. 79 EXPECT_EQ(-1, 80 FPDFText_GetCharIndexAtPos(textpage, 42.0, 10000000.0, 1.0, 1.0)); 81 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(textpage, -1.0, 50.0, 1.0, 1.0)); 82 83 // Count does not include the terminating NUL in the string literal. 84 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); 85 86 left = 0.0; 87 right = 0.0; 88 bottom = 0.0; 89 top = 0.0; 90 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); 91 EXPECT_NEAR(20.847, left, 0.001); 92 EXPECT_NEAR(135.167, right, 0.001); 93 EXPECT_NEAR(96.655, bottom, 0.001); 94 EXPECT_NEAR(116.000, top, 0.001); 95 96 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). 97 left = -1.0; 98 right = -1.0; 99 bottom = -1.0; 100 top = -1.0; 101 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); 102 EXPECT_EQ(0.0, left); 103 EXPECT_EQ(0.0, right); 104 EXPECT_EQ(0.0, bottom); 105 EXPECT_EQ(0.0, top); 106 107 left = -2.0; 108 right = -2.0; 109 bottom = -2.0; 110 top = -2.0; 111 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); 112 EXPECT_EQ(0.0, left); 113 EXPECT_EQ(0.0, right); 114 EXPECT_EQ(0.0, bottom); 115 EXPECT_EQ(0.0, top); 116 117 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); 118 119 // Extract starting at character 4 as above. 120 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 121 EXPECT_EQ(1, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 122 fixed_buffer, 1)); 123 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); 124 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 125 126 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 127 EXPECT_EQ(9, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 128 fixed_buffer, 9)); 129 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 130 EXPECT_EQ(0xbdbd, fixed_buffer[9]); 131 132 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 133 EXPECT_EQ(10, FPDFText_GetBoundedText(textpage, 41.0, 56.0, 82.0, 48.0, 134 fixed_buffer, 128)); 135 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 136 EXPECT_EQ(0u, fixed_buffer[9]); 137 EXPECT_EQ(0xbdbd, fixed_buffer[10]); 138 139 FPDFText_ClosePage(textpage); 140 UnloadPage(page); 141 } 142 143 TEST_F(FPDFTextEmbeddertest, TextSearch) { 144 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 145 FPDF_PAGE page = LoadPage(0); 146 EXPECT_TRUE(page); 147 148 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 149 EXPECT_TRUE(textpage); 150 151 std::unique_ptr<unsigned short, pdfium::FreeDeleter> nope = 152 GetFPDFWideString(L"nope"); 153 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world = 154 GetFPDFWideString(L"world"); 155 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_caps = 156 GetFPDFWideString(L"WORLD"); 157 std::unique_ptr<unsigned short, pdfium::FreeDeleter> world_substr = 158 GetFPDFWideString(L"orld"); 159 160 // No occurences of "nope" in test page. 161 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope.get(), 0, 0); 162 EXPECT_TRUE(search); 163 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 164 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 165 166 // Advancing finds nothing. 167 EXPECT_FALSE(FPDFText_FindNext(search)); 168 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 169 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 170 171 // Retreating finds nothing. 172 EXPECT_FALSE(FPDFText_FindPrev(search)); 173 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 174 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 175 FPDFText_FindClose(search); 176 177 // Two occurences of "world" in test page. 178 search = FPDFText_FindStart(textpage, world.get(), 0, 2); 179 EXPECT_TRUE(search); 180 181 // Remains not found until advanced. 182 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 183 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 184 185 // First occurence of "world" in this test page. 186 EXPECT_TRUE(FPDFText_FindNext(search)); 187 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 188 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 189 190 // Last occurence of "world" in this test page. 191 EXPECT_TRUE(FPDFText_FindNext(search)); 192 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 193 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 194 195 // Found position unchanged when fails to advance. 196 EXPECT_FALSE(FPDFText_FindNext(search)); 197 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 198 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 199 200 // Back to first occurence. 201 EXPECT_TRUE(FPDFText_FindPrev(search)); 202 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 203 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 204 205 // Found position unchanged when fails to retreat. 206 EXPECT_FALSE(FPDFText_FindPrev(search)); 207 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 208 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 209 FPDFText_FindClose(search); 210 211 // Exact search unaffected by case sensitiity and whole word flags. 212 search = FPDFText_FindStart(textpage, world.get(), 213 FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); 214 EXPECT_TRUE(search); 215 EXPECT_TRUE(FPDFText_FindNext(search)); 216 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 217 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 218 FPDFText_FindClose(search); 219 220 // Default is case-insensitive, so matching agaist caps works. 221 search = FPDFText_FindStart(textpage, world_caps.get(), 0, 0); 222 EXPECT_TRUE(search); 223 EXPECT_TRUE(FPDFText_FindNext(search)); 224 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 225 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 226 FPDFText_FindClose(search); 227 228 // But can be made case sensitive, in which case this fails. 229 search = FPDFText_FindStart(textpage, world_caps.get(), FPDF_MATCHCASE, 0); 230 EXPECT_FALSE(FPDFText_FindNext(search)); 231 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 232 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 233 FPDFText_FindClose(search); 234 235 // Default is match anywhere within word, so matching substirng works. 236 search = FPDFText_FindStart(textpage, world_substr.get(), 0, 0); 237 EXPECT_TRUE(FPDFText_FindNext(search)); 238 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); 239 EXPECT_EQ(4, FPDFText_GetSchCount(search)); 240 FPDFText_FindClose(search); 241 242 // But can be made to mach word boundaries, in which case this fails. 243 search = 244 FPDFText_FindStart(textpage, world_substr.get(), FPDF_MATCHWHOLEWORD, 0); 245 EXPECT_FALSE(FPDFText_FindNext(search)); 246 // TODO(tsepez): investigate strange index/count values in this state. 247 FPDFText_FindClose(search); 248 249 FPDFText_ClosePage(textpage); 250 UnloadPage(page); 251 } 252 253 // Test that the page has characters despite a bad stream length. 254 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { 255 EXPECT_TRUE(OpenDocument("bug_57.pdf")); 256 FPDF_PAGE page = LoadPage(0); 257 EXPECT_TRUE(page); 258 259 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 260 EXPECT_TRUE(textpage); 261 EXPECT_EQ(13, FPDFText_CountChars(textpage)); 262 263 FPDFText_ClosePage(textpage); 264 UnloadPage(page); 265 } 266 267 TEST_F(FPDFTextEmbeddertest, WebLinks) { 268 EXPECT_TRUE(OpenDocument("weblinks.pdf")); 269 FPDF_PAGE page = LoadPage(0); 270 EXPECT_TRUE(page); 271 272 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 273 EXPECT_TRUE(textpage); 274 275 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 276 EXPECT_TRUE(pagelink); 277 278 // Page contains two HTTP-style URLs. 279 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); 280 281 // Only a terminating NUL required for bogus links. 282 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); 283 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); 284 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); 285 286 // Query the number of characters required for each link (incl NUL). 287 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); 288 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); 289 290 static const char expected_url[] = "http://example.com?q=foo"; 291 static const size_t expected_len = sizeof(expected_url); 292 unsigned short fixed_buffer[128]; 293 294 // Retrieve a link with too small a buffer. Buffer will not be 295 // NUL-terminated, but must not be modified past indicated length, 296 // so pre-fill with a pattern to check write bounds. 297 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 298 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); 299 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); 300 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 301 302 // Check buffer that doesn't have space for a terminating NUL. 303 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 304 EXPECT_EQ(static_cast<int>(expected_len - 1), 305 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len - 1)); 306 EXPECT_TRUE( 307 check_unsigned_shorts(expected_url, fixed_buffer, expected_len - 1)); 308 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len - 1]); 309 310 // Retreive link with exactly-sized buffer. 311 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 312 EXPECT_EQ(static_cast<int>(expected_len), 313 FPDFLink_GetURL(pagelink, 0, fixed_buffer, expected_len)); 314 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len)); 315 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]); 316 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]); 317 318 // Retreive link with ample-sized-buffer. 319 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 320 EXPECT_EQ(static_cast<int>(expected_len), 321 FPDFLink_GetURL(pagelink, 0, fixed_buffer, 128)); 322 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, expected_len)); 323 EXPECT_EQ(0u, fixed_buffer[expected_len - 1]); 324 EXPECT_EQ(0xbdbd, fixed_buffer[expected_len]); 325 326 // Each link rendered in a single rect in this test page. 327 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); 328 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); 329 330 // Each link rendered in a single rect in this test page. 331 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); 332 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); 333 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); 334 335 // Check boundary of valid link index with valid rect index. 336 double left = 0.0; 337 double right = 0.0; 338 double top = 0.0; 339 double bottom = 0.0; 340 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); 341 EXPECT_NEAR(50.791, left, 0.001); 342 EXPECT_NEAR(187.963, right, 0.001); 343 EXPECT_NEAR(97.624, bottom, 0.001); 344 EXPECT_NEAR(108.736, top, 0.001); 345 346 // Check that valid link with invalid rect index leaves parameters unchanged. 347 left = -1.0; 348 right = -1.0; 349 top = -1.0; 350 bottom = -1.0; 351 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); 352 EXPECT_EQ(-1.0, left); 353 EXPECT_EQ(-1.0, right); 354 EXPECT_EQ(-1.0, bottom); 355 EXPECT_EQ(-1.0, top); 356 357 // Check that invalid link index leaves parameters unchanged. 358 left = -2.0; 359 right = -2.0; 360 top = -2.0; 361 bottom = -2.0; 362 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); 363 EXPECT_EQ(-2.0, left); 364 EXPECT_EQ(-2.0, right); 365 EXPECT_EQ(-2.0, bottom); 366 EXPECT_EQ(-2.0, top); 367 368 FPDFLink_CloseWebLinks(pagelink); 369 FPDFText_ClosePage(textpage); 370 UnloadPage(page); 371 } 372 373 TEST_F(FPDFTextEmbeddertest, GetFontSize) { 374 EXPECT_TRUE(OpenDocument("hello_world.pdf")); 375 FPDF_PAGE page = LoadPage(0); 376 EXPECT_TRUE(page); 377 378 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 379 EXPECT_TRUE(textpage); 380 381 const double kExpectedFontsSizes[] = {12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 382 12, 12, 12, 1, 1, 16, 16, 16, 16, 16, 383 16, 16, 16, 16, 16, 16, 16, 16, 16, 16}; 384 385 int count = FPDFText_CountChars(textpage); 386 ASSERT_EQ(FX_ArraySize(kExpectedFontsSizes), static_cast<size_t>(count)); 387 for (int i = 0; i < count; ++i) 388 EXPECT_EQ(kExpectedFontsSizes[i], FPDFText_GetFontSize(textpage, i)) << i; 389 390 FPDFText_ClosePage(textpage); 391 UnloadPage(page); 392 } 393 394 TEST_F(FPDFTextEmbeddertest, ToUnicode) { 395 EXPECT_TRUE(OpenDocument("bug_583.pdf")); 396 FPDF_PAGE page = LoadPage(0); 397 EXPECT_TRUE(page); 398 399 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 400 EXPECT_TRUE(textpage); 401 402 ASSERT_EQ(1, FPDFText_CountChars(textpage)); 403 EXPECT_EQ(static_cast<unsigned int>(0), FPDFText_GetUnicode(textpage, 0)); 404 405 FPDFText_ClosePage(textpage); 406 UnloadPage(page); 407 } 408