1 // Copyright 2015 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "../../public/fpdf_text.h" 6 #include "../../public/fpdfview.h" 7 #include "../../testing/embedder_test.h" 8 #include "testing/gtest/include/gtest/gtest.h" 9 10 namespace { 11 12 static bool check_unsigned_shorts(const char* expected, 13 const unsigned short* actual, 14 size_t length) { 15 if (length > strlen(expected) + 1) { 16 return false; 17 } 18 for (size_t i = 0; i < length; ++i) { 19 if (actual[i] != static_cast<unsigned short>(expected[i])) { 20 return false; 21 } 22 } 23 return true; 24 } 25 26 } // namespace 27 28 class FPDFTextEmbeddertest : public EmbedderTest { 29 }; 30 31 TEST_F(FPDFTextEmbeddertest, Text) { 32 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); 33 FPDF_PAGE page = LoadPage(0); 34 EXPECT_NE(nullptr, page); 35 36 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 37 EXPECT_NE(nullptr, textpage); 38 39 static const char expected[] = "Hello, world!\r\nGoodbye, world!"; 40 unsigned short fixed_buffer[128]; 41 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 42 43 // Check includes the terminating NUL that is provided. 44 EXPECT_EQ(sizeof(expected), FPDFText_GetText(textpage, 0, 128, fixed_buffer)); 45 EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected))); 46 47 // Count does not include the terminating NUL in the string literal. 48 EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage)); 49 for (size_t i = 0; i < sizeof(expected) - 1; ++i) { 50 EXPECT_EQ(expected[i], FPDFText_GetUnicode(textpage, i)) << " at " << i; 51 } 52 53 EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0)); 54 EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15)); 55 56 double left = 0.0; 57 double right = 0.0; 58 double bottom = 0.0; 59 double top = 0.0; 60 FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top); 61 EXPECT_NEAR(41.071, left, 0.001); 62 EXPECT_NEAR(46.243, right, 0.001); 63 EXPECT_NEAR(49.844, bottom, 0.001); 64 EXPECT_NEAR(55.520, top, 0.001); 65 66 EXPECT_EQ(4, FPDFText_GetCharIndexAtPos( 67 textpage, 42.0, 50.0, 1.0, 1.0)); 68 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( 69 textpage, 0.0, 0.0, 1.0, 1.0)); 70 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( 71 textpage, 199.0, 199.0, 1.0, 1.0)); 72 73 // Test out of range indicies. 74 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( 75 textpage, 42.0, 10000000.0, 1.0, 1.0)); 76 EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos( 77 textpage, -1.0, 50.0, 1.0, 1.0)); 78 79 // Count does not include the terminating NUL in the string literal. 80 EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1)); 81 82 left = 0.0; 83 right = 0.0; 84 bottom = 0.0; 85 top = 0.0; 86 FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom); 87 EXPECT_NEAR(20.847, left, 0.001); 88 EXPECT_NEAR(135.167, right, 0.001); 89 EXPECT_NEAR(96.655, bottom, 0.001); 90 EXPECT_NEAR(116.000, top, 0.001); 91 92 // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0). 93 left = -1.0; 94 right = -1.0; 95 bottom = -1.0; 96 top = -1.0; 97 FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom); 98 EXPECT_EQ(0.0, left); 99 EXPECT_EQ(0.0, right); 100 EXPECT_EQ(0.0, bottom); 101 EXPECT_EQ(0.0, top); 102 103 left = -2.0; 104 right = -2.0; 105 bottom = -2.0; 106 top = -2.0; 107 FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom); 108 EXPECT_EQ(0.0, left); 109 EXPECT_EQ(0.0, right); 110 EXPECT_EQ(0.0, bottom); 111 EXPECT_EQ(0.0, top); 112 113 EXPECT_EQ(9, FPDFText_GetBoundedText( 114 textpage, 41.0, 56.0, 82.0, 48.0, 0, 0)); 115 116 // Extract starting at character 4 as above. 117 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 118 EXPECT_EQ(1, FPDFText_GetBoundedText( 119 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 1)); 120 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1)); 121 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 122 123 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 124 EXPECT_EQ(9, FPDFText_GetBoundedText( 125 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 9)); 126 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 127 EXPECT_EQ(0xbdbd, fixed_buffer[9]); 128 129 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 130 EXPECT_EQ(10, FPDFText_GetBoundedText( 131 textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 128)); 132 EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9)); 133 EXPECT_EQ(0u, fixed_buffer[9]); 134 EXPECT_EQ(0xbdbd, fixed_buffer[10]); 135 136 FPDFText_ClosePage(textpage); 137 UnloadPage(page); 138 } 139 140 TEST_F(FPDFTextEmbeddertest, TextSearch) { 141 EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf")); 142 FPDF_PAGE page = LoadPage(0); 143 EXPECT_NE(nullptr, page); 144 145 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 146 EXPECT_NE(nullptr, textpage); 147 148 // Avoid issues with system wchar_t width vs. FPDF_WideString. 149 const unsigned short nope[] = { 'n', 'o', 'p', 'e', '\0' }; 150 const unsigned short world[] = { 'w', 'o', 'r', 'l', 'd', '\0' }; 151 const unsigned short world_caps[] = { 'W', 'O', 'R', 'L', 'D', '\0' }; 152 const unsigned short world_substr[] = { 'o', 'r', 'l', 'd', '\0' }; 153 154 // No occurences of "nope" in test page. 155 FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0); 156 EXPECT_NE(nullptr, search); 157 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 158 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 159 160 // Advancing finds nothing. 161 EXPECT_FALSE(FPDFText_FindNext(search)); 162 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 163 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 164 165 // Retreating finds nothing. 166 EXPECT_FALSE(FPDFText_FindPrev(search)); 167 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 168 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 169 FPDFText_FindClose(search); 170 171 // Two occurences of "world" in test page. 172 search = FPDFText_FindStart(textpage, world, 0, 2); 173 EXPECT_NE(nullptr, search); 174 175 // Remains not found until advanced. 176 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 177 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 178 179 // First occurence of "world" in this test page. 180 EXPECT_TRUE(FPDFText_FindNext(search)); 181 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 182 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 183 184 // Last occurence of "world" in this test page. 185 EXPECT_TRUE(FPDFText_FindNext(search)); 186 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 187 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 188 189 // Found position unchanged when fails to advance. 190 EXPECT_FALSE(FPDFText_FindNext(search)); 191 EXPECT_EQ(24, FPDFText_GetSchResultIndex(search)); 192 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 193 194 // Back to first occurence. 195 EXPECT_TRUE(FPDFText_FindPrev(search)); 196 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 197 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 198 199 // Found position unchanged when fails to retreat. 200 EXPECT_FALSE(FPDFText_FindPrev(search)); 201 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 202 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 203 FPDFText_FindClose(search); 204 205 // Exact search unaffected by case sensitiity and whole word flags. 206 search = FPDFText_FindStart( 207 textpage, world, FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0); 208 EXPECT_NE(nullptr, search); 209 EXPECT_TRUE(FPDFText_FindNext(search)); 210 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 211 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 212 FPDFText_FindClose(search); 213 214 // Default is case-insensitive, so matching agaist caps works. 215 search = FPDFText_FindStart(textpage, world_caps, 0, 0); 216 EXPECT_NE(nullptr, search); 217 EXPECT_TRUE(FPDFText_FindNext(search)); 218 EXPECT_EQ(7, FPDFText_GetSchResultIndex(search)); 219 EXPECT_EQ(5, FPDFText_GetSchCount(search)); 220 FPDFText_FindClose(search); 221 222 // But can be made case sensitive, in which case this fails. 223 search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0); 224 EXPECT_FALSE(FPDFText_FindNext(search)); 225 EXPECT_EQ(0, FPDFText_GetSchResultIndex(search)); 226 EXPECT_EQ(0, FPDFText_GetSchCount(search)); 227 FPDFText_FindClose(search); 228 229 // Default is match anywhere within word, so matching substirng works. 230 search = FPDFText_FindStart(textpage, world_substr, 0, 0); 231 EXPECT_TRUE(FPDFText_FindNext(search)); 232 EXPECT_EQ(8, FPDFText_GetSchResultIndex(search)); 233 EXPECT_EQ(4, FPDFText_GetSchCount(search)); 234 FPDFText_FindClose(search); 235 236 // But can be made to mach word boundaries, in which case this fails. 237 search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0); 238 EXPECT_FALSE(FPDFText_FindNext(search)); 239 // TODO(tsepez): investigate strange index/count values in this state. 240 FPDFText_FindClose(search); 241 242 FPDFText_ClosePage(textpage); 243 UnloadPage(page); 244 } 245 246 // Test that the page has characters despite a bad stream length. 247 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) { 248 EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf")); 249 FPDF_PAGE page = LoadPage(0); 250 EXPECT_NE(nullptr, page); 251 252 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 253 EXPECT_NE(nullptr, textpage); 254 EXPECT_EQ(13, FPDFText_CountChars(textpage)); 255 256 FPDFText_ClosePage(textpage); 257 UnloadPage(page); 258 } 259 260 TEST_F(FPDFTextEmbeddertest, WebLinks) { 261 EXPECT_TRUE(OpenDocument("testing/resources/weblinks.pdf")); 262 FPDF_PAGE page = LoadPage(0); 263 EXPECT_NE(nullptr, page); 264 265 FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page); 266 EXPECT_NE(nullptr, textpage); 267 268 FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage); 269 EXPECT_NE(nullptr, pagelink); 270 271 // Page contains two HTTP-style URLs. 272 EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink)); 273 274 // Only a terminating NUL required for bogus links. 275 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0)); 276 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0)); 277 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0)); 278 279 // Query the number of characters required for each link (incl NUL). 280 EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0)); 281 EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0)); 282 283 static const char expected_url[] = "http://example.com?q=foo"; 284 unsigned short fixed_buffer[128]; 285 286 // Retrieve a link with too small a buffer. Buffer will not be 287 // NUL-terminated, but must not be modified past indicated length, 288 // so pre-fill with a pattern to check write bounds. 289 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 290 EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1)); 291 EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1)); 292 EXPECT_EQ(0xbdbd, fixed_buffer[1]); 293 294 // Check buffer that doesn't have space for a terminating NUL. 295 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 296 EXPECT_EQ(sizeof(expected_url) - 1, FPDFLink_GetURL( 297 pagelink, 0, fixed_buffer, sizeof(expected_url) - 1)); 298 EXPECT_TRUE(check_unsigned_shorts( 299 expected_url, fixed_buffer, sizeof(expected_url) - 1)); 300 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]); 301 302 // Retreive link with exactly-sized buffer. 303 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 304 EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL( 305 pagelink, 0, fixed_buffer, sizeof(expected_url))); 306 EXPECT_TRUE(check_unsigned_shorts( 307 expected_url, fixed_buffer, sizeof(expected_url))); 308 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); 309 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); 310 311 // Retreive link with ample-sized-buffer. 312 memset(fixed_buffer, 0xbd, sizeof(fixed_buffer)); 313 EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL( 314 pagelink, 0, fixed_buffer, 128)); 315 EXPECT_TRUE(check_unsigned_shorts( 316 expected_url, fixed_buffer, sizeof(expected_url))); 317 EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]); 318 EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]); 319 320 // Each link rendered in a single rect in this test page. 321 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0)); 322 EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1)); 323 324 // Each link rendered in a single rect in this test page. 325 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1)); 326 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2)); 327 EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000)); 328 329 // Check boundary of valid link index with valid rect index. 330 double left = 0.0; 331 double right = 0.0; 332 double top = 0.0; 333 double bottom = 0.0; 334 FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom); 335 EXPECT_NEAR(50.791, left, 0.001); 336 EXPECT_NEAR(187.963, right, 0.001); 337 EXPECT_NEAR(97.624, bottom, 0.001); 338 EXPECT_NEAR(108.736, top, 0.001); 339 340 // Check that valid link with invalid rect index leaves parameters unchanged. 341 left = -1.0; 342 right = -1.0; 343 top = -1.0; 344 bottom = -1.0; 345 FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom); 346 EXPECT_EQ(-1.0, left); 347 EXPECT_EQ(-1.0, right); 348 EXPECT_EQ(-1.0, bottom); 349 EXPECT_EQ(-1.0, top); 350 351 // Check that invalid link index leaves parameters unchanged. 352 left = -2.0; 353 right = -2.0; 354 top = -2.0; 355 bottom = -2.0; 356 FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom); 357 EXPECT_EQ(-2.0, left); 358 EXPECT_EQ(-2.0, right); 359 EXPECT_EQ(-2.0, bottom); 360 EXPECT_EQ(-2.0, top); 361 362 FPDFLink_CloseWebLinks(pagelink); 363 FPDFText_ClosePage(textpage); 364 UnloadPage(page); 365 } 366