Home | History | Annotate | Download | only in src
      1 // Copyright 2015 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "../../public/fpdf_text.h"
      6 #include "../../public/fpdfview.h"
      7 #include "../../testing/embedder_test.h"
      8 #include "testing/gtest/include/gtest/gtest.h"
      9 
     namespace {

     // Returns true when the first |length| 16-bit values of |actual| equal the
     // first |length| bytes of the NUL-terminated string |expected|.
     //
     // |length| may be at most strlen(expected) + 1, which lets callers include
     // the terminating NUL in the comparison; anything larger returns false
     // without reading past the end of |expected|.
     bool check_unsigned_shorts(const char* expected,
                                const unsigned short* actual,
                                size_t length) {
       if (length > strlen(expected) + 1) {
         return false;
       }
       for (size_t i = 0; i < length; ++i) {
         // Convert through unsigned char: where plain char is signed, a direct
         // cast would sign-extend bytes >= 0x80 (0xe9 -> 0xffe9) and make the
         // result depend on the platform's char signedness.
         const unsigned short want = static_cast<unsigned char>(expected[i]);
         if (actual[i] != want) {
           return false;
         }
       }
       return true;
     }

     }  // namespace
     27 
     28 class FPDFTextEmbeddertest : public EmbedderTest {
     29 };
     30 
     31 TEST_F(FPDFTextEmbeddertest, Text) {
     32   EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf"));
     33   FPDF_PAGE page = LoadPage(0);
     34   EXPECT_NE(nullptr, page);
     35 
     36   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
     37   EXPECT_NE(nullptr, textpage);
     38 
     39   static const char expected[] = "Hello, world!\r\nGoodbye, world!";
     40   unsigned short fixed_buffer[128];
     41   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
     42 
     43   // Check includes the terminating NUL that is provided.
     44   EXPECT_EQ(sizeof(expected), FPDFText_GetText(textpage, 0, 128, fixed_buffer));
     45   EXPECT_TRUE(check_unsigned_shorts(expected, fixed_buffer, sizeof(expected)));
     46 
     47   // Count does not include the terminating NUL in the string literal.
     48   EXPECT_EQ(sizeof(expected) - 1, FPDFText_CountChars(textpage));
     49   for (size_t i = 0; i < sizeof(expected) - 1; ++i) {
     50     EXPECT_EQ(expected[i], FPDFText_GetUnicode(textpage, i)) << " at " << i;
     51   }
     52 
     53   EXPECT_EQ(12.0, FPDFText_GetFontSize(textpage, 0));
     54   EXPECT_EQ(16.0, FPDFText_GetFontSize(textpage, 15));
     55 
     56   double left = 0.0;
     57   double right = 0.0;
     58   double bottom = 0.0;
     59   double top = 0.0;
     60   FPDFText_GetCharBox(textpage, 4, &left, &right, &bottom, &top);
     61   EXPECT_NEAR(41.071, left, 0.001);
     62   EXPECT_NEAR(46.243, right, 0.001);
     63   EXPECT_NEAR(49.844, bottom, 0.001);
     64   EXPECT_NEAR(55.520, top, 0.001);
     65 
     66   EXPECT_EQ(4, FPDFText_GetCharIndexAtPos(
     67       textpage, 42.0, 50.0, 1.0, 1.0));
     68   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
     69       textpage, 0.0, 0.0, 1.0, 1.0));
     70   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
     71       textpage, 199.0, 199.0, 1.0, 1.0));
     72 
     73   // Test out of range indicies.
     74   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
     75       textpage, 42.0, 10000000.0, 1.0, 1.0));
     76   EXPECT_EQ(-1, FPDFText_GetCharIndexAtPos(
     77       textpage, -1.0, 50.0, 1.0, 1.0));
     78 
     79   // Count does not include the terminating NUL in the string literal.
     80   EXPECT_EQ(2, FPDFText_CountRects(textpage, 0, sizeof(expected) - 1));
     81 
     82   left = 0.0;
     83   right = 0.0;
     84   bottom = 0.0;
     85   top = 0.0;
     86   FPDFText_GetRect(textpage, 1, &left, &top, &right, &bottom);
     87   EXPECT_NEAR(20.847, left, 0.001);
     88   EXPECT_NEAR(135.167, right, 0.001);
     89   EXPECT_NEAR(96.655, bottom, 0.001);
     90   EXPECT_NEAR(116.000, top, 0.001);
     91 
     92   // Test out of range indicies set outputs to (0.0, 0.0, 0.0, 0.0).
     93   left = -1.0;
     94   right = -1.0;
     95   bottom = -1.0;
     96   top = -1.0;
     97   FPDFText_GetRect(textpage, -1, &left, &top, &right, &bottom);
     98   EXPECT_EQ(0.0, left);
     99   EXPECT_EQ(0.0, right);
    100   EXPECT_EQ(0.0, bottom);
    101   EXPECT_EQ(0.0, top);
    102 
    103   left = -2.0;
    104   right = -2.0;
    105   bottom = -2.0;
    106   top = -2.0;
    107   FPDFText_GetRect(textpage, 2, &left, &top, &right, &bottom);
    108   EXPECT_EQ(0.0, left);
    109   EXPECT_EQ(0.0, right);
    110   EXPECT_EQ(0.0, bottom);
    111   EXPECT_EQ(0.0, top);
    112 
    113   EXPECT_EQ(9, FPDFText_GetBoundedText(
    114       textpage, 41.0, 56.0, 82.0, 48.0, 0, 0));
    115 
    116   // Extract starting at character 4 as above.
    117   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    118   EXPECT_EQ(1, FPDFText_GetBoundedText(
    119       textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 1));
    120   EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 1));
    121   EXPECT_EQ(0xbdbd, fixed_buffer[1]);
    122 
    123   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    124   EXPECT_EQ(9, FPDFText_GetBoundedText(
    125       textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 9));
    126   EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
    127   EXPECT_EQ(0xbdbd, fixed_buffer[9]);
    128 
    129   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    130   EXPECT_EQ(10, FPDFText_GetBoundedText(
    131       textpage, 41.0, 56.0, 82.0, 48.0, fixed_buffer, 128));
    132   EXPECT_TRUE(check_unsigned_shorts(expected + 4, fixed_buffer, 9));
    133   EXPECT_EQ(0u, fixed_buffer[9]);
    134   EXPECT_EQ(0xbdbd, fixed_buffer[10]);
    135 
    136   FPDFText_ClosePage(textpage);
    137   UnloadPage(page);
    138 }
    139 
    140 TEST_F(FPDFTextEmbeddertest, TextSearch) {
    141   EXPECT_TRUE(OpenDocument("testing/resources/hello_world.pdf"));
    142   FPDF_PAGE page = LoadPage(0);
    143   EXPECT_NE(nullptr, page);
    144 
    145   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
    146   EXPECT_NE(nullptr, textpage);
    147 
    148   // Avoid issues with system wchar_t width vs. FPDF_WideString.
    149   const unsigned short nope[] = { 'n', 'o', 'p', 'e', '\0' };
    150   const unsigned short world[] = { 'w', 'o', 'r', 'l', 'd', '\0' };
    151   const unsigned short world_caps[] = { 'W', 'O', 'R', 'L', 'D', '\0' };
    152   const unsigned short world_substr[] = { 'o', 'r', 'l', 'd', '\0' };
    153 
    154   // No occurences of "nope" in test page.
    155   FPDF_SCHHANDLE search = FPDFText_FindStart(textpage, nope, 0, 0);
    156   EXPECT_NE(nullptr, search);
    157   EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
    158   EXPECT_EQ(0, FPDFText_GetSchCount(search));
    159 
    160   // Advancing finds nothing.
    161   EXPECT_FALSE(FPDFText_FindNext(search));
    162   EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
    163   EXPECT_EQ(0, FPDFText_GetSchCount(search));
    164 
    165   // Retreating finds nothing.
    166   EXPECT_FALSE(FPDFText_FindPrev(search));
    167   EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
    168   EXPECT_EQ(0, FPDFText_GetSchCount(search));
    169   FPDFText_FindClose(search);
    170 
    171   // Two occurences of "world" in test page.
    172   search = FPDFText_FindStart(textpage, world, 0, 2);
    173   EXPECT_NE(nullptr, search);
    174 
    175   // Remains not found until advanced.
    176   EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
    177   EXPECT_EQ(0, FPDFText_GetSchCount(search));
    178 
    179   // First occurence of "world" in this test page.
    180   EXPECT_TRUE(FPDFText_FindNext(search));
    181   EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
    182   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    183 
    184   // Last occurence of "world" in this test page.
    185   EXPECT_TRUE(FPDFText_FindNext(search));
    186   EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
    187   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    188 
    189   // Found position unchanged when fails to advance.
    190   EXPECT_FALSE(FPDFText_FindNext(search));
    191   EXPECT_EQ(24, FPDFText_GetSchResultIndex(search));
    192   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    193 
    194   // Back to first occurence.
    195   EXPECT_TRUE(FPDFText_FindPrev(search));
    196   EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
    197   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    198 
    199   // Found position unchanged when fails to retreat.
    200   EXPECT_FALSE(FPDFText_FindPrev(search));
    201   EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
    202   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    203   FPDFText_FindClose(search);
    204 
    205   // Exact search unaffected by case sensitiity and whole word flags.
    206   search = FPDFText_FindStart(
    207       textpage, world, FPDF_MATCHCASE | FPDF_MATCHWHOLEWORD, 0);
    208   EXPECT_NE(nullptr, search);
    209   EXPECT_TRUE(FPDFText_FindNext(search));
    210   EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
    211   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    212   FPDFText_FindClose(search);
    213 
    214   // Default is case-insensitive, so matching agaist caps works.
    215   search = FPDFText_FindStart(textpage, world_caps, 0, 0);
    216   EXPECT_NE(nullptr, search);
    217   EXPECT_TRUE(FPDFText_FindNext(search));
    218   EXPECT_EQ(7, FPDFText_GetSchResultIndex(search));
    219   EXPECT_EQ(5, FPDFText_GetSchCount(search));
    220   FPDFText_FindClose(search);
    221 
    222   // But can be made case sensitive, in which case this fails.
    223   search = FPDFText_FindStart(textpage, world_caps, FPDF_MATCHCASE, 0);
    224   EXPECT_FALSE(FPDFText_FindNext(search));
    225   EXPECT_EQ(0, FPDFText_GetSchResultIndex(search));
    226   EXPECT_EQ(0, FPDFText_GetSchCount(search));
    227   FPDFText_FindClose(search);
    228 
    229   // Default is match anywhere within word, so matching substirng works.
    230   search = FPDFText_FindStart(textpage, world_substr, 0, 0);
    231   EXPECT_TRUE(FPDFText_FindNext(search));
    232   EXPECT_EQ(8, FPDFText_GetSchResultIndex(search));
    233   EXPECT_EQ(4, FPDFText_GetSchCount(search));
    234   FPDFText_FindClose(search);
    235 
    236   // But can be made to mach word boundaries, in which case this fails.
    237   search = FPDFText_FindStart(textpage, world_substr, FPDF_MATCHWHOLEWORD, 0);
    238   EXPECT_FALSE(FPDFText_FindNext(search));
    239   // TODO(tsepez): investigate strange index/count values in this state.
    240   FPDFText_FindClose(search);
    241 
    242   FPDFText_ClosePage(textpage);
    243   UnloadPage(page);
    244 }
    245 
    246 // Test that the page has characters despite a bad stream length.
    247 TEST_F(FPDFTextEmbeddertest, StreamLengthPastEndOfFile) {
    248   EXPECT_TRUE(OpenDocument("testing/resources/bug_57.pdf"));
    249   FPDF_PAGE page = LoadPage(0);
    250   EXPECT_NE(nullptr, page);
    251 
    252   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
    253   EXPECT_NE(nullptr, textpage);
    254   EXPECT_EQ(13, FPDFText_CountChars(textpage));
    255 
    256   FPDFText_ClosePage(textpage);
    257   UnloadPage(page);
    258 }
    259 
    260 TEST_F(FPDFTextEmbeddertest, WebLinks) {
    261   EXPECT_TRUE(OpenDocument("testing/resources/weblinks.pdf"));
    262   FPDF_PAGE page = LoadPage(0);
    263   EXPECT_NE(nullptr, page);
    264 
    265   FPDF_TEXTPAGE textpage = FPDFText_LoadPage(page);
    266   EXPECT_NE(nullptr, textpage);
    267 
    268   FPDF_PAGELINK pagelink = FPDFLink_LoadWebLinks(textpage);
    269   EXPECT_NE(nullptr, pagelink);
    270 
    271   // Page contains two HTTP-style URLs.
    272   EXPECT_EQ(2, FPDFLink_CountWebLinks(pagelink));
    273 
    274   // Only a terminating NUL required for bogus links.
    275   EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 2, nullptr, 0));
    276   EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 1400, nullptr, 0));
    277   EXPECT_EQ(1, FPDFLink_GetURL(pagelink, -1, nullptr, 0));
    278 
    279   // Query the number of characters required for each link (incl NUL).
    280   EXPECT_EQ(25, FPDFLink_GetURL(pagelink, 0, nullptr, 0));
    281   EXPECT_EQ(26, FPDFLink_GetURL(pagelink, 1, nullptr, 0));
    282 
    283   static const char expected_url[] = "http://example.com?q=foo";
    284   unsigned short fixed_buffer[128];
    285 
    286   // Retrieve a link with too small a buffer.  Buffer will not be
    287   // NUL-terminated, but must not be modified past indicated length,
    288   // so pre-fill with a pattern to check write bounds.
    289   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    290   EXPECT_EQ(1, FPDFLink_GetURL(pagelink, 0, fixed_buffer, 1));
    291   EXPECT_TRUE(check_unsigned_shorts(expected_url, fixed_buffer, 1));
    292   EXPECT_EQ(0xbdbd, fixed_buffer[1]);
    293 
    294   // Check buffer that doesn't have space for a terminating NUL.
    295   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    296   EXPECT_EQ(sizeof(expected_url) - 1, FPDFLink_GetURL(
    297       pagelink, 0, fixed_buffer, sizeof(expected_url) - 1));
    298   EXPECT_TRUE(check_unsigned_shorts(
    299       expected_url, fixed_buffer, sizeof(expected_url) - 1));
    300   EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url) - 1]);
    301 
    302   // Retreive link with exactly-sized buffer.
    303   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    304   EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL(
    305       pagelink, 0, fixed_buffer, sizeof(expected_url)));
    306   EXPECT_TRUE(check_unsigned_shorts(
    307       expected_url, fixed_buffer, sizeof(expected_url)));
    308   EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
    309   EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
    310 
    311   // Retreive link with ample-sized-buffer.
    312   memset(fixed_buffer, 0xbd, sizeof(fixed_buffer));
    313   EXPECT_EQ(sizeof(expected_url), FPDFLink_GetURL(
    314       pagelink, 0, fixed_buffer, 128));
    315   EXPECT_TRUE(check_unsigned_shorts(
    316       expected_url, fixed_buffer, sizeof(expected_url)));
    317   EXPECT_EQ(0u, fixed_buffer[sizeof(expected_url) - 1]);
    318   EXPECT_EQ(0xbdbd, fixed_buffer[sizeof(expected_url)]);
    319 
    320   // Each link rendered in a single rect in this test page.
    321   EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 0));
    322   EXPECT_EQ(1, FPDFLink_CountRects(pagelink, 1));
    323 
    324   // Each link rendered in a single rect in this test page.
    325   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, -1));
    326   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 2));
    327   EXPECT_EQ(0, FPDFLink_CountRects(pagelink, 10000));
    328 
    329   // Check boundary of valid link index with valid rect index.
    330   double left = 0.0;
    331   double right = 0.0;
    332   double top = 0.0;
    333   double bottom = 0.0;
    334   FPDFLink_GetRect(pagelink, 0, 0, &left, &top, &right, &bottom);
    335   EXPECT_NEAR(50.791, left, 0.001);
    336   EXPECT_NEAR(187.963, right, 0.001);
    337   EXPECT_NEAR(97.624, bottom, 0.001);
    338   EXPECT_NEAR(108.736, top, 0.001);
    339 
    340   // Check that valid link with invalid rect index leaves parameters unchanged.
    341   left = -1.0;
    342   right = -1.0;
    343   top = -1.0;
    344   bottom = -1.0;
    345   FPDFLink_GetRect(pagelink, 0, 1, &left, &top, &right, &bottom);
    346   EXPECT_EQ(-1.0, left);
    347   EXPECT_EQ(-1.0, right);
    348   EXPECT_EQ(-1.0, bottom);
    349   EXPECT_EQ(-1.0, top);
    350 
    351   // Check that invalid link index leaves parameters unchanged.
    352   left = -2.0;
    353   right = -2.0;
    354   top = -2.0;
    355   bottom = -2.0;
    356   FPDFLink_GetRect(pagelink, -1, 0, &left, &top, &right, &bottom);
    357   EXPECT_EQ(-2.0, left);
    358   EXPECT_EQ(-2.0, right);
    359   EXPECT_EQ(-2.0, bottom);
    360   EXPECT_EQ(-2.0, top);
    361 
    362   FPDFLink_CloseWebLinks(pagelink);
    363   FPDFText_ClosePage(textpage);
    364   UnloadPage(page);
    365 }
    366