Home | History | Annotate | Download | only in fpdfsdk
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "public/fpdf_text.h"
      8 
      9 #include <algorithm>
     10 #include <vector>
     11 
     12 #include "core/fpdfapi/page/cpdf_page.h"
     13 #include "core/fpdfdoc/cpdf_viewerpreferences.h"
     14 #include "core/fpdftext/cpdf_linkextract.h"
     15 #include "core/fpdftext/cpdf_textpage.h"
     16 #include "core/fpdftext/cpdf_textpagefind.h"
     17 #include "fpdfsdk/fsdk_define.h"
     18 #include "third_party/base/numerics/safe_conversions.h"
     19 #include "third_party/base/stl_util.h"
     20 
     21 #ifdef PDF_ENABLE_XFA
     22 #include "fpdfsdk/fpdfxfa/cpdfxfa_context.h"
     23 #include "fpdfsdk/fpdfxfa/cpdfxfa_page.h"
     24 #endif  // PDF_ENABLE_XFA
     25 
     26 #ifdef _WIN32
     27 #include <tchar.h>
     28 #endif
     29 
     30 namespace {
     31 
     32 CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) {
     33   return static_cast<CPDF_TextPage*>(text_page);
     34 }
     35 
     36 CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) {
     37   return static_cast<CPDF_TextPageFind*>(handle);
     38 }
     39 
     40 CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) {
     41   return static_cast<CPDF_LinkExtract*>(link);
     42 }
     43 
     44 }  // namespace
     45 
     46 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
     47   CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
     48   if (!pPDFPage)
     49     return nullptr;
     50 
     51 #ifdef PDF_ENABLE_XFA
     52   CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
     53   CPDFXFA_Context* pContext = pPage->GetContext();
     54   CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc());
     55 #else  // PDF_ENABLE_XFA
     56   CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
     57 #endif  // PDF_ENABLE_XFA
     58 
     59   CPDF_TextPage* textpage = new CPDF_TextPage(
     60       pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right
     61                                          : FPDFText_Direction::Left);
     62   textpage->ParseTextPage();
     63   return textpage;
     64 }
     65 
     66 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
     67   delete CPDFTextPageFromFPDFTextPage(text_page);
     68 }
     69 
     70 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
     71   if (!text_page)
     72     return -1;
     73 
     74   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
     75   return textpage->CountChars();
     76 }
     77 
     78 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
     79                                                    int index) {
     80   if (!text_page)
     81     return 0;
     82 
     83   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
     84   if (index < 0 || index >= textpage->CountChars())
     85     return 0;
     86 
     87   FPDF_CHAR_INFO charinfo;
     88   textpage->GetCharInfo(index, &charinfo);
     89   return charinfo.m_Unicode;
     90 }
     91 
     92 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
     93                                               int index) {
     94   if (!text_page)
     95     return 0;
     96   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
     97 
     98   if (index < 0 || index >= textpage->CountChars())
     99     return 0;
    100 
    101   FPDF_CHAR_INFO charinfo;
    102   textpage->GetCharInfo(index, &charinfo);
    103   return charinfo.m_FontSize;
    104 }
    105 
    106 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
    107                                            int index,
    108                                            double* left,
    109                                            double* right,
    110                                            double* bottom,
    111                                            double* top) {
    112   if (!text_page)
    113     return;
    114   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    115 
    116   if (index < 0 || index >= textpage->CountChars())
    117     return;
    118   FPDF_CHAR_INFO charinfo;
    119   textpage->GetCharInfo(index, &charinfo);
    120   *left = charinfo.m_CharBox.left;
    121   *right = charinfo.m_CharBox.right;
    122   *bottom = charinfo.m_CharBox.bottom;
    123   *top = charinfo.m_CharBox.top;
    124 }
    125 
    126 // select
    127 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
    128                                                  double x,
    129                                                  double y,
    130                                                  double xTolerance,
    131                                                  double yTolerance) {
    132   if (!text_page)
    133     return -3;
    134 
    135   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    136   return textpage->GetIndexAtPos(
    137       CFX_PointF(static_cast<FX_FLOAT>(x), static_cast<FX_FLOAT>(y)),
    138       CFX_SizeF(static_cast<FX_FLOAT>(xTolerance),
    139                 static_cast<FX_FLOAT>(yTolerance)));
    140 }
    141 
    142 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
    143                                        int start,
    144                                        int count,
    145                                        unsigned short* result) {
    146   if (!text_page)
    147     return 0;
    148 
    149   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    150   if (start >= textpage->CountChars())
    151     return 0;
    152 
    153   CFX_WideString str = textpage->GetPageText(start, count);
    154   if (str.GetLength() > count)
    155     str = str.Left(count);
    156 
    157   CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
    158   FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
    159                cbUTF16str.GetLength());
    160   cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
    161 
    162   return cbUTF16str.GetLength() / sizeof(unsigned short);
    163 }
    164 
    165 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
    166                                           int start,
    167                                           int count) {
    168   if (!text_page)
    169     return 0;
    170 
    171   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    172   return textpage->CountRects(start, count);
    173 }
    174 
    175 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
    176                                         int rect_index,
    177                                         double* left,
    178                                         double* top,
    179                                         double* right,
    180                                         double* bottom) {
    181   if (!text_page)
    182     return;
    183 
    184   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    185   CFX_FloatRect rect;
    186   textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
    187   *left = rect.left;
    188   *top = rect.top;
    189   *right = rect.right;
    190   *bottom = rect.bottom;
    191 }
    192 
    193 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
    194                                               double left,
    195                                               double top,
    196                                               double right,
    197                                               double bottom,
    198                                               unsigned short* buffer,
    199                                               int buflen) {
    200   if (!text_page)
    201     return 0;
    202 
    203   CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page);
    204   CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
    205                      (FX_FLOAT)top);
    206   CFX_WideString str = textpage->GetTextByRect(rect);
    207 
    208   if (buflen <= 0 || !buffer)
    209     return str.GetLength();
    210 
    211   CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
    212   int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
    213   int size = buflen > len ? len : buflen;
    214   FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
    215                size * sizeof(unsigned short));
    216   cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
    217 
    218   return size;
    219 }
    220 
    221 // Search
    222 // -1 for end
    223 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
    224                                                     FPDF_WIDESTRING findwhat,
    225                                                     unsigned long flags,
    226                                                     int start_index) {
    227   if (!text_page)
    228     return nullptr;
    229 
    230   CPDF_TextPageFind* textpageFind =
    231       new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page));
    232   FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
    233   textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
    234                           start_index);
    235   return textpageFind;
    236 }
    237 
    238 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
    239   if (!handle)
    240     return false;
    241 
    242   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
    243   return textpageFind->FindNext();
    244 }
    245 
    246 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
    247   if (!handle)
    248     return false;
    249 
    250   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
    251   return textpageFind->FindPrev();
    252 }
    253 
    254 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
    255   if (!handle)
    256     return 0;
    257 
    258   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
    259   return textpageFind->GetCurOrder();
    260 }
    261 
    262 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
    263   if (!handle)
    264     return 0;
    265 
    266   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
    267   return textpageFind->GetMatchedCount();
    268 }
    269 
    270 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
    271   if (!handle)
    272     return;
    273 
    274   CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle);
    275   delete textpageFind;
    276   handle = nullptr;
    277 }
    278 
    279 // web link
    280 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
    281   if (!text_page)
    282     return nullptr;
    283 
    284   CPDF_LinkExtract* pageLink =
    285       new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page));
    286   pageLink->ExtractLinks();
    287   return pageLink;
    288 }
    289 
    290 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
    291   if (!link_page)
    292     return 0;
    293 
    294   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
    295   return pdfium::base::checked_cast<int>(pageLink->CountLinks());
    296 }
    297 
    298 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
    299                                       int link_index,
    300                                       unsigned short* buffer,
    301                                       int buflen) {
    302   CFX_WideString wsUrl(L"");
    303   if (link_page && link_index >= 0) {
    304     CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
    305     wsUrl = pageLink->GetURL(link_index);
    306   }
    307   CFX_ByteString cbUTF16URL = wsUrl.UTF16LE_Encode();
    308   int required = cbUTF16URL.GetLength() / sizeof(unsigned short);
    309   if (!buffer || buflen <= 0)
    310     return required;
    311 
    312   int size = std::min(required, buflen);
    313   if (size > 0) {
    314     int buf_size = size * sizeof(unsigned short);
    315     FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size);
    316   }
    317   return size;
    318 }
    319 
    320 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
    321                                           int link_index) {
    322   if (!link_page || link_index < 0)
    323     return 0;
    324 
    325   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
    326   return pdfium::CollectionSize<int>(pageLink->GetRects(link_index));
    327 }
    328 
    329 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
    330                                         int link_index,
    331                                         int rect_index,
    332                                         double* left,
    333                                         double* top,
    334                                         double* right,
    335                                         double* bottom) {
    336   if (!link_page || link_index < 0 || rect_index < 0)
    337     return;
    338 
    339   CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page);
    340   std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index);
    341   if (rect_index >= pdfium::CollectionSize<int>(rectArray))
    342     return;
    343 
    344   *left = rectArray[rect_index].left;
    345   *right = rectArray[rect_index].right;
    346   *top = rectArray[rect_index].top;
    347   *bottom = rectArray[rect_index].bottom;
    348 }
    349 
    350 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
    351   delete CPDFLinkExtractFromFPDFPageLink(link_page);
    352 }
    353