Home | History | Annotate | Download | only in src
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "public/fpdf_text.h"
      8 
      9 #include "core/include/fpdfdoc/fpdf_doc.h"
     10 #include "core/include/fpdftext/fpdf_text.h"
     11 #include "fpdfsdk/include/fsdk_define.h"
     12 
     13 #ifdef PDF_ENABLE_XFA
     14 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h"
     15 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h"
     16 #endif  // PDF_ENABLE_XFA
     17 
     18 #ifdef _WIN32
     19 #include <tchar.h>
     20 #endif
     21 
     22 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) {
     23   CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page);
     24   if (!pPDFPage)
     25     return nullptr;
     26 #ifdef PDF_ENABLE_XFA
     27   CPDFXFA_Page* pPage = (CPDFXFA_Page*)page;
     28   CPDFXFA_Document* pDoc = pPage->GetDocument();
     29   CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc());
     30 #else  // PDF_ENABLE_XFA
     31   CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument);
     32 #endif  // PDF_ENABLE_XFA
     33   IPDF_TextPage* textpage =
     34       IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L());
     35   textpage->ParseTextPage();
     36   return textpage;
     37 }
     38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) {
     39   delete (IPDF_TextPage*)text_page;
     40 }
     41 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) {
     42   if (!text_page)
     43     return -1;
     44   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
     45   return textpage->CountChars();
     46 }
     47 
     48 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page,
     49                                                    int index) {
     50   if (!text_page)
     51     return -1;
     52   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
     53 
     54   if (index < 0 || index >= textpage->CountChars())
     55     return 0;
     56 
     57   FPDF_CHAR_INFO charinfo;
     58   textpage->GetCharInfo(index, &charinfo);
     59   return charinfo.m_Unicode;
     60 }
     61 
     62 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
     63                                               int index) {
     64   if (!text_page)
     65     return 0;
     66   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
     67 
     68   if (index < 0 || index >= textpage->CountChars())
     69     return 0;
     70 
     71   FPDF_CHAR_INFO charinfo;
     72   textpage->GetCharInfo(index, &charinfo);
     73   return charinfo.m_FontSize;
     74 }
     75 
     76 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
     77                                            int index,
     78                                            double* left,
     79                                            double* right,
     80                                            double* bottom,
     81                                            double* top) {
     82   if (!text_page)
     83     return;
     84   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
     85 
     86   if (index < 0 || index >= textpage->CountChars())
     87     return;
     88   FPDF_CHAR_INFO charinfo;
     89   textpage->GetCharInfo(index, &charinfo);
     90   *left = charinfo.m_CharBox.left;
     91   *right = charinfo.m_CharBox.right;
     92   *bottom = charinfo.m_CharBox.bottom;
     93   *top = charinfo.m_CharBox.top;
     94 }
     95 
     96 // select
     97 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
     98                                                  double x,
     99                                                  double y,
    100                                                  double xTolerance,
    101                                                  double yTolerance) {
    102   if (!text_page)
    103     return -3;
    104   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
    105   return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance,
    106                                  (FX_FLOAT)yTolerance);
    107 }
    108 
    109 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page,
    110                                        int start,
    111                                        int count,
    112                                        unsigned short* result) {
    113   if (!text_page)
    114     return 0;
    115   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
    116 
    117   if (start >= textpage->CountChars())
    118     return 0;
    119 
    120   CFX_WideString str = textpage->GetPageText(start, count);
    121   if (str.GetLength() > count)
    122     str = str.Left(count);
    123 
    124   CFX_ByteString cbUTF16str = str.UTF16LE_Encode();
    125   FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()),
    126                cbUTF16str.GetLength());
    127   cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength());
    128 
    129   return cbUTF16str.GetLength() / sizeof(unsigned short);
    130 }
    131 
    132 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page,
    133                                           int start,
    134                                           int count) {
    135   if (!text_page)
    136     return 0;
    137   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
    138   return textpage->CountRects(start, count);
    139 }
    140 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page,
    141                                         int rect_index,
    142                                         double* left,
    143                                         double* top,
    144                                         double* right,
    145                                         double* bottom) {
    146   if (!text_page)
    147     return;
    148   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
    149   CFX_FloatRect rect;
    150   textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom);
    151   *left = rect.left;
    152   *top = rect.top;
    153   *right = rect.right;
    154   *bottom = rect.bottom;
    155 }
    156 
    157 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
    158                                               double left,
    159                                               double top,
    160                                               double right,
    161                                               double bottom,
    162                                               unsigned short* buffer,
    163                                               int buflen) {
    164   if (!text_page)
    165     return 0;
    166   IPDF_TextPage* textpage = (IPDF_TextPage*)text_page;
    167   CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right,
    168                      (FX_FLOAT)top);
    169   CFX_WideString str = textpage->GetTextByRect(rect);
    170 
    171   if (buflen <= 0 || !buffer) {
    172     return str.GetLength();
    173   }
    174 
    175   CFX_ByteString cbUTF16Str = str.UTF16LE_Encode();
    176   int len = cbUTF16Str.GetLength() / sizeof(unsigned short);
    177   int size = buflen > len ? len : buflen;
    178   FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)),
    179                size * sizeof(unsigned short));
    180   cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short));
    181 
    182   return size;
    183 }
    184 
    185 // Search
    186 //-1 for end
    187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page,
    188                                                     FPDF_WIDESTRING findwhat,
    189                                                     unsigned long flags,
    190                                                     int start_index) {
    191   if (!text_page)
    192     return NULL;
    193   IPDF_TextPageFind* textpageFind = NULL;
    194   textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page);
    195   FX_STRSIZE len = CFX_WideString::WStringLength(findwhat);
    196   textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags,
    197                           start_index);
    198   return textpageFind;
    199 }
    200 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) {
    201   if (!handle)
    202     return FALSE;
    203   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
    204   return textpageFind->FindNext();
    205 }
    206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) {
    207   if (!handle)
    208     return FALSE;
    209   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
    210   return textpageFind->FindPrev();
    211 }
    212 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) {
    213   if (!handle)
    214     return 0;
    215   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
    216   return textpageFind->GetCurOrder();
    217 }
    218 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) {
    219   if (!handle)
    220     return 0;
    221   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
    222   return textpageFind->GetMatchedCount();
    223 }
    224 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) {
    225   if (!handle)
    226     return;
    227   IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle;
    228   delete textpageFind;
    229   handle = NULL;
    230 }
    231 
    232 // web link
    233 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) {
    234   if (!text_page)
    235     return NULL;
    236   IPDF_LinkExtract* pageLink = NULL;
    237   pageLink = IPDF_LinkExtract::CreateLinkExtract();
    238   pageLink->ExtractLinks((IPDF_TextPage*)text_page);
    239   return pageLink;
    240 }
    241 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) {
    242   if (!link_page)
    243     return 0;
    244   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
    245   return pageLink->CountLinks();
    246 }
    247 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page,
    248                                       int link_index,
    249                                       unsigned short* buffer,
    250                                       int buflen) {
    251   if (!link_page)
    252     return 0;
    253   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
    254   CFX_WideString url = pageLink->GetURL(link_index);
    255 
    256   CFX_ByteString cbUTF16URL = url.UTF16LE_Encode();
    257   int len = cbUTF16URL.GetLength() / sizeof(unsigned short);
    258   if (!buffer || buflen <= 0)
    259     return len;
    260   int size = len < buflen ? len : buflen;
    261   if (size > 0) {
    262     FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)),
    263                  size * sizeof(unsigned short));
    264     cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short));
    265   }
    266   return size;
    267 }
    268 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page,
    269                                           int link_index) {
    270   if (!link_page)
    271     return 0;
    272   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
    273   CFX_RectArray rectArray;
    274   pageLink->GetRects(link_index, rectArray);
    275   return rectArray.GetSize();
    276 }
    277 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page,
    278                                         int link_index,
    279                                         int rect_index,
    280                                         double* left,
    281                                         double* top,
    282                                         double* right,
    283                                         double* bottom) {
    284   if (!link_page)
    285     return;
    286   IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
    287   CFX_RectArray rectArray;
    288   pageLink->GetRects(link_index, rectArray);
    289   if (rect_index >= 0 && rect_index < rectArray.GetSize()) {
    290     CFX_FloatRect rect = rectArray.GetAt(rect_index);
    291     *left = rect.left;
    292     *right = rect.right;
    293     *top = rect.top;
    294     *bottom = rect.bottom;
    295   }
    296 }
    297 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
    298   delete (IPDF_LinkExtract*)link_page;
    299 }
    300