1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_text.h" 8 9 #include "core/include/fpdfdoc/fpdf_doc.h" 10 #include "core/include/fpdftext/fpdf_text.h" 11 #include "fpdfsdk/include/fsdk_define.h" 12 13 #ifdef PDF_ENABLE_XFA 14 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_doc.h" 15 #include "fpdfsdk/include/fpdfxfa/fpdfxfa_page.h" 16 #endif // PDF_ENABLE_XFA 17 18 #ifdef _WIN32 19 #include <tchar.h> 20 #endif 21 22 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page) { 23 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page); 24 if (!pPDFPage) 25 return nullptr; 26 #ifdef PDF_ENABLE_XFA 27 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page; 28 CPDFXFA_Document* pDoc = pPage->GetDocument(); 29 CPDF_ViewerPreferences viewRef(pDoc->GetPDFDoc()); 30 #else // PDF_ENABLE_XFA 31 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument); 32 #endif // PDF_ENABLE_XFA 33 IPDF_TextPage* textpage = 34 IPDF_TextPage::CreateTextPage(pPDFPage, viewRef.IsDirectionR2L()); 35 textpage->ParseTextPage(); 36 return textpage; 37 } 38 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page) { 39 delete (IPDF_TextPage*)text_page; 40 } 41 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page) { 42 if (!text_page) 43 return -1; 44 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 45 return textpage->CountChars(); 46 } 47 48 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, 49 int index) { 50 if (!text_page) 51 return -1; 52 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 53 54 if (index < 0 || index >= textpage->CountChars()) 55 return 0; 56 57 FPDF_CHAR_INFO charinfo; 58 textpage->GetCharInfo(index, &charinfo); 59 return charinfo.m_Unicode; 60 } 61 62 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, 
63 int index) { 64 if (!text_page) 65 return 0; 66 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 67 68 if (index < 0 || index >= textpage->CountChars()) 69 return 0; 70 71 FPDF_CHAR_INFO charinfo; 72 textpage->GetCharInfo(index, &charinfo); 73 return charinfo.m_FontSize; 74 } 75 76 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, 77 int index, 78 double* left, 79 double* right, 80 double* bottom, 81 double* top) { 82 if (!text_page) 83 return; 84 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 85 86 if (index < 0 || index >= textpage->CountChars()) 87 return; 88 FPDF_CHAR_INFO charinfo; 89 textpage->GetCharInfo(index, &charinfo); 90 *left = charinfo.m_CharBox.left; 91 *right = charinfo.m_CharBox.right; 92 *bottom = charinfo.m_CharBox.bottom; 93 *top = charinfo.m_CharBox.top; 94 } 95 96 // select 97 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 98 double x, 99 double y, 100 double xTolerance, 101 double yTolerance) { 102 if (!text_page) 103 return -3; 104 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 105 return textpage->GetIndexAtPos((FX_FLOAT)x, (FX_FLOAT)y, (FX_FLOAT)xTolerance, 106 (FX_FLOAT)yTolerance); 107 } 108 109 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, 110 int start, 111 int count, 112 unsigned short* result) { 113 if (!text_page) 114 return 0; 115 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 116 117 if (start >= textpage->CountChars()) 118 return 0; 119 120 CFX_WideString str = textpage->GetPageText(start, count); 121 if (str.GetLength() > count) 122 str = str.Left(count); 123 124 CFX_ByteString cbUTF16str = str.UTF16LE_Encode(); 125 FXSYS_memcpy(result, cbUTF16str.GetBuffer(cbUTF16str.GetLength()), 126 cbUTF16str.GetLength()); 127 cbUTF16str.ReleaseBuffer(cbUTF16str.GetLength()); 128 129 return cbUTF16str.GetLength() / sizeof(unsigned short); 130 } 131 132 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, 133 int start, 134 int count) { 135 
if (!text_page) 136 return 0; 137 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 138 return textpage->CountRects(start, count); 139 } 140 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, 141 int rect_index, 142 double* left, 143 double* top, 144 double* right, 145 double* bottom) { 146 if (!text_page) 147 return; 148 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 149 CFX_FloatRect rect; 150 textpage->GetRect(rect_index, rect.left, rect.top, rect.right, rect.bottom); 151 *left = rect.left; 152 *top = rect.top; 153 *right = rect.right; 154 *bottom = rect.bottom; 155 } 156 157 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, 158 double left, 159 double top, 160 double right, 161 double bottom, 162 unsigned short* buffer, 163 int buflen) { 164 if (!text_page) 165 return 0; 166 IPDF_TextPage* textpage = (IPDF_TextPage*)text_page; 167 CFX_FloatRect rect((FX_FLOAT)left, (FX_FLOAT)bottom, (FX_FLOAT)right, 168 (FX_FLOAT)top); 169 CFX_WideString str = textpage->GetTextByRect(rect); 170 171 if (buflen <= 0 || !buffer) { 172 return str.GetLength(); 173 } 174 175 CFX_ByteString cbUTF16Str = str.UTF16LE_Encode(); 176 int len = cbUTF16Str.GetLength() / sizeof(unsigned short); 177 int size = buflen > len ? 
len : buflen; 178 FXSYS_memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)), 179 size * sizeof(unsigned short)); 180 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short)); 181 182 return size; 183 } 184 185 // Search 186 //-1 for end 187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, 188 FPDF_WIDESTRING findwhat, 189 unsigned long flags, 190 int start_index) { 191 if (!text_page) 192 return NULL; 193 IPDF_TextPageFind* textpageFind = NULL; 194 textpageFind = IPDF_TextPageFind::CreatePageFind((IPDF_TextPage*)text_page); 195 FX_STRSIZE len = CFX_WideString::WStringLength(findwhat); 196 textpageFind->FindFirst(CFX_WideString::FromUTF16LE(findwhat, len), flags, 197 start_index); 198 return textpageFind; 199 } 200 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle) { 201 if (!handle) 202 return FALSE; 203 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; 204 return textpageFind->FindNext(); 205 } 206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle) { 207 if (!handle) 208 return FALSE; 209 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; 210 return textpageFind->FindPrev(); 211 } 212 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) { 213 if (!handle) 214 return 0; 215 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; 216 return textpageFind->GetCurOrder(); 217 } 218 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle) { 219 if (!handle) 220 return 0; 221 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; 222 return textpageFind->GetMatchedCount(); 223 } 224 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle) { 225 if (!handle) 226 return; 227 IPDF_TextPageFind* textpageFind = (IPDF_TextPageFind*)handle; 228 delete textpageFind; 229 handle = NULL; 230 } 231 232 // web link 233 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { 234 if (!text_page) 235 return NULL; 
236 IPDF_LinkExtract* pageLink = NULL; 237 pageLink = IPDF_LinkExtract::CreateLinkExtract(); 238 pageLink->ExtractLinks((IPDF_TextPage*)text_page); 239 return pageLink; 240 } 241 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { 242 if (!link_page) 243 return 0; 244 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; 245 return pageLink->CountLinks(); 246 } 247 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, 248 int link_index, 249 unsigned short* buffer, 250 int buflen) { 251 if (!link_page) 252 return 0; 253 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; 254 CFX_WideString url = pageLink->GetURL(link_index); 255 256 CFX_ByteString cbUTF16URL = url.UTF16LE_Encode(); 257 int len = cbUTF16URL.GetLength() / sizeof(unsigned short); 258 if (!buffer || buflen <= 0) 259 return len; 260 int size = len < buflen ? len : buflen; 261 if (size > 0) { 262 FXSYS_memcpy(buffer, cbUTF16URL.GetBuffer(size * sizeof(unsigned short)), 263 size * sizeof(unsigned short)); 264 cbUTF16URL.ReleaseBuffer(size * sizeof(unsigned short)); 265 } 266 return size; 267 } 268 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, 269 int link_index) { 270 if (!link_page) 271 return 0; 272 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; 273 CFX_RectArray rectArray; 274 pageLink->GetRects(link_index, rectArray); 275 return rectArray.GetSize(); 276 } 277 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, 278 int link_index, 279 int rect_index, 280 double* left, 281 double* top, 282 double* right, 283 double* bottom) { 284 if (!link_page) 285 return; 286 IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page; 287 CFX_RectArray rectArray; 288 pageLink->GetRects(link_index, rectArray); 289 if (rect_index >= 0 && rect_index < rectArray.GetSize()) { 290 CFX_FloatRect rect = rectArray.GetAt(rect_index); 291 *left = rect.left; 292 *right = rect.right; 293 *top = rect.top; 294 *bottom = rect.bottom; 295 } 296 } 297 
// Destroys the link extractor behind |link_page|. A null handle is harmless
// because deleting a null pointer is a no-op.
DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) {
  IPDF_LinkExtract* pageLink = (IPDF_LinkExtract*)link_page;
  delete pageLink;
}