1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_text.h" 8 9 #include <algorithm> 10 #include <vector> 11 12 #include "core/fpdfapi/page/cpdf_page.h" 13 #include "core/fpdfdoc/cpdf_viewerpreferences.h" 14 #include "core/fpdftext/cpdf_linkextract.h" 15 #include "core/fpdftext/cpdf_textpage.h" 16 #include "core/fpdftext/cpdf_textpagefind.h" 17 #include "fpdfsdk/fsdk_define.h" 18 #include "third_party/base/numerics/safe_conversions.h" 19 #include "third_party/base/stl_util.h" 20 21 #ifdef PDF_ENABLE_XFA 22 #include "fpdfsdk/fpdfxfa/cpdfxfa_context.h" 23 #include "fpdfsdk/fpdfxfa/cpdfxfa_page.h" 24 #endif // PDF_ENABLE_XFA 25 26 #ifdef _WIN32 27 #include <tchar.h> 28 #endif 29 30 namespace { 31 32 constexpr size_t kBytesPerCharacter = sizeof(unsigned short); 33 34 CPDF_TextPage* CPDFTextPageFromFPDFTextPage(FPDF_TEXTPAGE text_page) { 35 return static_cast<CPDF_TextPage*>(text_page); 36 } 37 38 CPDF_TextPageFind* CPDFTextPageFindFromFPDFSchHandle(FPDF_SCHHANDLE handle) { 39 return static_cast<CPDF_TextPageFind*>(handle); 40 } 41 42 CPDF_LinkExtract* CPDFLinkExtractFromFPDFPageLink(FPDF_PAGELINK link) { 43 return static_cast<CPDF_LinkExtract*>(link); 44 } 45 46 } // namespace 47 48 FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page) { 49 CPDF_Page* pPDFPage = CPDFPageFromFPDFPage(page); 50 if (!pPDFPage) 51 return nullptr; 52 53 #ifdef PDF_ENABLE_XFA 54 CPDFXFA_Page* pPage = (CPDFXFA_Page*)page; 55 CPDFXFA_Context* pContext = pPage->GetContext(); 56 CPDF_ViewerPreferences viewRef(pContext->GetPDFDoc()); 57 #else // PDF_ENABLE_XFA 58 CPDF_ViewerPreferences viewRef(pPDFPage->m_pDocument.Get()); 59 #endif // PDF_ENABLE_XFA 60 61 CPDF_TextPage* textpage = new CPDF_TextPage( 62 pPDFPage, viewRef.IsDirectionR2L() ? FPDFText_Direction::Right 63 : FPDFText_Direction::Left); 64 textpage->ParseTextPage(); 65 return textpage; 66 } 67 68 FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page) { 69 delete CPDFTextPageFromFPDFTextPage(text_page); 70 } 71 72 FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page) { 73 if (!text_page) 74 return -1; 75 76 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 77 return textpage->CountChars(); 78 } 79 80 FPDF_EXPORT unsigned int FPDF_CALLCONV 81 FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index) { 82 if (!text_page) 83 return 0; 84 85 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 86 if (index < 0 || index >= textpage->CountChars()) 87 return 0; 88 89 FPDF_CHAR_INFO charinfo; 90 textpage->GetCharInfo(index, &charinfo); 91 return charinfo.m_Unicode; 92 } 93 94 FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, 95 int index) { 96 if (!text_page) 97 return 0; 98 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 99 100 if (index < 0 || index >= textpage->CountChars()) 101 return 0; 102 103 FPDF_CHAR_INFO charinfo; 104 textpage->GetCharInfo(index, &charinfo); 105 return charinfo.m_FontSize; 106 } 107 108 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, 109 int index, 110 double* left, 111 double* right, 112 double* bottom, 113 double* top) { 114 if (!text_page || index < 0) 115 return false; 116 117 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 118 if (index >= textpage->CountChars()) 119 return false; 120 121 FPDF_CHAR_INFO charinfo; 122 textpage->GetCharInfo(index, &charinfo); 123 *left = charinfo.m_CharBox.left; 124 *right = charinfo.m_CharBox.right; 125 *bottom = charinfo.m_CharBox.bottom; 126 *top = charinfo.m_CharBox.top; 127 return true; 128 } 129 130 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV 131 FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page, 132 int index, 133 double* x, 134 double* y) { 135 if (!text_page) 136 return false; 137 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 138 139 if (index < 0 || index >= textpage->CountChars()) 140 return false; 141 FPDF_CHAR_INFO charinfo; 142 textpage->GetCharInfo(index, &charinfo); 143 *x = charinfo.m_Origin.x; 144 *y = charinfo.m_Origin.y; 145 return true; 146 } 147 148 // select 149 FPDF_EXPORT int FPDF_CALLCONV 150 FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 151 double x, 152 double y, 153 double xTolerance, 154 double yTolerance) { 155 if (!text_page) 156 return -3; 157 158 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 159 return textpage->GetIndexAtPos( 160 CFX_PointF(static_cast<float>(x), static_cast<float>(y)), 161 CFX_SizeF(static_cast<float>(xTolerance), 162 static_cast<float>(yTolerance))); 163 } 164 165 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE page, 166 int char_start, 167 int char_count, 168 unsigned short* result) { 169 if (!page || char_start < 0 || char_count < 0 || !result) 170 return 0; 171 172 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(page); 173 int char_available = textpage->CountChars() - char_start; 174 if (char_available <= 0) 175 return 0; 176 177 char_count = std::min(char_count, char_available); 178 if (char_count == 0) { 179 // Writing out "", which has a character count of 1 due to the NUL. 180 *result = '\0'; 181 return 1; 182 } 183 184 WideString str = textpage->GetPageText(char_start, char_count); 185 186 if (str.GetLength() > static_cast<size_t>(char_count)) 187 str = str.Left(static_cast<size_t>(char_count)); 188 189 // UFT16LE_Encode doesn't handle surrogate pairs properly, so it is expected 190 // the number of items to stay the same. 191 ByteString byte_str = str.UTF16LE_Encode(); 192 size_t byte_str_len = byte_str.GetLength(); 193 int ret_count = byte_str_len / kBytesPerCharacter; 194 195 ASSERT(ret_count <= char_count + 1); // +1 to account for the NUL terminator. 196 memcpy(result, byte_str.GetBuffer(byte_str_len), byte_str_len); 197 return ret_count; 198 } 199 200 FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page, 201 int start, 202 int count) { 203 if (!text_page) 204 return 0; 205 206 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 207 return textpage->CountRects(start, count); 208 } 209 210 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page, 211 int rect_index, 212 double* left, 213 double* top, 214 double* right, 215 double* bottom) { 216 if (!text_page) 217 return false; 218 219 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 220 CFX_FloatRect rect; 221 bool result = textpage->GetRect(rect_index, &rect); 222 223 *left = rect.left; 224 *top = rect.top; 225 *right = rect.right; 226 *bottom = rect.bottom; 227 return result; 228 } 229 230 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page, 231 double left, 232 double top, 233 double right, 234 double bottom, 235 unsigned short* buffer, 236 int buflen) { 237 if (!text_page) 238 return 0; 239 240 CPDF_TextPage* textpage = CPDFTextPageFromFPDFTextPage(text_page); 241 CFX_FloatRect rect((float)left, (float)bottom, (float)right, (float)top); 242 WideString str = textpage->GetTextByRect(rect); 243 244 if (buflen <= 0 || !buffer) 245 return str.GetLength(); 246 247 ByteString cbUTF16Str = str.UTF16LE_Encode(); 248 int len = cbUTF16Str.GetLength() / sizeof(unsigned short); 249 int size = buflen > len ? len : buflen; 250 memcpy(buffer, cbUTF16Str.GetBuffer(size * sizeof(unsigned short)), 251 size * sizeof(unsigned short)); 252 cbUTF16Str.ReleaseBuffer(size * sizeof(unsigned short)); 253 254 return size; 255 } 256 257 // Search 258 // -1 for end 259 FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV 260 FPDFText_FindStart(FPDF_TEXTPAGE text_page, 261 FPDF_WIDESTRING findwhat, 262 unsigned long flags, 263 int start_index) { 264 if (!text_page) 265 return nullptr; 266 267 CPDF_TextPageFind* textpageFind = 268 new CPDF_TextPageFind(CPDFTextPageFromFPDFTextPage(text_page)); 269 size_t len = WideString::WStringLength(findwhat); 270 textpageFind->FindFirst( 271 WideString::FromUTF16LE(findwhat, len), flags, 272 start_index >= 0 ? Optional<size_t>(start_index) : Optional<size_t>()); 273 return textpageFind; 274 } 275 276 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle) { 277 if (!handle) 278 return false; 279 280 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 281 return textpageFind->FindNext(); 282 } 283 284 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle) { 285 if (!handle) 286 return false; 287 288 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 289 return textpageFind->FindPrev(); 290 } 291 292 FPDF_EXPORT int FPDF_CALLCONV 293 FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle) { 294 if (!handle) 295 return 0; 296 297 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 298 return textpageFind->GetCurOrder(); 299 } 300 301 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle) { 302 if (!handle) 303 return 0; 304 305 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 306 return textpageFind->GetMatchedCount(); 307 } 308 309 FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle) { 310 if (!handle) 311 return; 312 313 CPDF_TextPageFind* textpageFind = CPDFTextPageFindFromFPDFSchHandle(handle); 314 delete textpageFind; 315 handle = nullptr; 316 } 317 318 // web link 319 FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV 320 FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page) { 321 if (!text_page) 322 return nullptr; 323 324 CPDF_LinkExtract* pageLink = 325 new CPDF_LinkExtract(CPDFTextPageFromFPDFTextPage(text_page)); 326 pageLink->ExtractLinks(); 327 return pageLink; 328 } 329 330 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page) { 331 if (!link_page) 332 return 0; 333 334 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 335 return pdfium::base::checked_cast<int>(pageLink->CountLinks()); 336 } 337 338 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page, 339 int link_index, 340 unsigned short* buffer, 341 int buflen) { 342 WideString wsUrl(L""); 343 if (link_page && link_index >= 0) { 344 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 345 wsUrl = pageLink->GetURL(link_index); 346 } 347 ByteString cbUTF16URL = wsUrl.UTF16LE_Encode(); 348 int required = cbUTF16URL.GetLength() / sizeof(unsigned short); 349 if (!buffer || buflen <= 0) 350 return required; 351 352 int size = std::min(required, buflen); 353 if (size > 0) { 354 int buf_size = size * sizeof(unsigned short); 355 memcpy(buffer, cbUTF16URL.GetBuffer(buf_size), buf_size); 356 } 357 return size; 358 } 359 360 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page, 361 int link_index) { 362 if (!link_page || link_index < 0) 363 return 0; 364 365 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 366 return pdfium::CollectionSize<int>(pageLink->GetRects(link_index)); 367 } 368 369 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page, 370 int link_index, 371 int rect_index, 372 double* left, 373 double* top, 374 double* right, 375 double* bottom) { 376 if (!link_page || link_index < 0 || rect_index < 0) 377 return false; 378 379 CPDF_LinkExtract* pageLink = CPDFLinkExtractFromFPDFPageLink(link_page); 380 std::vector<CFX_FloatRect> rectArray = pageLink->GetRects(link_index); 381 if (rect_index >= pdfium::CollectionSize<int>(rectArray)) 382 return false; 383 384 *left = rectArray[rect_index].left; 385 *right = rectArray[rect_index].right; 386 *top = rectArray[rect_index].top; 387 *bottom = rectArray[rect_index].bottom; 388 return true; 389 } 390 391 FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page) { 392 delete CPDFLinkExtractFromFPDFPageLink(link_page); 393 } 394