1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #ifndef PUBLIC_FPDF_TEXT_H_ 8 #define PUBLIC_FPDF_TEXT_H_ 9 10 #include "fpdfview.h" 11 12 // Exported Functions 13 #ifdef __cplusplus 14 extern "C" { 15 #endif 16 17 // Function: FPDFText_LoadPage 18 // Prepare information about all characters in a page. 19 // Parameters: 20 // page - Handle to the page. Returned by FPDF_LoadPage function (in FPDFVIEW module). 21 // Return value: 22 // A handle to the text page information structure. 23 // NULL if something goes wrong. 24 // Comments: 25 // Application must call FPDFText_ClosePage to release the text page information. 26 // 27 DLLEXPORT FPDF_TEXTPAGE STDCALL FPDFText_LoadPage(FPDF_PAGE page); 28 29 // Function: FPDFText_ClosePage 30 // Release all resources allocated for a text page information structure. 31 // Parameters: 32 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 33 // Return Value: 34 // None. 35 // 36 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page); 37 38 // Function: FPDFText_CountChars 39 // Get number of characters in a page. 40 // Parameters: 41 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 42 // Return value: 43 // Number of characters in the page. Return -1 for error. 44 // Generated characters, like additional space characters, new line characters, are also counted. 45 // Comments: 46 // Characters in a page form a "stream", inside the stream, each character has an index. 47 // We will use the index parameters in many of FPDFTEXT functions. The first character in the page 48 // has an index value of zero. 49 // 50 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page); 51 52 // Function: FPDFText_GetUnicode 53 // Get Unicode of a character in a page. 54 // Parameters: 55 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 56 // index - Zero-based index of the character. 57 // Return value: 58 // The Unicode of the particular character. 59 // If a character is not encoded in Unicode and Foxit engine can't convert to Unicode, 60 // the return value will be zero. 61 // 62 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index); 63 64 // Function: FPDFText_GetFontSize 65 // Get the font size of a particular character. 66 // Parameters: 67 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 68 // index - Zero-based index of the character. 69 // Return value: 70 // The font size of the particular character, measured in points (about 1/72 inch). 71 // This is the typographic size of the font (so called "em size"). 72 // 73 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index); 74 75 // Function: FPDFText_GetCharBox 76 // Get bounding box of a particular character. 77 // Parameters: 78 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 79 // index - Zero-based index of the character. 80 // left - Pointer to a double number receiving left position of the character box. 81 // right - Pointer to a double number receiving right position of the character box. 82 // bottom - Pointer to a double number receiving bottom position of the character box. 83 // top - Pointer to a double number receiving top position of the character box. 84 // Return Value: 85 // None. 86 // Comments: 87 // All positions are measured in PDF "user space". 88 // 89 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left, 90 double* right, double* bottom, double* top); 91 92 // Function: FPDFText_GetCharIndexAtPos 93 // Get the index of a character at or nearby a certain position on the page. 94 // Parameters: 95 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 96 // x - X position in PDF "user space". 97 // y - Y position in PDF "user space". 98 // xTolerance - An x-axis tolerance value for character hit detection, in point unit. 99 // yTolerance - A y-axis tolerance value for character hit detection, in point unit. 100 // Return Value: 101 // The zero-based index of the character at, or nearby the point (x,y). 102 // If there is no character at or nearby the point, return value will be -1. 103 // If an error occurs, -3 will be returned. 104 // 105 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page, 106 double x, double y, double xTorelance, double yTolerance); 107 108 // Function: FPDFText_GetText 109 // Extract unicode text string from the page. 110 // Parameters: 111 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 112 // start_index - Index for the start characters. 113 // count - Number of characters to be extracted. 114 // result - A buffer (allocated by application) receiving the extracted unicodes. 115 // The size of the buffer must be able to hold the number of characters plus a terminator. 116 // Return Value: 117 // Number of characters written into the result buffer, including the trailing terminator. 118 // Comments: 119 // This function ignores characters without unicode information. 120 // 121 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result); 122 123 // Function: FPDFText_CountRects 124 // Count number of rectangular areas occupied by a segment of texts. 125 // Parameters: 126 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 127 // start_index - Index for the start characters. 128 // count - Number of characters. 129 // Return value: 130 // Number of rectangles. Zero for error. 131 // Comments: 132 // This function, along with FPDFText_GetRect can be used by applications to detect the position 133 // on the page for a text segment, so proper areas can be highlighted or something. 134 // FPDFTEXT will automatically merge small character boxes into bigger one if those characters 135 // are on the same line and use same font settings. 136 // 137 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count); 138 139 // Function: FPDFText_GetRect 140 // Get a rectangular area from the result generated by FPDFText_CountRects. 141 // Parameters: 142 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 143 // rect_index - Zero-based index for the rectangle. 144 // left - Pointer to a double value receiving the rectangle left boundary. 145 // top - Pointer to a double value receiving the rectangle top boundary. 146 // right - Pointer to a double value receiving the rectangle right boundary. 147 // bottom - Pointer to a double value receiving the rectangle bottom boundary. 148 // Return Value: 149 // None. 150 // 151 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top, 152 double* right, double* bottom); 153 154 // Function: FPDFText_GetBoundedText 155 // Extract unicode text within a rectangular boundary on the page. 156 // Parameters: 157 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 158 // left - Left boundary. 159 // top - Top boundary. 160 // right - Right boundary. 161 // bottom - Bottom boundary. 162 // buffer - A unicode buffer. 163 // buflen - Number of characters (not bytes) for the buffer, excluding an additional terminator. 164 // Return Value: 165 // If buffer is NULL or buflen is zero, return number of characters (not bytes) of text present within 166 // the rectangle, excluding a terminating NUL. Generally you should pass a buffer at least one larger 167 // than this if you want a terminating NUL, which will be provided if space is available. 168 // Otherwise, return number of characters copied into the buffer, including the terminating NUL 169 // when space for it is available. 170 // Comment: 171 // If the buffer is too small, as much text as will fit is copied into it. 172 // 173 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top, 174 double right, double bottom,unsigned short* buffer,int buflen); 175 176 177 // Flags used by FPDFText_FindStart function. 178 #define FPDF_MATCHCASE 0x00000001 //If not set, it will not match case by default. 179 #define FPDF_MATCHWHOLEWORD 0x00000002 //If not set, it will not match the whole word by default. 180 181 // Function: FPDFText_FindStart 182 // Start a search. 183 // Parameters: 184 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 185 // findwhat - A unicode match pattern. 186 // flags - Option flags. 187 // start_index - Start from this character. -1 for end of the page. 188 // Return Value: 189 // A handle for the search context. FPDFText_FindClose must be called to release this handle. 190 // 191 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat, 192 unsigned long flags, int start_index); 193 194 // Function: FPDFText_FindNext 195 // Search in the direction from page start to end. 196 // Parameters: 197 // handle - A search context handle returned by FPDFText_FindStart. 198 // Return Value: 199 // Whether a match is found. 200 // 201 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle); 202 203 // Function: FPDFText_FindPrev 204 // Search in the direction from page end to start. 205 // Parameters: 206 // handle - A search context handle returned by FPDFText_FindStart. 207 // Return Value: 208 // Whether a match is found. 209 // 210 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle); 211 212 // Function: FPDFText_GetSchResultIndex 213 // Get the starting character index of the search result. 214 // Parameters: 215 // handle - A search context handle returned by FPDFText_FindStart. 216 // Return Value: 217 // Index for the starting character. 218 // 219 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle); 220 221 // Function: FPDFText_GetSchCount 222 // Get the number of matched characters in the search result. 223 // Parameters: 224 // handle - A search context handle returned by FPDFText_FindStart. 225 // Return Value: 226 // Number of matched characters. 227 // 228 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle); 229 230 // Function: FPDFText_FindClose 231 // Release a search context. 232 // Parameters: 233 // handle - A search context handle returned by FPDFText_FindStart. 234 // Return Value: 235 // None. 236 // 237 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle); 238 239 // Function: FPDFLink_LoadWebLinks 240 // Prepare information about weblinks in a page. 241 // Parameters: 242 // text_page - Handle to a text page information structure. Returned by FPDFText_LoadPage function. 243 // Return Value: 244 // A handle to the page's links information structure. 245 // NULL if something goes wrong. 246 // Comments: 247 // Weblinks are those links implicitly embedded in PDF pages. PDF also has a type of 248 // annotation called "link", FPDFTEXT doesn't deal with that kind of link. 249 // FPDFTEXT weblink feature is useful for automatically detecting links in the page 250 // contents. For example, things like "http://www.foxitsoftware.com" will be detected, 251 // so applications can allow user to click on those characters to activate the link, 252 // even the PDF doesn't come with link annotations. 253 // 254 // FPDFLink_CloseWebLinks must be called to release resources. 255 // 256 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page); 257 258 // Function: FPDFLink_CountWebLinks 259 // Count number of detected web links. 260 // Parameters: 261 // link_page - Handle returned by FPDFLink_LoadWebLinks. 262 // Return Value: 263 // Number of detected web links. 264 // 265 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page); 266 267 // Function: FPDFLink_GetURL 268 // Fetch the URL information for a detected web link. 269 // Parameters: 270 // link_page - Handle returned by FPDFLink_LoadWebLinks. 271 // link_index - Zero-based index for the link. 272 // buffer - A unicode buffer. 273 // buflen - Number of characters (not bytes) for the buffer, including an additional terminator. 274 // Return Value: 275 // If buffer is NULL or buflen is zero, return number of characters (not bytes and an additional terminator is also counted) needed, 276 // otherwise, return number of characters copied into the buffer. 277 // 278 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen); 279 280 // Function: FPDFLink_CountRects 281 // Count number of rectangular areas for the link. 282 // Parameters: 283 // link_page - Handle returned by FPDFLink_LoadWebLinks. 284 // link_index - Zero-based index for the link. 285 // Return Value: 286 // Number of rectangular areas for the link. 287 // 288 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index); 289 290 // Function: FPDFLink_GetRect 291 // Fetch the boundaries of a rectangle for a link. 292 // Parameters: 293 // link_page - Handle returned by FPDFLink_LoadWebLinks. 294 // link_index - Zero-based index for the link. 295 // rect_index - Zero-based index for a rectangle. 296 // left - Pointer to a double value receiving the rectangle left boundary. 297 // top - Pointer to a double value receiving the rectangle top boundary. 298 // right - Pointer to a double value receiving the rectangle right boundary. 299 // bottom - Pointer to a double value receiving the rectangle bottom boundary. 300 // Return Value: 301 // None. 302 // 303 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index, 304 double* left, double* top,double* right, double* bottom); 305 306 // Function: FPDFLink_CloseWebLinks 307 // Release resources used by weblink feature. 308 // Parameters: 309 // link_page - Handle returned by FPDFLink_LoadWebLinks. 310 // Return Value: 311 // None. 312 // 313 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page); 314 315 316 #ifdef __cplusplus 317 } 318 #endif 319 320 #endif // PUBLIC_FPDF_TEXT_H_ 321