Home | History | Annotate | Download | only in include
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef _FPDFTEXT_H_
      8 #define _FPDFTEXT_H_
      9 
     10 #include "fpdfview.h"
     11 
     12 // Exported Functions
     13 #ifdef __cplusplus
     14 extern "C" {
     15 #endif
     16 
     17 // Function: FPDFText_LoadPage
     18 //			Prepare information about all the characters in a page.
     19 // Parameters:
     20 //			page	-	Handle to the page, as returned by the FPDF_LoadPage function (in the FPDFVIEW module).
     21 // Return value:
     22 //			A handle to the text page information structure,
     23 //			or NULL if something goes wrong.
     24 // Comments:
     25 //			The application must call FPDFText_ClosePage to release the text page information.
     26 //			If the Text Module has not been purchased, this function returns NULL.
     27 //
     28 DLLEXPORT FPDF_TEXTPAGE	STDCALL FPDFText_LoadPage(FPDF_PAGE page);
     29 
     30 // Function: FPDFText_ClosePage
     31 //			Release all the resources allocated for a text page information structure.
     32 // Parameters:
     33 //			text_page	-	Handle to the text page information structure, as returned by FPDFText_LoadPage.
     34 // Return Value:
     35 //			None.
     36 //
     37 DLLEXPORT void STDCALL FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
     38 
     39 // Function: FPDFText_CountChars
     40 //			Get the number of characters in a page.
     41 // Parameters:
     42 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
     43 // Return value:
     44 //			The number of characters in the page, or -1 on error.
     45 //			Generated characters, such as additional space characters and new-line characters, are also counted.
     46 // Comments:
     47 //			The characters in a page form a "stream"; inside the stream, each character has an index.
     48 //			These indices are used as parameters by many of the FPDFTEXT functions. The first character
     49 //			in the page has an index value of zero.
     50 //
     51 DLLEXPORT int STDCALL FPDFText_CountChars(FPDF_TEXTPAGE text_page);
     52 
     53 // Function: FPDFText_GetUnicode
     54 //			Get the Unicode value of a character in a page.
     55 // Parameters:
     56 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
     57 //			index		-	Zero-based index of the character.
     58 // Return value:
     59 //			The Unicode value of the specified character.
     60 //			If the character is not encoded in Unicode and the Foxit engine cannot convert it to Unicode,
     61 //			the return value is zero.
     62 //
     63 DLLEXPORT unsigned int STDCALL FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
     64 
     65 // Function: FPDFText_GetFontSize
     66 //			Get the font size of a particular character.
     67 // Parameters:
     68 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
     69 //			index		-	Zero-based index of the character.
     70 // Return value:
     71 //			The font size of the specified character, measured in points (about 1/72 inch).
     72 //			This is the typographic size of the font (the so-called "em size").
     73 //
     74 DLLEXPORT double STDCALL FPDFText_GetFontSize(FPDF_TEXTPAGE text_page, int index);
     75 
     76 // Function: FPDFText_GetCharBox
     77 //			Get the bounding box of a particular character.
     78 // Parameters:
     79 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
     80 //			index		-	Zero-based index of the character.
     81 //			left		-	Pointer to a double that receives the left position of the character box.
     82 //			right		-	Pointer to a double that receives the right position of the character box.
     83 //			bottom		-	Pointer to a double that receives the bottom position of the character box.
     84 //			top			-	Pointer to a double that receives the top position of the character box.
     85 // Return Value:
     86 //			None.
     87 // Comments:
     88 //			All positions are measured in PDF "user space". Note the output order: left, right, bottom, top.
     89 //
     90 DLLEXPORT void STDCALL FPDFText_GetCharBox(FPDF_TEXTPAGE text_page, int index, double* left,
     91 													double* right, double* bottom, double* top);
     92 
     93 // Function: FPDFText_GetCharIndexAtPos
     94 //			Get the index of a character at or near a certain position on the page.
     95 // Parameters:
     96 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
     97 //			x			-	X position in PDF "user space".
     98 //			y			-	Y position in PDF "user space".
     99 //			xTolerance	-	Tolerance along the x-axis for character hit detection, in points.
    100 //			yTolerance	-	Tolerance along the y-axis for character hit detection, in points.
    101 // Return Value:
    102 //			The zero-based index of the character at or near the point (x,y).
    103 //			If there is no character at or near the point, the return value is -1.
    104 //			If an error occurs, -3 is returned.
    105 //
    106 DLLEXPORT int STDCALL FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
    107 												 double x, double y, double xTolerance, double yTolerance);
    108 
    109 // Function: FPDFText_GetText
    110 //			Extract a Unicode text string from the page.
    111 // Parameters:
    112 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    113 //			start_index	-	Index of the first character to extract.
    114 //			count		-	Number of characters to be extracted.
    115 //			result		-	A buffer (allocated by the application) that receives the extracted Unicode characters.
    116 //							The buffer must be large enough to hold the number of characters plus a terminator.
    117 // Return Value:
    118 //			The number of characters written into the result buffer, including the trailing terminator.
    119 // Comments:
    120 //			This function ignores characters that have no Unicode information.
    121 //
    122 DLLEXPORT int STDCALL FPDFText_GetText(FPDF_TEXTPAGE text_page, int start_index, int count, unsigned short* result);
    123 
    124 // Function: FPDFText_CountRects
    125 //			Count the rectangular areas occupied by a segment of text.
    126 // Parameters:
    127 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    128 //			start_index	-	Index of the first character of the segment.
    129 //			count		-	Number of characters in the segment.
    130 // Return value:
    131 //			The number of rectangles, or zero on error.
    132 // Comments:
    133 //			Together with FPDFText_GetRect, this function lets applications find where a text segment
    134 //			lies on the page, for example so that the matching areas can be highlighted.
    135 //			FPDFTEXT automatically merges small character boxes into a bigger one when the characters
    136 //			are on the same line and use the same font settings.
    137 //
    138 DLLEXPORT int STDCALL FPDFText_CountRects(FPDF_TEXTPAGE text_page, int start_index, int count);
    139 
    140 // Function: FPDFText_GetRect
    141 //			Get one rectangular area from the result generated by FPDFText_CountRects.
    142 // Parameters:
    143 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    144 //			rect_index	-	Zero-based index of the rectangle.
    145 //			left		-	Pointer to a double that receives the left boundary of the rectangle.
    146 //			top			-	Pointer to a double that receives the top boundary of the rectangle.
    147 //			right		-	Pointer to a double that receives the right boundary of the rectangle.
    148 //			bottom		-	Pointer to a double that receives the bottom boundary of the rectangle.
    149 // Return Value:
    150 //			None.
    151 //
    152 DLLEXPORT void STDCALL FPDFText_GetRect(FPDF_TEXTPAGE text_page, int rect_index, double* left, double* top,
    153 											double* right, double* bottom);
    154 
    155 // Function: FPDFText_GetBoundedText
    156 //			Extract the Unicode text within a rectangular boundary on the page.
    157 // Parameters:
    158 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    159 //			left		-	Left boundary.
    160 //			top			-	Top boundary.
    161 //			right		-	Right boundary.
    162 //			bottom		-	Bottom boundary.
    163 //			buffer		-	A Unicode buffer.
    164 //			buflen		-	Size of the buffer in characters (not bytes), excluding an additional terminator.
    165 // Return Value:
    166 //			If buffer is NULL or buflen is zero, returns the number of characters (not bytes) needed;
    167 //			otherwise, returns the number of characters copied into the buffer.
    168 //
    169 DLLEXPORT int STDCALL FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,double left, double top,
    170 											  double right, double bottom,unsigned short* buffer,int buflen);
    171 
    172 
    173 // Option flags used by the FPDFText_FindStart function.
    174 #define FPDF_MATCHCASE      0x00000001		// Match case. If not set, the search does not match case.
    175 #define FPDF_MATCHWHOLEWORD 0x00000002		// Match whole words. If not set, the search does not match the whole word.
    176 
    177 // Function: FPDFText_FindStart
    178 //			Start a search.
    179 // Parameters:
    180 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    181 //			findwhat	-	A Unicode match pattern.
    182 //			flags		-	Option flags (see FPDF_MATCHCASE and FPDF_MATCHWHOLEWORD).
    183 //			start_index	-	Index of the character to start from, or -1 for the end of the page.
    184 // Return Value:
    185 //			A handle for the search context. FPDFText_FindClose must be called to release this handle.
    186 //
    187 DLLEXPORT FPDF_SCHHANDLE STDCALL FPDFText_FindStart(FPDF_TEXTPAGE text_page, FPDF_WIDESTRING findwhat,
    188 													unsigned long flags, int start_index);
    189 
    190 // Function: FPDFText_FindNext
    191 //			Search forward, in the direction from the start of the page to its end.
    192 // Parameters:
    193 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
    194 // Return Value:
    195 //			Whether a match was found.
    196 //
    197 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindNext(FPDF_SCHHANDLE handle);
    198 
    199 // Function: FPDFText_FindPrev
    200 //			Search backward, in the direction from the end of the page to its start.
    201 // Parameters:
    202 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
    203 // Return Value:
    204 //			Whether a match was found.
    205 //
    206 DLLEXPORT FPDF_BOOL STDCALL FPDFText_FindPrev(FPDF_SCHHANDLE handle);
    207 
    208 // Function: FPDFText_GetSchResultIndex
    209 //			Get the index of the first character of the search result.
    210 // Parameters:
    211 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
    212 // Return Value:
    213 //			The index of the starting character.
    214 //
    215 DLLEXPORT int STDCALL FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
    216 
    217 // Function: FPDFText_GetSchCount
    218 //			Get the number of characters matched in the search result.
    219 // Parameters:
    220 //			handle		-	A search context handle, as returned by FPDFText_FindStart.
    221 // Return Value:
    222 //			The number of matched characters.
    223 //
    224 DLLEXPORT int STDCALL FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
    225 
    226 // Function: FPDFText_FindClose
    227 //			Release a search context.
    228 // Parameters:
    229 //			handle		-	The search context handle to release, as returned by FPDFText_FindStart.
    230 // Return Value:
    231 //			None.
    232 //
    233 DLLEXPORT void STDCALL FPDFText_FindClose(FPDF_SCHHANDLE handle);
    234 
    235 // Function: FPDFLink_LoadWebLinks
    236 //			Prepare information about the weblinks in a page.
    237 // Parameters:
    238 //			text_page	-	Handle to a text page information structure, as returned by FPDFText_LoadPage.
    239 // Return Value:
    240 //			A handle to the page's links information structure,
    241 //			or NULL if something goes wrong.
    242 // Comments:
    243 //			Weblinks are links implicitly embedded in PDF pages. PDF also has a type of
    244 //			annotation called "link"; FPDFTEXT does not deal with that kind of link.
    245 //			The FPDFTEXT weblink feature is useful for automatically detecting links in the page
    246 //			contents. For example, text like "http://www.foxitsoftware.com" will be detected,
    247 //			so applications can let the user click on those characters to activate the link,
    248 //			even if the PDF doesn't come with link annotations.
    249 //
    250 //			FPDFLink_CloseWebLinks must be called to release resources.
    251 //
    252 DLLEXPORT FPDF_PAGELINK STDCALL FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
    253 
    254 // Function: FPDFLink_CountWebLinks
    255 //			Count the detected web links.
    256 // Parameters:
    257 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
    258 // Return Value:
    259 //			The number of detected web links.
    260 //
    261 DLLEXPORT int STDCALL FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
    262 
    263 // Function: FPDFLink_GetURL
    264 //			Fetch the URL information for a detected web link.
    265 // Parameters:
    266 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
    267 //			link_index	-	Zero-based index of the link.
    268 //			buffer		-	A Unicode buffer.
    269 //			buflen		-	Size of the buffer in characters (not bytes), including an additional terminator.
    270 // Return Value:
    271 //			If buffer is NULL or buflen is zero, returns the number of characters needed (characters, not bytes; the additional terminator is also counted);
    272 //			otherwise, returns the number of characters copied into the buffer.
    273 //
    274 DLLEXPORT int STDCALL FPDFLink_GetURL(FPDF_PAGELINK link_page, int link_index, unsigned short* buffer,int buflen);
    275 
    276 // Function: FPDFLink_CountRects
    277 //			Count the rectangular areas that make up a link.
    278 // Parameters:
    279 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
    280 //			link_index	-	Zero-based index of the link.
    281 // Return Value:
    282 //			The number of rectangular areas for the link.
    283 //
    284 DLLEXPORT int STDCALL FPDFLink_CountRects(FPDF_PAGELINK link_page, int link_index);
    285 
    286 // Function: FPDFLink_GetRect
    287 //			Fetch the boundaries of one rectangle of a link.
    288 // Parameters:
    289 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
    290 //			link_index	-	Zero-based index of the link.
    291 //			rect_index	-	Zero-based index of the rectangle.
    292 //			left		-	Pointer to a double that receives the left boundary of the rectangle.
    293 //			top			-	Pointer to a double that receives the top boundary of the rectangle.
    294 //			right		-	Pointer to a double that receives the right boundary of the rectangle.
    295 //			bottom		-	Pointer to a double that receives the bottom boundary of the rectangle.
    296 // Return Value:
    297 //			None.
    298 //
    299 DLLEXPORT void STDCALL FPDFLink_GetRect(FPDF_PAGELINK link_page, int link_index, int rect_index,
    300 										double* left, double* top,double* right, double* bottom);
    301 
    302 // Function: FPDFLink_CloseWebLinks
    303 //			Release the resources used by the weblink feature.
    304 // Parameters:
    305 //			link_page	-	Handle returned by FPDFLink_LoadWebLinks.
    306 // Return Value:
    307 //			None.
    308 //
    309 DLLEXPORT void STDCALL FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);
    310 
    311 
    311 #ifdef __cplusplus
    312 }  // extern "C" -- no trailing ';': a linkage-specification block is not terminated by a semicolon.
    313 #endif
    315 
    316 #endif//_FPDFTEXT_H_
    317