Home | History | Annotate | Download | only in public
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef PUBLIC_FPDF_TEXT_H_
      8 #define PUBLIC_FPDF_TEXT_H_
      9 
     10 // NOLINTNEXTLINE(build/include)
     11 #include "fpdfview.h"
     12 
     13 // Exported Functions
     14 #ifdef __cplusplus
     15 extern "C" {
     16 #endif
     17 
     18 // Function: FPDFText_LoadPage
     19 //          Prepare information about all characters in a page.
     20 // Parameters:
     21 //          page    -   Handle to the page. Returned by FPDF_LoadPage function
     22 //          (in FPDFVIEW module).
     23 // Return value:
     24 //          A handle to the text page information structure.
     25 //          NULL if something goes wrong.
     26 // Comments:
     27 //          Application must call FPDFText_ClosePage to release the text page
     28 //          information.
     29 //
     30 FPDF_EXPORT FPDF_TEXTPAGE FPDF_CALLCONV FPDFText_LoadPage(FPDF_PAGE page);
     31 
     32 // Function: FPDFText_ClosePage
     33 //          Release all resources allocated for a text page information
     34 //          structure.
     35 // Parameters:
     36 //          text_page   -   Handle to a text page information structure.
     37 //          Returned by FPDFText_LoadPage function.
     38 // Return Value:
     39 //          None.
     40 //
     41 FPDF_EXPORT void FPDF_CALLCONV FPDFText_ClosePage(FPDF_TEXTPAGE text_page);
     42 
     43 // Function: FPDFText_CountChars
     44 //          Get number of characters in a page.
     45 // Parameters:
     46 //          text_page   -   Handle to a text page information structure.
     47 //          Returned by FPDFText_LoadPage function.
     48 // Return value:
     49 //          Number of characters in the page. Return -1 for error.
     50 //          Generated characters, like additional space characters, new line
     51 //          characters, are also counted.
     52 // Comments:
     53 //          Characters in a page form a "stream"; inside the stream, each
     54 //          character has an index.
     55 //          Index parameters are used by many of the FPDFTEXT functions. The
     56 //          first character in the page
     57 //          has an index value of zero.
     58 //
     59 FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountChars(FPDF_TEXTPAGE text_page);
     60 
     61 // Function: FPDFText_GetUnicode
     62 //          Get Unicode of a character in a page.
     63 // Parameters:
     64 //          text_page   -   Handle to a text page information structure.
     65 //          Returned by FPDFText_LoadPage function.
     66 //          index       -   Zero-based index of the character.
     67 // Return value:
     68 //          The Unicode of the particular character.
     69 //          If a character is not encoded in Unicode and the Foxit engine
     70 //          can't convert it to Unicode,
     71 //          the return value will be zero.
     72 //
     73 FPDF_EXPORT unsigned int FPDF_CALLCONV
     74 FPDFText_GetUnicode(FPDF_TEXTPAGE text_page, int index);
     75 
     76 // Function: FPDFText_GetFontSize
     77 //          Get the font size of a particular character.
     78 // Parameters:
     79 //          text_page   -   Handle to a text page information structure.
     80 //          Returned by FPDFText_LoadPage function.
     81 //          index       -   Zero-based index of the character.
     82 // Return value:
     83 //          The font size of the particular character, measured in points (about
     84 //          1/72 inch).
     85 //          This is the typographic size of the font (so-called "em size").
     86 //
     87 FPDF_EXPORT double FPDF_CALLCONV FPDFText_GetFontSize(FPDF_TEXTPAGE text_page,
     88                                                       int index);
     89 
     90 // Function: FPDFText_GetCharBox
     91 //          Get bounding box of a particular character.
     92 // Parameters:
     93 //          text_page   -   Handle to a text page information structure.
     94 //          Returned by FPDFText_LoadPage function.
     95 //          index       -   Zero-based index of the character.
     96 //          left        -   Pointer to a double number receiving left position
     97 //          of the character box.
     98 //          right       -   Pointer to a double number receiving right position
     99 //          of the character box.
    100 //          bottom      -   Pointer to a double number receiving bottom position
    101 //          of the character box.
    102 //          top         -   Pointer to a double number receiving top position of
    103 //          the character box.
    104 // Return Value:
    105 //          On success, return TRUE and fill in |left|, |right|, |bottom|, and
    106 //          |top|. If |text_page| is invalid, or if |index| is out of bounds,
    107 //          then return FALSE, and the out parameters remain unmodified.
    108 // Comments:
    109 //          All positions are measured in PDF "user space".
    110 //
    111 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetCharBox(FPDF_TEXTPAGE text_page,
    112                                                         int index,
    113                                                         double* left,
    114                                                         double* right,
    115                                                         double* bottom,
    116                                                         double* top);
    117 
    118 // Function: FPDFText_GetCharOrigin
    119 //          Get origin of a particular character.
    120 // Parameters:
    121 //          text_page   -   Handle to a text page information structure.
    122 //          Returned by FPDFText_LoadPage function.
    123 //          index       -   Zero-based index of the character.
    124 //          x           -   Pointer to a double number receiving x coordinate of
    125 //          the character origin.
    126 //          y           -   Pointer to a double number receiving y coordinate of
    127 //          the character origin.
    128 // Return Value:
    129 //          Whether the call succeeded. If false, x and y are unchanged.
    130 // Comments:
    131 //          All positions are measured in PDF "user space".
    132 //
    133 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
    134 FPDFText_GetCharOrigin(FPDF_TEXTPAGE text_page,
    135                        int index,
    136                        double* x,
    137                        double* y);
    138 
    139 // Function: FPDFText_GetCharIndexAtPos
    140 //          Get the index of a character at or nearby a certain position on the
    141 //          page.
    142 // Parameters:
    143 //          text_page   -   Handle to a text page information structure.
    144 //          Returned by FPDFText_LoadPage function.
    145 //          x           -   X position in PDF "user space".
    146 //          y           -   Y position in PDF "user space".
    147 //          xTolerance  -   An x-axis tolerance value for character hit
    148 //          detection, in points.
    149 //          yTolerance  -   A y-axis tolerance value for character hit
    150 //          detection, in points.
    151 // Return Value:
    152 //          The zero-based index of the character at, or nearby the point (x,y).
    153 //          If there is no character at or nearby the point, return value will
    154 //          be -1.
    155 //          If an error occurs, -3 will be returned.
    156 //
    157 FPDF_EXPORT int FPDF_CALLCONV
    158 FPDFText_GetCharIndexAtPos(FPDF_TEXTPAGE text_page,
    159                            double x,
    160                            double y,
    161                            double xTolerance,
    162                            double yTolerance);
    163 
    164 // Function: FPDFText_GetText
    165 //          Extract unicode text string from the page.
    166 // Parameters:
    167 //          text_page   -   Handle to a text page information structure.
    168 //          Returned by FPDFText_LoadPage function.
    169 //          start_index -   Index of the first character to extract.
    170 //          count       -   Number of characters to be extracted.
    171 //          result      -   A buffer (allocated by application) receiving the
    172 //          extracted unicodes.
    173 //                          The size of the buffer must be able to hold the
    174 //                          number of characters plus a terminator.
    175 // Return Value:
    176 //          Number of characters written into the result buffer, including the
    177 //          trailing terminator.
    178 // Comments:
    179 //          This function ignores characters without unicode information.
    180 //
    181 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetText(FPDF_TEXTPAGE text_page,
    182                                                int start_index,
    183                                                int count,
    184                                                unsigned short* result);
    185 
    186 // Function: FPDFText_CountRects
    187 //          Count number of rectangular areas occupied by a segment of text.
    188 // Parameters:
    189 //          text_page   -   Handle to a text page information structure.
    190 //          Returned by FPDFText_LoadPage function.
    191 //          start_index -   Index of the first character in the segment.
    192 //          count       -   Number of characters.
    193 // Return value:
    194 //          Number of rectangles. Zero for error.
    195 // Comments:
    196 //          This function, along with FPDFText_GetRect, can be used by
    197 //          applications to detect the position
    198 //          on the page for a text segment, so the proper areas can be
    199 //          highlighted, for example.
    200 //          FPDFTEXT will automatically merge small character boxes into bigger
    201 //          ones if those characters
    202 //          are on the same line and use the same font settings.
    203 //
    204 FPDF_EXPORT int FPDF_CALLCONV FPDFText_CountRects(FPDF_TEXTPAGE text_page,
    205                                                   int start_index,
    206                                                   int count);
    207 
    208 // Function: FPDFText_GetRect
    209 //          Get a rectangular area from the result generated by
    210 //          FPDFText_CountRects.
    211 // Parameters:
    212 //          text_page   -   Handle to a text page information structure.
    213 //          Returned by FPDFText_LoadPage function.
    214 //          rect_index  -   Zero-based index for the rectangle.
    215 //          left        -   Pointer to a double value receiving the rectangle
    216 //          left boundary.
    217 //          top         -   Pointer to a double value receiving the rectangle
    218 //          top boundary.
    219 //          right       -   Pointer to a double value receiving the rectangle
    220 //          right boundary.
    221 //          bottom      -   Pointer to a double value receiving the rectangle
    222 //          bottom boundary.
    223 // Return Value:
    224 //          On success, return TRUE and fill in |left|, |top|, |right|, and
    225 //          |bottom|. If |text_page| is invalid then return FALSE, and the out
    226 //          parameters remain unmodified. If |text_page| is valid but
    227 //          |rect_index| is out of bounds, then return FALSE and set the out
    228 //          parameters to 0.
    229 //
    230 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_GetRect(FPDF_TEXTPAGE text_page,
    231                                                      int rect_index,
    232                                                      double* left,
    233                                                      double* top,
    234                                                      double* right,
    235                                                      double* bottom);
    236 
    237 // Function: FPDFText_GetBoundedText
    238 //          Extract unicode text within a rectangular boundary on the page.
    239 // Parameters:
    240 //          text_page   -   Handle to a text page information structure.
    241 //          Returned by FPDFText_LoadPage function.
    242 //          left        -   Left boundary.
    243 //          top         -   Top boundary.
    244 //          right       -   Right boundary.
    245 //          bottom      -   Bottom boundary.
    246 //          buffer      -   A unicode buffer.
    247 //          buflen      -   Number of characters (not bytes) for the buffer,
    248 //          excluding an additional terminator.
    249 // Return Value:
    250 //          If buffer is NULL or buflen is zero, return number of characters
    251 //          (not bytes) of text present within
    252 //          the rectangle, excluding a terminating NUL.  Generally you should
    253 //          pass a buffer at least one larger
    254 //          than this if you want a terminating NUL, which will be provided if
    255 //          space is available.
    256 //          Otherwise, return number of characters copied into the buffer,
    257 //          including the terminating NUL
    258 //          when space for it is available.
    259 // Comments:
    260 //          If the buffer is too small, as much text as will fit is copied into
    261 //          it.
    262 //
    263 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetBoundedText(FPDF_TEXTPAGE text_page,
    264                                                       double left,
    265                                                       double top,
    266                                                       double right,
    267                                                       double bottom,
    268                                                       unsigned short* buffer,
    269                                                       int buflen);
    270 
    271 // Flags used by FPDFText_FindStart function.
    272 #define FPDF_MATCHCASE \
    273   0x00000001  // If not set, it will not match case by default.
    274 #define FPDF_MATCHWHOLEWORD \
    275   0x00000002  // If not set, it will not match the whole word by default.
    276 
    277 // Function: FPDFText_FindStart
    278 //          Start a search.
    279 // Parameters:
    280 //          text_page   -   Handle to a text page information structure.
    281 //          Returned by FPDFText_LoadPage function.
    282 //          findwhat    -   A unicode match pattern.
    283 //          flags       -   Option flags.
    284 //          start_index -   Start from this character. -1 for end of the page.
    285 // Return Value:
    286 //          A handle for the search context. FPDFText_FindClose must be called
    287 //          to release this handle.
    288 //
    289 FPDF_EXPORT FPDF_SCHHANDLE FPDF_CALLCONV
    290 FPDFText_FindStart(FPDF_TEXTPAGE text_page,
    291                    FPDF_WIDESTRING findwhat,
    292                    unsigned long flags,
    293                    int start_index);
    294 
    295 // Function: FPDFText_FindNext
    296 //          Search in the direction from page start to end.
    297 // Parameters:
    298 //          handle      -   A search context handle returned by
    299 //          FPDFText_FindStart.
    300 // Return Value:
    301 //          Whether a match is found.
    302 //
    303 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindNext(FPDF_SCHHANDLE handle);
    304 
    305 // Function: FPDFText_FindPrev
    306 //          Search in the direction from page end to start.
    307 // Parameters:
    308 //          handle      -   A search context handle returned by
    309 //          FPDFText_FindStart.
    310 // Return Value:
    311 //          Whether a match is found.
    312 //
    313 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFText_FindPrev(FPDF_SCHHANDLE handle);
    314 
    315 // Function: FPDFText_GetSchResultIndex
    316 //          Get the starting character index of the search result.
    317 // Parameters:
    318 //          handle      -   A search context handle returned by
    319 //          FPDFText_FindStart.
    320 // Return Value:
    321 //          Index for the starting character.
    322 //
    323 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchResultIndex(FPDF_SCHHANDLE handle);
    324 
    325 // Function: FPDFText_GetSchCount
    326 //          Get the number of matched characters in the search result.
    327 // Parameters:
    328 //          handle      -   A search context handle returned by
    329 //          FPDFText_FindStart.
    330 // Return Value:
    331 //          Number of matched characters.
    332 //
    333 FPDF_EXPORT int FPDF_CALLCONV FPDFText_GetSchCount(FPDF_SCHHANDLE handle);
    334 
    335 // Function: FPDFText_FindClose
    336 //          Release a search context.
    337 // Parameters:
    338 //          handle      -   A search context handle returned by
    339 //          FPDFText_FindStart.
    340 // Return Value:
    341 //          None.
    342 //
    343 FPDF_EXPORT void FPDF_CALLCONV FPDFText_FindClose(FPDF_SCHHANDLE handle);
    344 
    345 // Function: FPDFLink_LoadWebLinks
    346 //          Prepare information about weblinks in a page.
    347 // Parameters:
    348 //          text_page   -   Handle to a text page information structure.
    349 //          Returned by FPDFText_LoadPage function.
    350 // Return Value:
    351 //          A handle to the page's links information structure.
    352 //          NULL if something goes wrong.
    353 // Comments:
    354 //          Weblinks are those links implicitly embedded in PDF pages. PDF also
    355 //          has a type of
    356 //          annotation called "link"; FPDFTEXT doesn't deal with that kind of
    357 //          link.
    358 //          FPDFTEXT weblink feature is useful for automatically detecting links
    359 //          in the page
    360 //          contents. For example, things like "http://www.foxitsoftware.com"
    361 //          will be detected,
    362 //          so applications can allow the user to click on those characters to
    363 //          activate the link,
    364 //          even if the PDF doesn't come with link annotations.
    365 //
    366 //          FPDFLink_CloseWebLinks must be called to release resources.
    367 //
    368 FPDF_EXPORT FPDF_PAGELINK FPDF_CALLCONV
    369 FPDFLink_LoadWebLinks(FPDF_TEXTPAGE text_page);
    370 
    371 // Function: FPDFLink_CountWebLinks
    372 //          Count number of detected web links.
    373 // Parameters:
    374 //          link_page   -   Handle returned by FPDFLink_LoadWebLinks.
    375 // Return Value:
    376 //          Number of detected web links.
    377 //
    378 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountWebLinks(FPDF_PAGELINK link_page);
    379 
    380 // Function: FPDFLink_GetURL
    381 //          Fetch the URL information for a detected web link.
    382 // Parameters:
    383 //          link_page   -   Handle returned by FPDFLink_LoadWebLinks.
    384 //          link_index  -   Zero-based index for the link.
    385 //          buffer      -   A unicode buffer for the result.
    386 //          buflen      -   Number of characters (not bytes) for the buffer,
    387 //                          including an additional terminator.
    388 // Return Value:
    389 //          If |buffer| is NULL or |buflen| is zero, return the number of
    390 //          characters (not bytes) needed to buffer the result (an additional
    391 //          terminator is included in this count).
    392 //          Otherwise, copy the result into |buffer|, truncating at |buflen| if
    393 //          the result is too large to fit, and return the number of characters
    394 //          actually copied into the buffer (the additional terminator is also
    395 //          included in this count).
    396 //          If |link_index| does not correspond to a valid link, then the result
    397 //          is an empty string.
    398 //
    399 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_GetURL(FPDF_PAGELINK link_page,
    400                                               int link_index,
    401                                               unsigned short* buffer,
    402                                               int buflen);
    403 
    404 // Function: FPDFLink_CountRects
    405 //          Count number of rectangular areas for the link.
    406 // Parameters:
    407 //          link_page   -   Handle returned by FPDFLink_LoadWebLinks.
    408 //          link_index  -   Zero-based index for the link.
    409 // Return Value:
    410 //          Number of rectangular areas for the link.  If |link_index| does
    411 //          not correspond to a valid link, then 0 is returned.
    412 //
    413 FPDF_EXPORT int FPDF_CALLCONV FPDFLink_CountRects(FPDF_PAGELINK link_page,
    414                                                   int link_index);
    415 
    416 // Function: FPDFLink_GetRect
    417 //          Fetch the boundaries of a rectangle for a link.
    418 // Parameters:
    419 //          link_page   -   Handle returned by FPDFLink_LoadWebLinks.
    420 //          link_index  -   Zero-based index for the link.
    421 //          rect_index  -   Zero-based index for a rectangle.
    422 //          left        -   Pointer to a double value receiving the rectangle
    423 //                          left boundary.
    424 //          top         -   Pointer to a double value receiving the rectangle
    425 //                          top boundary.
    426 //          right       -   Pointer to a double value receiving the rectangle
    427 //                          right boundary.
    428 //          bottom      -   Pointer to a double value receiving the rectangle
    429 //                          bottom boundary.
    430 // Return Value:
    431 //          On success, return TRUE and fill in |left|, |top|, |right|, and
    432 //          |bottom|. If |link_page| is invalid or if |link_index| does not
    433 //          correspond to a valid link, then return FALSE, and the out
    434 //          parameters remain unmodified.
    435 //
    436 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDFLink_GetRect(FPDF_PAGELINK link_page,
    437                                                      int link_index,
    438                                                      int rect_index,
    439                                                      double* left,
    440                                                      double* top,
    441                                                      double* right,
    442                                                      double* bottom);
    443 
    444 // Function: FPDFLink_CloseWebLinks
    445 //          Release resources used by weblink feature.
    446 // Parameters:
    447 //          link_page   -   Handle returned by FPDFLink_LoadWebLinks.
    448 // Return Value:
    449 //          None.
    450 //
    451 FPDF_EXPORT void FPDF_CALLCONV FPDFLink_CloseWebLinks(FPDF_PAGELINK link_page);
    452 
    453 #ifdef __cplusplus
    454 }
    455 #endif
    456 
    457 #endif  // PUBLIC_FPDF_TEXT_H_
    458