Home | History | Annotate | Download | only in fpdftext
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
      8 #define CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
      9 
     10 #include "../fpdfapi/fpdf_page.h"
     11 #include "../fpdfapi/fpdf_pageobj.h"
     12 #include "../fpdfapi/fpdf_parser.h"
     13 
     14 class CPDF_PageObjects;
     15 class IPDF_LinkExtract;
     16 class IPDF_ReflowedPage;
     17 class IPDF_TextPage;
     18 class IPDF_TextPageFind;
     19 
     20 #define PDF2TXT_AUTO_ROTATE		1
     21 #define PDF2TXT_AUTO_WIDTH		2
     22 #define PDF2TXT_KEEP_COLUMN		4
     23 #define PDF2TXT_USE_OCR			8
     24 #define PDF2TXT_INCLUDE_INVISIBLE	16
     25 void PDF_GetPageText(CFX_ByteStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
     26                      int iMinWidth, FX_DWORD flags);
     27 void PDF_GetPageText_Unicode(CFX_WideStringArray& lines, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
     28                              int iMinWidth, FX_DWORD flags);
     29 void PDF_GetTextStream_Unicode(CFX_WideTextBuf& buffer, CPDF_Document* pDoc, CPDF_Dictionary* pPage,
     30                                FX_DWORD flags);
     31 CFX_WideString PDF_GetFirstTextLine_Unicode(CPDF_Document* pDoc, CPDF_Dictionary* pPage);
     32 #define CHAR_ERROR			-1
     33 #define CHAR_NORMAL			0
     34 #define CHAR_GENERATED		1
     35 #define CHAR_UNUNICODE		2
     36 typedef struct {
     37     FX_WCHAR			m_Unicode;
     38     FX_WCHAR			m_Charcode;
     39     FX_INT32			m_Flag;
     40     FX_FLOAT			m_FontSize;
     41     FX_FLOAT			m_OriginX;
     42     FX_FLOAT			m_OriginY;
     43     CFX_FloatRect		m_CharBox;
     44     CPDF_TextObject*	m_pTextObj;
     45     CFX_AffineMatrix	m_Matrix;
     46 } FPDF_CHAR_INFO;
     47 typedef	CFX_ArrayTemplate<CFX_FloatRect> CFX_RectArray;
     48 #define FPDFTEXT_LRTB	0
     49 #define FPDFTEXT_RLTB	1
     50 #define FPDFTEXT_TBRL	2
     51 #define FPDFTEXT_LEFT			-1
     52 #define FPDFTEXT_RIGHT			1
     53 #define FPDFTEXT_UP				-2
     54 #define FPDFTEXT_DOWN			2
     55 #define FPDFTEXT_WRITINGMODE_UNKNOW	0
     56 #define FPDFTEXT_WRITINGMODE_LRTB	1
     57 #define FPDFTEXT_WRITINGMODE_RLTB	2
     58 #define FPDFTEXT_WRITINGMODE_TBRL	3
     59 class CPDFText_ParseOptions
     60 {
     61 public:
     62 
     63     CPDFText_ParseOptions();
     64     FX_BOOL			m_bGetCharCodeOnly;
     65     FX_BOOL			m_bNormalizeObjs;
     66     FX_BOOL			m_bOutputHyphen;
     67 };
     68 class IPDF_TextPage
     69 {
     70 public:
     71 
     72     virtual ~IPDF_TextPage() {}
     73     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, CPDFText_ParseOptions ParserOptions);
     74     static IPDF_TextPage*	CreateTextPage(const CPDF_Page* pPage, int flags = 0);
     75     static IPDF_TextPage*	CreateTextPage(const CPDF_PageObjects* pObjs, int flags = 0);
     76     static IPDF_TextPage*	CreateReflowTextPage(IPDF_ReflowedPage* pRefPage);
     77 
     78     virtual void			NormalizeObjects(FX_BOOL bNormalize) = 0;
     79 
     80     virtual FX_BOOL			ParseTextPage() = 0;
     81 
     82 
     83     virtual FX_BOOL			IsParsered() const = 0;
     84 public:
     85 
     86     virtual int CharIndexFromTextIndex(int TextIndex) const = 0;
     87 
     88     virtual int TextIndexFromCharIndex(int CharIndex) const = 0;
     89 
     90 
     91     virtual int				CountChars() const = 0;
     92 
     93     virtual	void			GetCharInfo(int index, FPDF_CHAR_INFO & info) const = 0;
     94 
     95     virtual void			GetRectArray(int start, int nCount, CFX_RectArray& rectArray) const = 0;
     96 
     97 
     98 
     99     virtual int				GetIndexAtPos(CPDF_Point point, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
    100 
    101     virtual int				GetIndexAtPos(FX_FLOAT x, FX_FLOAT y, FX_FLOAT xTorelance, FX_FLOAT yTorelance) const = 0;
    102 
    103     virtual	int				GetOrderByDirection(int index, int direction) const = 0;
    104 
    105     virtual CFX_WideString	GetTextByRect(const CFX_FloatRect& rect) const = 0;
    106 
    107     virtual void			GetRectsArrayByRect(const CFX_FloatRect& rect, CFX_RectArray& resRectArray) const = 0;
    108 
    109 
    110     virtual int				CountRects(int start, int nCount) = 0;
    111 
    112     virtual	void			GetRect(int rectIndex, FX_FLOAT& left, FX_FLOAT& top, FX_FLOAT& right, FX_FLOAT &bottom) const = 0;
    113 
    114     virtual FX_BOOL			GetBaselineRotate(int rectIndex, int& Rotate) = 0;
    115 
    116     virtual FX_BOOL			GetBaselineRotate(const CFX_FloatRect& rect, int& Rotate) = 0;
    117 
    118     virtual	int				CountBoundedSegments(FX_FLOAT left, FX_FLOAT top, FX_FLOAT right, FX_FLOAT bottom, FX_BOOL bContains = FALSE) = 0;
    119 
    120     virtual	void			GetBoundedSegment(int index, int& start, int& count) const = 0;
    121 
    122 
    123     virtual int				GetWordBreak(int index, int direction) const = 0;
    124 
    125     virtual CFX_WideString	GetPageText(int start = 0, int nCount = -1 ) const = 0;
    126 };
    127 #define FPDFTEXT_MATCHCASE      0x00000001
    128 #define FPDFTEXT_MATCHWHOLEWORD 0x00000002
    129 #define FPDFTEXT_CONSECUTIVE	0x00000004
    130 class IPDF_TextPageFind
    131 {
    132 public:
    133 
    134     virtual	~IPDF_TextPageFind() {}
    135 
    136     static	IPDF_TextPageFind*	CreatePageFind(const IPDF_TextPage* pTextPage);
    137 public:
    138 
    139     virtual	FX_BOOL				FindFirst(const CFX_WideString& findwhat, int flags, int startPos = 0) = 0;
    140 
    141     virtual	FX_BOOL				FindNext() = 0;
    142 
    143     virtual	FX_BOOL				FindPrev() = 0;
    144 
    145     virtual void				GetRectArray(CFX_RectArray& rects) const = 0;
    146 
    147     virtual int					GetCurOrder() const = 0;
    148 
    149     virtual int					GetMatchedCount() const = 0;
    150 };
    151 class IPDF_LinkExtract
    152 {
    153 public:
    154 
    155     virtual	~IPDF_LinkExtract() {}
    156 
    157     static	IPDF_LinkExtract*	CreateLinkExtract();
    158 
    159     virtual FX_BOOL				ExtractLinks(const IPDF_TextPage* pTextPage) = 0;
    160 public:
    161 
    162     virtual int					CountLinks() const = 0;
    163 
    164     virtual CFX_WideString		GetURL(int index) const = 0;
    165 
    166     virtual	void				GetBoundedSegment(int index, int& start, int& count) const = 0;
    167 
    168     virtual void				GetRects(int index, CFX_RectArray& rects) const = 0;
    169 };
    170 
    171 #endif  // CORE_INCLUDE_FPDFTEXT_FPDF_TEXT_H_
    172