Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
      8 #define CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
      9 
     10 #include <map>
     11 #include <memory>
     12 #include <set>
     13 #include <utility>
     14 #include <vector>
     15 
     16 #include "core/fpdfapi/parser/cpdf_parser.h"
     17 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
     18 #include "core/fxcrt/unowned_ptr.h"
     19 
     20 class CPDF_CrossRefAvail;
     21 class CPDF_Dictionary;
     22 class CPDF_HintTables;
     23 class CPDF_IndirectObjectHolder;
     24 class CPDF_LinearizedHeader;
     25 class CPDF_PageObjectAvail;
     26 class CPDF_Parser;
     27 class CPDF_ReadValidator;
     28 
     29 enum PDF_DATAAVAIL_STATUS {
          // Status values for CPDF_DataAvail: this is the type of its
          // |m_docStatus| member, which is initialized to PDF_DATAAVAIL_HEADER.
          // Only the first enumerator has an explicit value; the rest are
          // numbered consecutively from it (HEADER == 0 ... LOADALLFILE == 12).
          // NOTE(review): the names suggest one value per step of the
          // availability check (compare CPDF_DataAvail::CheckHeader(),
          // CheckFirstPage(), CheckHintTables(), ...), but the transitions are
          // implemented in the .cpp and are not visible here -- confirm there.
     30   PDF_DATAAVAIL_HEADER = 0,
     31   PDF_DATAAVAIL_FIRSTPAGE,
     32   PDF_DATAAVAIL_HINTTABLE,
     33   PDF_DATAAVAIL_LOADALLCROSSREF,
     34   PDF_DATAAVAIL_ROOT,
     35   PDF_DATAAVAIL_INFO,
     36   PDF_DATAAVAIL_PAGETREE,
     37   PDF_DATAAVAIL_PAGE,
     38   PDF_DATAAVAIL_PAGE_LATERLOAD,
     39   PDF_DATAAVAIL_RESOURCES,
          // NOTE(review): DONE and ERROR are presumably the terminal states; note
          // that LOADALLFILE is declared after them, so "value >= DONE" is not a
          // safe test for "finished".
     40   PDF_DATAAVAIL_DONE,
     41   PDF_DATAAVAIL_ERROR,
     42   PDF_DATAAVAIL_LOADALLFILE,
     43 };
     44 
     45 enum PDF_PAGENODE_TYPE {
          // Kind tag for CPDF_DataAvail::PageNode (type of its |m_type| field).
          // Values are numbered consecutively from UNKNOWN == 0.
          // NOTE(review): presumably PAGE is a leaf page, PAGES an intermediate
          // page-tree node and ARRAY a kids array (compare
          // CheckUnknownPageNode() / CheckArrayPageNode()); the meaning is
          // assigned in the .cpp -- confirm there.
     46   PDF_PAGENODE_UNKNOWN = 0,
     47   PDF_PAGENODE_PAGE,
     48   PDF_PAGENODE_PAGES,
     49   PDF_PAGENODE_ARRAY,
     50 };
     51 
     52 class CPDF_DataAvail final {
          // Answers availability queries (whole document, a single page, the
          // form) for a PDF that is read through a seekable stream, and can parse
          // the document via ParseDocument(). Callers supply a FileAvail callback
          // at construction and, optionally, a DownloadHints sink on each query.
          //
          // The class is |final| and declares no friends in this header, so the
          // |protected| section below is effectively private.
          // NOTE(review): this header only declares the interface; every
          // behavioural detail lives in the .cpp and should be confirmed there.
     53  public:
     54   // Must match PDF_DATA_* definitions in public/fpdf_dataavail.h, but cannot
     55   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
     56   // to make sure the two sets of values match.
     57   enum DocAvailStatus {
     58     DataError = -1,        // PDF_DATA_ERROR
     59     DataNotAvailable = 0,  // PDF_DATA_NOTAVAIL
     60     DataAvailable = 1,     // PDF_DATA_AVAIL
     61   };
     62 
     63   // Must match PDF_*LINEAR* definitions in public/fpdf_dataavail.h, but cannot
     64   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
     65   // to make sure the two sets of values match.
     66   enum DocLinearizationStatus {
     67     LinearizationUnknown = -1,  // PDF_LINEARIZATION_UNKNOWN
     68     NotLinearized = 0,          // PDF_NOT_LINEARIZED
     69     Linearized = 1,             // PDF_LINEARIZED
     70   };
     71 
     72   // Must match PDF_FORM_* definitions in public/fpdf_dataavail.h, but cannot
     73   // #include that header. fpdfsdk/fpdf_dataavail.cpp has static_asserts
     74   // to make sure the two sets of values match.
     75   enum DocFormStatus {
     76     FormError = -1,        // PDF_FORM_ERROR
     77     FormNotAvailable = 0,  // PDF_FORM_NOTAVAIL
     78     FormAvailable = 1,     // PDF_FORM_AVAIL
     79     FormNotExist = 2,      // PDF_FORM_NOTEXIST
     80   };
     81 
          // Abstract callback interface (IsDataAvail() is pure virtual). A
          // FileAvail* is passed to the CPDF_DataAvail constructor and kept in
          // |m_pFileAvail|. The destructor is declared here and defined
          // out-of-line.
          // NOTE(review): presumably IsDataAvail() reports whether the |size|
          // bytes starting at |offset| can already be read -- confirm against an
          // implementer.
     82   class FileAvail {
     83    public:
     84     virtual ~FileAvail();
     85     virtual bool IsDataAvail(FX_FILESIZE offset, size_t size) = 0;
     86   };
     87 
          // Abstract callback interface (AddSegment() is pure virtual). A
          // DownloadHints* is accepted by IsDocAvail(), IsPageAvail() and
          // IsFormAvail(). The destructor is declared here and defined
          // out-of-line.
          // NOTE(review): presumably AddSegment() is how the checker asks the
          // embedder to fetch a missing byte range -- confirm in the .cpp.
     88   class DownloadHints {
     89    public:
     90     virtual ~DownloadHints();
     91     virtual void AddSegment(FX_FILESIZE offset, size_t size) = 0;
     92   };
     93 
          // |pFileAvail| is stored as a raw pointer (see |m_pFileAvail|); nothing
          // in this header takes ownership of it.
          // NOTE(review): |bSupportHintTable| presumably initializes
          // |m_bSupportHintTable|, and |pFileRead| presumably backs
          // |m_pFileRead| / |m_dwFileLen| -- confirm in the constructor.
     94   CPDF_DataAvail(FileAvail* pFileAvail,
     95                  const RetainPtr<IFX_SeekableReadStream>& pFileRead,
     96                  bool bSupportHintTable);
     97   ~CPDF_DataAvail();
     98 
          // Availability queries. Each returns one of the status enums declared
          // above; |dwPage| selects the page for IsPageAvail().
          // NOTE(review): whether |pHints| may be null and whether |dwPage| is
          // zero-based are not visible from this header.
     99   DocAvailStatus IsDocAvail(DownloadHints* pHints);
    100   DocAvailStatus IsPageAvail(uint32_t dwPage, DownloadHints* pHints);
    101   DocFormStatus IsFormAvail(DownloadHints* pHints);
    102   DocLinearizationStatus IsLinearizedPDF();
    103   RetainPtr<IFX_SeekableReadStream> GetFileRead() const;
    104   int GetPageCount() const;
          // Returns a raw pointer; who owns the dictionary is not expressed in the
          // type. Non-const, unlike GetPageCount().
    105   CPDF_Dictionary* GetPage(int index);
    106   RetainPtr<CPDF_ReadValidator> GetValidator() const;
    107 
          // Returns a parser error code together with a uniquely-owned document,
          // so the caller receives ownership of whatever document is returned.
          // NOTE(review): presumably the document pointer is null when the error
          // code is not a success value -- confirm in the .cpp.
    108   std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>> ParseDocument(
    109       const char* password);
    110 
          // Non-owning view of |m_pHintTables|; null while that unique_ptr is
          // empty (its default state).
    111   const CPDF_HintTables* GetHintTables() const { return m_pHintTables.get(); }
    112 
    113  protected:
          // One node of a tree: a type tag, a page number and uniquely-owned
          // children. |m_type| and |m_dwPageNo| have no in-class initializer, so
          // their initial values depend on the out-of-line constructor.
    114   class PageNode {
    115    public:
    116     PageNode();
    117     ~PageNode();
    118 
    119     PDF_PAGENODE_TYPE m_type;
    120     uint32_t m_dwPageNo;
    121     std::vector<std::unique_ptr<PageNode>> m_ChildNodes;
    122   };
    123 
          // NOTE(review): presumably the limit applied to the |level| argument of
          // CheckPageNode() to bound page-tree recursion -- confirm in the .cpp.
    124   static const int kMaxPageRecursionDepth = 1024;
    125 
          // Internal helpers. Most return bool; the status-returning ones use the
          // public enums above.
          // NOTE(review): the Check*() names line up with PDF_DATAAVAIL_STATUS
          // values, suggesting one helper per state, but the dispatch is not
          // visible here.
    126   bool CheckDocStatus();
    127   bool CheckHeader();
    128   bool CheckFirstPage();
    129   bool CheckHintTables();
    130   bool CheckRoot();
    131   bool CheckInfo();
    132   bool CheckPages();
          // Overloaded: see also CheckPage(uint32_t dwPage) further down.
    133   bool CheckPage();
    134   DocAvailStatus CheckResources(const CPDF_Dictionary* page);
    135   DocFormStatus CheckAcroForm();
    136   bool CheckPageStatus();
    137 
    138   DocAvailStatus CheckHeaderAndLinearized();
          // Both object getters return a uniquely-owned CPDF_Object, i.e. the
          // caller owns the result. |pObjList| is optional (defaults to nullptr);
          // |pExistInFile| is an out-parameter by its type and name.
    139   std::unique_ptr<CPDF_Object> ParseIndirectObjectAt(
    140       FX_FILESIZE pos,
    141       uint32_t objnum,
    142       CPDF_IndirectObjectHolder* pObjList = nullptr);
    143   std::unique_ptr<CPDF_Object> GetObject(uint32_t objnum,
    144                                          bool* pExistInFile);
    145   bool GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages);
    146   bool PreparePageItem();
    147   bool LoadPages();
    148   bool CheckAndLoadAllXref();
    149   bool LoadAllFile();
    150   DocAvailStatus CheckLinearizedData();
    151 
    152   bool CheckPage(uint32_t dwPage);
    153   bool LoadDocPages();
    154   bool LoadDocPage(uint32_t dwPage);
          // |iCount| is taken by non-const reference, so the callee can update the
          // caller's counter; |pageNode| itself is read-only here.
    155   bool CheckPageNode(const PageNode& pageNode,
    156                      int32_t iPage,
    157                      int32_t& iCount,
    158                      int level);
    159   bool CheckUnknownPageNode(uint32_t dwPageNo, PageNode* pPageNode);
    160   bool CheckArrayPageNode(uint32_t dwPageNo, PageNode* pPageNode);
    161   bool CheckPageCount();
    162   bool IsFirstCheck(uint32_t dwPage);
    163   void ResetFirstCheck(uint32_t dwPage);
    164   bool ValidatePage(uint32_t dwPage);
    165   CPDF_SyntaxParser* GetSyntaxParser() const;
    166 
          // Data members. Members are initialized in declaration order, so the
          // order below is significant for the constructor. The three |const|
          // members without an in-class initializer (|m_pFileAvail|,
          // |m_dwFileLen|, |m_bSupportHintTable|) must be set in the constructor's
          // initializer list.
          //
          // Raw-pointer members (|m_pFileAvail|, |m_pDocument|,
          // |m_pCurrentParser|) do not express ownership in their type.
          // NOTE(review): core/fxcrt/unowned_ptr.h is included by this header but
          // no UnownedPtr<> is used in it.
    167   FileAvail* const m_pFileAvail;
    168   RetainPtr<CPDF_ReadValidator> m_pFileRead;
          // Held by value, which is why cpdf_parser.h must be included rather than
          // only forward-declared.
    169   CPDF_Parser m_parser;
    170   std::unique_ptr<CPDF_Object> m_pRoot;
    171   uint32_t m_dwRootObjNum = 0;
    172   uint32_t m_dwInfoObjNum = 0;
    173   std::unique_ptr<CPDF_LinearizedHeader> m_pLinearized;
    174   bool m_bDocAvail = false;
    175   std::unique_ptr<CPDF_CrossRefAvail> m_pCrossRefAvail;
          // Current status value; starts at PDF_DATAAVAIL_HEADER.
    176   PDF_DATAAVAIL_STATUS m_docStatus = PDF_DATAAVAIL_HEADER;
    177   const FX_FILESIZE m_dwFileLen;
    178   CPDF_Document* m_pDocument = nullptr;
    179   std::vector<uint32_t> m_PageObjList;
    180   uint32_t m_PagesObjNum = 0;
    181   bool m_bLinearedDataOK = false;
    182   bool m_bMainXRefLoadTried = false;
    183   bool m_bMainXRefLoadedOK = false;
    184   bool m_bPagesTreeLoad = false;
    185   bool m_bPagesLoad = false;
    186   CPDF_Parser* m_pCurrentParser = nullptr;
    187   std::unique_ptr<CPDF_PageObjectAvail> m_pFormAvail;
    188   std::vector<std::unique_ptr<CPDF_Object>> m_PagesArray;
    189   uint32_t m_dwEncryptObjNum = 0;
    190   bool m_bTotalLoadPageTree = false;
    191   bool m_bCurPageDictLoadOK = false;
          // Root of the PageNode tree (held by value; children are owned through
          // PageNode::m_ChildNodes).
    192   PageNode m_PageNode;
          // NOTE(review): the three sets below are presumably keyed by page
          // number / page number / file position respectively (going by their
          // names and the IsFirstCheck()/ResetFirstCheck() helpers) -- confirm.
    193   std::set<uint32_t> m_pageMapCheckState;
    194   std::set<uint32_t> m_pagesLoadState;
    195   std::set<uint32_t> m_SeenPrevPositions;
          // Exposed read-only through GetHintTables().
    196   std::unique_ptr<CPDF_HintTables> m_pHintTables;
    197   const bool m_bSupportHintTable;
          // Per-key availability checkers, owned by the maps. The second map is
          // keyed by object address, so its keys are only meaningful while the
          // pointed-to CPDF_Object instances stay alive.
          // NOTE(review): the first map is presumably keyed by page index --
          // confirm in the .cpp.
    198   std::map<uint32_t, std::unique_ptr<CPDF_PageObjectAvail>> m_PagesObjAvail;
    199   std::map<const CPDF_Object*, std::unique_ptr<CPDF_PageObjectAvail>>
    200       m_PagesResourcesAvail;
    201   bool m_bHeaderAvail = false;
    202 };
    203 
    204 #endif  // CORE_FPDFAPI_PARSER_CPDF_DATA_AVAIL_H_
    205