Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
      8 #define CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
      9 
     10 #include <algorithm>
     11 #include <memory>
     12 #include <vector>
     13 
     14 #include "core/fxcrt/string_pool_template.h"
     15 #include "core/fxcrt/weak_ptr.h"
     16 
     17 class CPDF_CryptoHandler;
        // Forward declarations (the line above and those below). The class declared
        // in this header names these types only through raw pointers
        // (CPDF_IndirectObjectHolder), std::unique_ptr (CPDF_Object, CPDF_Dictionary,
        // CPDF_Stream) and RetainPtr (CPDF_ReadValidator, IFX_SeekableReadStream).
        // NOTE(review): CPDF_CryptoHandler is not referenced by any declaration
        // visible in this header; confirm whether it is still needed.
     18 class CPDF_Dictionary;
     19 class CPDF_IndirectObjectHolder;
     20 class CPDF_Object;
     21 class CPDF_ReadValidator;
     22 class CPDF_Stream;
     23 class IFX_SeekableReadStream;
     24 
     25 class CPDF_SyntaxParser {
        // Declares a parser that tracks a current position (m_Pos), bounded above
        // by m_FileLen in SetPos(), and exposes word-, keyword- and object-reading
        // entry points. Only GetPos(), SetPos() and GetValidator() are defined
        // inline; every other member function is declaration-only in this header.
     26  public:
          // Mode flag accepted by GetIndirectObject() and GetObjectBodyInternal().
          // NOTE(review): what differs between strict and loose parsing is decided
          // by the implementation, which is not visible here.
     27   enum class ParseType { kStrict, kLoose };
     28 
     29   CPDF_SyntaxParser();
          // NOTE(review): pPool is presumably retained in m_pPool; confirm in the
          // constructor definition.
     30   explicit CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool);
     31   ~CPDF_SyntaxParser();
     32 
          // Two initialization entry points: one takes a raw seekable stream, the
          // other an existing CPDF_ReadValidator. Note that HeaderOffset is passed
          // as uint32_t while the m_HeaderOffset member is an FX_FILESIZE.
     33   void InitParser(const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
     34                   uint32_t HeaderOffset);
     35 
     36   void InitParserWithValidator(const RetainPtr<CPDF_ReadValidator>& pValidator,
     37                                uint32_t HeaderOffset);
     38 
          // Returns the current position as stored in m_Pos.
     39   FX_FILESIZE GetPos() const { return m_Pos; }
          // Stores the smaller of |pos| and m_FileLen, so the position never
          // exceeds m_FileLen. No lower bound is applied here: a |pos| below zero
          // (if FX_FILESIZE is signed) would be stored unchanged.
     40   void SetPos(FX_FILESIZE pos) { m_Pos = std::min(pos, m_FileLen); }
     41 
          // Object-reading entry points. Both return ownership of the result via
          // std::unique_ptr. NOTE(review): presumably a null pointer signals
          // failure; confirm against the implementation.
     42   std::unique_ptr<CPDF_Object> GetObjectBody(
     43       CPDF_IndirectObjectHolder* pObjList);
     44 
     45   std::unique_ptr<CPDF_Object> GetIndirectObject(
     46       CPDF_IndirectObjectHolder* pObjList,
     47       ParseType parse_type);
     48 
          // Token-level and positioning helpers. None are const, so callers should
          // assume they may move the current position.
     49   ByteString GetKeyword();
     50   void ToNextLine();
     51   void ToNextWord();
     52   bool BackwardsSearchToWord(const ByteStringView& word, FX_FILESIZE limit);
     53   FX_FILESIZE FindTag(const ByteStringView& tag, FX_FILESIZE limit);
     54   bool ReadBlock(uint8_t* pBuf, uint32_t size);
          // Writes the result through a reference out-parameter, whereas the
          // private GetCharAtBackward() uses a pointer out-parameter.
     55   bool GetCharAt(FX_FILESIZE pos, uint8_t& ch);
          // NOTE(review): bIsNumber is an out-parameter by pointer; whether nullptr
          // is accepted is not visible from this header.
     56   ByteString GetNextWord(bool* bIsNumber);
     57   ByteString PeekNextWord(bool* bIsNumber);
     58 
          // Returns the underlying stream by value, typed as
          // IFX_SeekableReadStream (defined outside this header).
     59   RetainPtr<IFX_SeekableReadStream> GetFileAccess() const;
     60 
          // Returns a reference to m_pFileAccess, which despite its name holds a
          // CPDF_ReadValidator. The reference is to a member, so it is only valid
          // while this parser object is alive.
     61   const RetainPtr<CPDF_ReadValidator>& GetValidator() const {
     62     return m_pFileAccess;
     63   }
     64 
     65  private:
          // These classes (including one unit-test class) are granted access to
          // the private members below.
     66   friend class CPDF_Parser;
     67   friend class CPDF_DataAvail;
     68   friend class cpdf_syntax_parser_ReadHexString_Test;
     69 
          // Recursion limit constant and a counter. The counter is a static data
          // member, so it is shared by every CPDF_SyntaxParser instance.
          // NOTE(review): where the counter is incremented and checked is not
          // visible here; presumably in the object-parsing routines.
     70   static const int kParserMaxRecursionDepth = 64;
     71   static int s_CurrentRecursionDepth;
     72 
     73   uint32_t GetDirectNum();
     74   bool ReadBlockAt(FX_FILESIZE read_pos);
     75   bool GetNextChar(uint8_t& ch);
     76   bool GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch);
          // Returns void, unlike GetNextWord(). NOTE(review): presumably leaves
          // its result in m_WordBuffer / m_WordSize; confirm in the implementation.
     77   void GetNextWordInternal(bool* bIsNumber);
     78   bool IsWholeWord(FX_FILESIZE startpos,
     79                    FX_FILESIZE limit,
     80                    const ByteStringView& tag,
     81                    bool checkKeyword);
     82 
     83   ByteString ReadString();
     84   ByteString ReadHexString();
     85   unsigned int ReadEOLMarkers(FX_FILESIZE pos);
          // Takes ownership of |pDict| (passed by value as std::unique_ptr).
     86   std::unique_ptr<CPDF_Stream> ReadStream(
     87       std::unique_ptr<CPDF_Dictionary> pDict);
     88 
     89   bool IsPositionRead(FX_FILESIZE pos) const;
     90 
     91   std::unique_ptr<CPDF_Object> GetObjectBodyInternal(
     92       CPDF_IndirectObjectHolder* pObjList,
     93       ParseType parse_type);
     94 
          // Data members. None has an in-class initializer, so initial values
          // depend entirely on the constructors defined outside this header.
          // Current position; read by GetPos(), written by SetPos().
     95   FX_FILESIZE m_Pos;
          // Holds a CPDF_ReadValidator (not a bare stream); see GetValidator().
     96   RetainPtr<CPDF_ReadValidator> m_pFileAccess;
     97   FX_FILESIZE m_HeaderOffset;
          // Upper bound applied to the position in SetPos().
     98   FX_FILESIZE m_FileLen;
          // NOTE(review): m_pFileBuf / m_BufOffset look like a read cache and the
          // offset it starts at; confirm in ReadBlockAt()/IsPositionRead().
     99   std::vector<uint8_t> m_pFileBuf;
    100   FX_FILESIZE m_BufOffset;
          // Fixed 257-byte word buffer and its companion size field.
          // NOTE(review): presumably 256 bytes of payload plus a terminator;
          // confirm against the code that fills it.
    101   uint8_t m_WordBuffer[257];
    102   uint32_t m_WordSize;
    103   WeakPtr<ByteStringPool> m_pPool;
    104 };
    105 
    106 #endif  // CORE_FPDFAPI_PARSER_CPDF_SYNTAX_PARSER_H_
    107