Home | History | Annotate | Download | only in xml
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_FXCRT_XML_CFX_SAXREADER_H_
      8 #define CORE_FXCRT_XML_CFX_SAXREADER_H_
      9 
     10 #include <memory>
     11 #include <stack>
     12 #include <vector>
     13 
     14 #include "core/fxcrt/fx_string.h"
     15 #include "core/fxcrt/retain_ptr.h"
     16 
     17 class CFX_SAXCommentContext;
     18 class CFX_SAXContext;
     19 class IFX_SeekableReadStream;
     20 enum class CFX_SaxMode;
     21 
     22 class CFX_SAXItem {
     23  public:
     24   enum class Type {
     25     Unknown = 0,
     26     Instruction,
     27     Declaration,
     28     Comment,
     29     Tag,
     30     Text,
     31     CharData,
     32   };
     33 
     34   explicit CFX_SAXItem(uint32_t id)
     35       : m_pNode(nullptr), m_eNode(Type::Unknown), m_dwID(id), m_bSkip(false) {}
     36 
     37   CFX_SAXContext* m_pNode;
     38   Type m_eNode;
     39   const uint32_t m_dwID;
     40   bool m_bSkip;
     41 };
     42 
     43 class CFX_SAXFile {
     44  public:
     45   CFX_SAXFile();
     46   ~CFX_SAXFile();
     47 
     48   bool StartFile(const RetainPtr<IFX_SeekableReadStream>& pFile,
     49                  uint32_t dwStart,
     50                  uint32_t dwLen);
     51   bool ReadNextBlock();
     52   void Reset();
     53 
     54   RetainPtr<IFX_SeekableReadStream> m_pFile;
     55   uint32_t m_dwStart;
     56   uint32_t m_dwEnd;
     57   uint32_t m_dwCur;
     58   uint8_t* m_pBuf;
     59   uint32_t m_dwBufSize;
     60   uint32_t m_dwBufIndex;
     61 };
     62 
     63 enum CFX_SaxParseMode {
     64   CFX_SaxParseMode_NotConvert_amp = 1 << 0,
     65   CFX_SaxParseMode_NotConvert_lt = 1 << 1,
     66   CFX_SaxParseMode_NotConvert_gt = 1 << 2,
     67   CFX_SaxParseMode_NotConvert_apos = 1 << 3,
     68   CFX_SaxParseMode_NotConvert_quot = 1 << 4,
     69   CFX_SaxParseMode_NotConvert_sharp = 1 << 5,
     70   CFX_SaxParseMode_NotSkipSpace = 1 << 6
     71 };
     72 
     73 class CFX_SAXReader {
     74  public:
     75   class HandlerIface {
     76    public:
     77     virtual ~HandlerIface() {}
     78     virtual CFX_SAXContext* OnTagEnter(const ByteStringView& bsTagName,
     79                                        CFX_SAXItem::Type eType,
     80                                        uint32_t dwStartPos) = 0;
     81     virtual void OnTagAttribute(CFX_SAXContext* pTag,
     82                                 const ByteStringView& bsAttri,
     83                                 const ByteStringView& bsValue) = 0;
     84     virtual void OnTagBreak(CFX_SAXContext* pTag) = 0;
     85     virtual void OnTagData(CFX_SAXContext* pTag,
     86                            CFX_SAXItem::Type eType,
     87                            const ByteStringView& bsData,
     88                            uint32_t dwStartPos) = 0;
     89     virtual void OnTagClose(CFX_SAXContext* pTag, uint32_t dwEndPos) = 0;
     90     virtual void OnTagEnd(CFX_SAXContext* pTag,
     91                           const ByteStringView& bsTagName,
     92                           uint32_t dwEndPos) = 0;
     93     virtual void OnTargetData(CFX_SAXContext* pTag,
     94                               CFX_SAXItem::Type eType,
     95                               const ByteStringView& bsData,
     96                               uint32_t dwStartPos) = 0;
     97   };
     98 
     99   CFX_SAXReader();
    100   ~CFX_SAXReader();
    101 
    102   int32_t StartParse(const RetainPtr<IFX_SeekableReadStream>& pFile,
    103                      uint32_t dwStart = 0,
    104                      uint32_t dwLen = -1,
    105                      uint32_t dwParseMode = 0);
    106   int32_t ContinueParse();
    107   void SetHandler(HandlerIface* pHandler) { m_pHandler = pHandler; }
    108 
    109  private:
    110   void ParseInternal();
    111   void SkipCurrentNode();
    112   void AppendData(uint8_t ch);
    113   void AppendName(uint8_t ch);
    114   void ParseText();
    115   void ParseNodeStart();
    116   void ParseInstruction();
    117   void ParseDeclOrComment();
    118   void ParseDeclNode();
    119   void ParseComment();
    120   void ParseCommentContent();
    121   void ParseTagName();
    122   void ParseTagAttributeName();
    123   void ParseTagAttributeEqual();
    124   void ParseTagAttributeValue();
    125   void ParseMaybeClose();
    126   void ParseTagClose();
    127   void ParseTagEnd();
    128   void ParseTargetData();
    129   void Reset();
    130   void ClearData();
    131   void ClearName();
    132   void AppendToData(uint8_t ch);
    133   void AppendToName(uint8_t ch);
    134   void BackUpAndReplaceDataAt(int32_t index, uint8_t ch);
    135   bool IsEntityStart(uint8_t ch) const;
    136   bool IsEntityEnd(uint8_t ch) const;
    137   int32_t CurrentDataIndex() const;
    138   void Push();
    139   void Pop();
    140   CFX_SAXItem* GetCurrentItem() const;
    141   bool SkipSpace(uint8_t ch);
    142   void SkipNode();
    143   void NotifyData();
    144   void NotifyEnter();
    145   void NotifyAttribute();
    146   void NotifyBreak();
    147   void NotifyClose();
    148   void NotifyEnd();
    149   void NotifyTargetData();
    150   void ReallocDataBuffer();
    151   void ReallocNameBuffer();
    152   void ParseChar(uint8_t ch);
    153 
    154   CFX_SAXFile m_File;
    155   HandlerIface* m_pHandler;
    156   int32_t m_iState;
    157   std::stack<std::unique_ptr<CFX_SAXItem>> m_Stack;
    158   uint32_t m_dwItemID;
    159   CFX_SaxMode m_eMode;
    160   CFX_SaxMode m_ePrevMode;
    161   bool m_bCharData;
    162   uint8_t m_CurByte;
    163   uint32_t m_dwDataOffset;
    164   std::stack<char> m_SkipStack;
    165   uint8_t m_SkipChar;
    166   uint32_t m_dwNodePos;
    167   std::vector<uint8_t> m_Data;
    168   int32_t m_iEntityStart;  // Index into m_Data.
    169   std::vector<uint8_t> m_Name;
    170   uint32_t m_dwParseMode;
    171   std::unique_ptr<CFX_SAXCommentContext> m_pCommentContext;
    172 };
    173 
    174 #endif  // CORE_FXCRT_XML_CFX_SAXREADER_H_
    175