Home | History | Annotate | Download | only in xml
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef XFA_FDE_XML_FDE_XML_IMP_H_
      8 #define XFA_FDE_XML_FDE_XML_IMP_H_
      9 
     10 #include <memory>
     11 #include <vector>
     12 
     13 #include "core/fxcrt/fx_system.h"
     14 #include "xfa/fde/xml/fde_xml.h"
     15 #include "xfa/fgas/crt/fgas_stream.h"
     16 #include "xfa/fgas/crt/fgas_utils.h"
     17 
// Forward declarations. Every class listed here except CFDE_XMLDOMParser is
// defined later in this header; CFDE_XMLDOMParser is only named, not defined,
// in this file.
class CFDE_BlockBuffer;
class CFDE_XMLInstruction;
class CFDE_XMLElement;
class CFDE_XMLText;
class CFDE_XMLDoc;
class CFDE_XMLDOMParser;
class CFDE_XMLSyntaxParser;
class IFDE_XMLParser;
     26 
// Base class of the XML node hierarchy declared in this header. A node is
// linked to other nodes through four public raw pointers (parent, child,
// prior, next).
// NOTE(review): which object owns and frees linked nodes is not visible from
// this header -- confirm in the implementation before deleting nodes.
class CFDE_XMLNode {
 public:
  // Selector passed to GetNodeItem(), InsertNodeItem() and RemoveNodeItem()
  // to name the related node they operate on. Enumerators are consecutive,
  // starting at Root = 0.
  enum NodeItem {
    Root = 0,
    Parent,
    FirstSibling,
    PriorSibling,
    NextSibling,
    LastSibling,
    FirstNeighbor,
    PriorNeighbor,
    NextNeighbor,
    LastNeighbor,
    FirstChild,
    LastChild
  };

  CFDE_XMLNode();
  virtual ~CFDE_XMLNode();

  // Virtual hooks; overridden in this header by CFDE_XMLInstruction,
  // CFDE_XMLElement, CFDE_XMLText and CFDE_XMLCharData.
  virtual FDE_XMLNODETYPE GetType() const;
  virtual CFDE_XMLNode* Clone(bool bRecursive);

  // Child-list access by index or by node pointer.
  // |index| of InsertChildNode() defaults to -1 -- presumably "append at the
  // end"; TODO confirm against the implementation.
  int32_t CountChildNodes() const;
  CFDE_XMLNode* GetChildNode(int32_t index) const;
  int32_t GetChildNodeIndex(CFDE_XMLNode* pNode) const;
  int32_t InsertChildNode(CFDE_XMLNode* pNode, int32_t index = -1);
  void RemoveChildNode(CFDE_XMLNode* pNode);
  void DeleteChildren();
  void CloneChildren(CFDE_XMLNode* pClone);

  // Looks up a node from a path string. |iLength| defaults to -1 and
  // |bQualifiedName| to true; the meaning of -1 (presumably "use the whole
  // string") should be verified in the implementation.
  CFDE_XMLNode* GetPath(const FX_WCHAR* pPath,
                        int32_t iLength = -1,
                        bool bQualifiedName = true) const;

  // Navigation and editing relative to this node, selected by NodeItem.
  int32_t GetNodeLevel() const;
  CFDE_XMLNode* GetNodeItem(CFDE_XMLNode::NodeItem eItem) const;
  bool InsertNodeItem(CFDE_XMLNode::NodeItem eItem, CFDE_XMLNode* pNode);
  CFDE_XMLNode* RemoveNodeItem(CFDE_XMLNode::NodeItem eItem);

  // Writes this node to |pXMLStream|.
  void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream);

  // Tree links. These are public and unguarded, so external code can rewire
  // them directly.
  CFDE_XMLNode* m_pParent;
  CFDE_XMLNode* m_pChild;
  CFDE_XMLNode* m_pPrior;
  CFDE_XMLNode* m_pNext;
};
     74 
// Node subclass that stores a target name plus two string lists: attributes
// and target data. Presumably models an XML processing instruction -- confirm
// against the parser that creates it.
class CFDE_XMLInstruction : public CFDE_XMLNode {
 public:
  explicit CFDE_XMLInstruction(const CFX_WideString& wsTarget);
  ~CFDE_XMLInstruction() override;

  // CFDE_XMLNode
  FDE_XMLNODETYPE GetType() const override;
  CFDE_XMLNode* Clone(bool bRecursive) override;

  // Copies the stored target name into |wsTarget|.
  void GetTargetName(CFX_WideString& wsTarget) const { wsTarget = m_wsTarget; }

  // Attribute access, by position or by name. The typed getters take a
  // default value (nullptr / 0 / 0 when omitted) -- presumably what is
  // returned when the attribute is missing; TODO confirm.
  int32_t CountAttributes() const;
  bool GetAttribute(int32_t index,
                    CFX_WideString& wsAttriName,
                    CFX_WideString& wsAttriValue) const;
  bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
  void GetString(const FX_WCHAR* pwsAttriName,
                 CFX_WideString& wsAttriValue,
                 const FX_WCHAR* pwsDefValue = nullptr) const;
  void SetString(const CFX_WideString& wsAttriName,
                 const CFX_WideString& wsAttriValue);
  int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
  void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);
  FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
  void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);
  void RemoveAttribute(const FX_WCHAR* pwsAttriName);

  // Target-data access by position.
  int32_t CountData() const;
  bool GetData(int32_t index, CFX_WideString& wsData) const;
  void AppendData(const CFX_WideString& wsData);
  void RemoveData(int32_t index);

  // Public storage.
  // NOTE(review): the layout of m_Attributes (e.g. whether names and values
  // alternate in one vector) is not visible here -- check the implementation
  // before indexing it directly.
  CFX_WideString m_wsTarget;
  std::vector<CFX_WideString> m_Attributes;
  std::vector<CFX_WideString> m_TargetData;
};
    109 
// Node subclass that stores a tag name and a list of attribute strings.
class CFDE_XMLElement : public CFDE_XMLNode {
 public:
  explicit CFDE_XMLElement(const CFX_WideString& wsTag);
  ~CFDE_XMLElement() override;

  // CFDE_XMLNode
  FDE_XMLNODETYPE GetType() const override;
  CFDE_XMLNode* Clone(bool bRecursive) override;

  // Tag-name queries; results are written to the output parameter.
  void GetTagName(CFX_WideString& wsTag) const;
  void GetLocalTagName(CFX_WideString& wsTag) const;

  // Namespace queries; results are written to the output parameter.
  void GetNamespacePrefix(CFX_WideString& wsPrefix) const;
  void GetNamespaceURI(CFX_WideString& wsNamespace) const;

  // Attribute enumeration and removal.
  int32_t CountAttributes() const;
  bool GetAttribute(int32_t index,
                    CFX_WideString& wsAttriName,
                    CFX_WideString& wsAttriValue) const;
  bool HasAttribute(const FX_WCHAR* pwsAttriName) const;
  void RemoveAttribute(const FX_WCHAR* pwsAttriName);

  // String attribute access. |pwsDefValue| defaults to nullptr -- presumably
  // the value used when the attribute is missing; TODO confirm.
  void GetString(const FX_WCHAR* pwsAttriName,
                 CFX_WideString& wsAttriValue,
                 const FX_WCHAR* pwsDefValue = nullptr) const;
  void SetString(const CFX_WideString& wsAttriName,
                 const CFX_WideString& wsAttriValue);

  // Integer attribute access; |iDefValue| defaults to 0.
  int32_t GetInteger(const FX_WCHAR* pwsAttriName, int32_t iDefValue = 0) const;
  void SetInteger(const FX_WCHAR* pwsAttriName, int32_t iAttriValue);

  // Floating-point attribute access; |fDefValue| defaults to 0.
  FX_FLOAT GetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fDefValue = 0) const;
  void SetFloat(const FX_WCHAR* pwsAttriName, FX_FLOAT fAttriValue);

  // Text content of the element.
  // NOTE(review): how text is gathered from or stored into child nodes is
  // not visible in this header -- see the implementation.
  void GetTextData(CFX_WideString& wsText) const;
  void SetTextData(const CFX_WideString& wsText);

  // Public storage.
  // NOTE(review): the layout of m_Attributes is not visible here -- check
  // the implementation before indexing it directly.
  CFX_WideString m_wsTag;
  std::vector<CFX_WideString> m_Attributes;
};
    150 
// Node subclass that stores a single wide string, m_wsText.
class CFDE_XMLText : public CFDE_XMLNode {
 public:
  explicit CFDE_XMLText(const CFX_WideString& wsText);
  ~CFDE_XMLText() override;

  // CFDE_XMLNode
  FDE_XMLNODETYPE GetType() const override;
  CFDE_XMLNode* Clone(bool bRecursive) override;

  // Copies the stored text into |wsText|.
  void GetText(CFX_WideString& wsText) const { wsText = m_wsText; }
  // Replaces the stored text with |wsText|.
  void SetText(const CFX_WideString& wsText) { m_wsText = wsText; }

  // The stored text; public, so it can also be read or written directly.
  CFX_WideString m_wsText;
};
    165 
    166 class CFDE_XMLDeclaration : public CFDE_XMLNode {
    167  public:
    168   CFDE_XMLDeclaration() {}
    169   ~CFDE_XMLDeclaration() override {}
    170 };
    171 
// Node subclass (via CFDE_XMLDeclaration) that stores a single wide string,
// m_wsCharData. Presumably represents a CDATA section -- confirm against the
// parser that creates it.
class CFDE_XMLCharData : public CFDE_XMLDeclaration {
 public:
  explicit CFDE_XMLCharData(const CFX_WideString& wsCData);
  ~CFDE_XMLCharData() override;

  // CFDE_XMLNode overrides.
  FDE_XMLNODETYPE GetType() const override;
  CFDE_XMLNode* Clone(bool bRecursive) override;

  // Copies the stored character data into |wsCharData|.
  void GetCharData(CFX_WideString& wsCharData) const {
    wsCharData = m_wsCharData;
  }
  // Replaces the stored character data with |wsCData|.
  void SetCharData(const CFX_WideString& wsCData) { m_wsCharData = wsCData; }

  // The stored character data; public, so it can also be accessed directly.
  CFX_WideString m_wsCharData;
};
    187 
// Document object. Holds a root node pointer, a stream, a status value and a
// parser that it owns through std::unique_ptr.
class CFDE_XMLDoc {
 public:
  CFDE_XMLDoc();
  ~CFDE_XMLDoc();

  // Takes ownership of |pXMLParser| (passed by value as a unique_ptr).
  bool LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser);
  // |pPause| is optional and defaults to nullptr.
  // NOTE(review): the meaning of the returned int32_t (progress / status
  // code) is not visible in this header -- see the implementation.
  int32_t DoLoad(IFX_Pause* pPause = nullptr);
  void CloseXML();
  // Returns the stored root pointer.
  // NOTE(review): ownership of the root node is not visible from this
  // header -- presumably the document keeps it; confirm before deleting.
  CFDE_XMLNode* GetRoot() const { return m_pRoot; }
  // Writes the document to |pXMLStream|; |bSaveBOM| defaults to true.
  void SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream, bool bSaveBOM = true);
  // Writes the single node |pNode| to |pXMLStream|.
  void SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream,
                   CFDE_XMLNode* pNode);

 protected:
  void Reset(bool bInitRoot);
  void ReleaseParser();

  CFX_RetainPtr<IFGAS_Stream> m_pStream;
  int32_t m_iStatus;
  CFDE_XMLNode* m_pRoot;
  std::unique_ptr<IFDE_XMLParser> m_pXMLParser;
};
    210 
    211 class IFDE_XMLParser {
    212  public:
    213   virtual ~IFDE_XMLParser() {}
    214   virtual int32_t DoParser(IFX_Pause* pPause) = 0;
    215 };
    216 
    217 class CFDE_BlockBuffer {
    218  public:
    219   explicit CFDE_BlockBuffer(int32_t iAllocStep = 1024 * 1024);
    220   ~CFDE_BlockBuffer();
    221 
    222   bool InitBuffer(int32_t iBufferSize = 1024 * 1024);
    223   bool IsInitialized() { return m_iBufferSize / m_iAllocStep >= 1; }
    224   FX_WCHAR* GetAvailableBlock(int32_t& iIndexInBlock);
    225   inline int32_t GetAllocStep() const { return m_iAllocStep; }
    226   inline int32_t& GetDataLengthRef() { return m_iDataLength; }
    227   inline void Reset(bool bReserveData = true) {
    228     if (!bReserveData) {
    229       m_iStartPosition = 0;
    230     }
    231     m_iDataLength = 0;
    232   }
    233   void SetTextChar(int32_t iIndex, FX_WCHAR ch);
    234   int32_t DeleteTextChars(int32_t iCount, bool bDirection = true);
    235   void GetTextData(CFX_WideString& wsTextData,
    236                    int32_t iStart = 0,
    237                    int32_t iLength = -1) const;
    238 
    239  protected:
    240   inline void TextDataIndex2BufIndex(const int32_t iIndex,
    241                                      int32_t& iBlockIndex,
    242                                      int32_t& iInnerIndex) const;
    243   void ClearBuffer();
    244 
    245   CFX_ArrayTemplate<FX_WCHAR*> m_BlockArray;
    246   int32_t m_iDataLength;
    247   int32_t m_iBufferSize;
    248   int32_t m_iAllocStep;
    249   int32_t m_iStartPosition;
    250 };
    251 
    252 class CFDE_XMLSyntaxParser {
    253  public:
    254   CFDE_XMLSyntaxParser();
    255   ~CFDE_XMLSyntaxParser();
    256 
    257   void Init(const CFX_RetainPtr<IFGAS_Stream>& pStream,
    258             int32_t iXMLPlaneSize,
    259             int32_t iTextDataSize = 256);
    260 
    261   FDE_XmlSyntaxResult DoSyntaxParse();
    262 
    263   int32_t GetStatus() const;
    264   int32_t GetCurrentPos() const {
    265     return m_iParsedChars + (m_pStart - m_pBuffer);
    266   }
    267   FX_FILESIZE GetCurrentBinaryPos() const;
    268   int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
    269   int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
    270 
    271   void GetTargetName(CFX_WideString& wsTarget) const {
    272     m_BlockBuffer.GetTextData(wsTarget, 0, m_iTextDataLength);
    273   }
    274   void GetTagName(CFX_WideString& wsTag) const {
    275     m_BlockBuffer.GetTextData(wsTag, 0, m_iTextDataLength);
    276   }
    277   void GetAttributeName(CFX_WideString& wsAttriName) const {
    278     m_BlockBuffer.GetTextData(wsAttriName, 0, m_iTextDataLength);
    279   }
    280   void GetAttributeValue(CFX_WideString& wsAttriValue) const {
    281     m_BlockBuffer.GetTextData(wsAttriValue, 0, m_iTextDataLength);
    282   }
    283   void GetTextData(CFX_WideString& wsText) const {
    284     m_BlockBuffer.GetTextData(wsText, 0, m_iTextDataLength);
    285   }
    286   void GetTargetData(CFX_WideString& wsData) const {
    287     m_BlockBuffer.GetTextData(wsData, 0, m_iTextDataLength);
    288   }
    289 
    290  protected:
    291   enum class FDE_XmlSyntaxState {
    292     Text,
    293     Node,
    294     Target,
    295     Tag,
    296     AttriName,
    297     AttriEqualSign,
    298     AttriQuotation,
    299     AttriValue,
    300     Entity,
    301     EntityDecimal,
    302     EntityHex,
    303     CloseInstruction,
    304     BreakElement,
    305     CloseElement,
    306     SkipDeclNode,
    307     DeclCharData,
    308     SkipComment,
    309     SkipCommentOrDecl,
    310     SkipCData,
    311     TargetData
    312   };
    313 
    314   void ParseTextChar(FX_WCHAR ch);
    315 
    316   CFX_RetainPtr<IFGAS_Stream> m_pStream;
    317   int32_t m_iXMLPlaneSize;
    318   int32_t m_iCurrentPos;
    319   int32_t m_iCurrentNodeNum;
    320   int32_t m_iLastNodeNum;
    321   int32_t m_iParsedChars;
    322   int32_t m_iParsedBytes;
    323   FX_WCHAR* m_pBuffer;
    324   int32_t m_iBufferChars;
    325   bool m_bEOS;
    326   FX_WCHAR* m_pStart;
    327   FX_WCHAR* m_pEnd;
    328   FDE_XMLNODE m_CurNode;
    329   CFX_StackTemplate<FDE_XMLNODE> m_XMLNodeStack;
    330   CFDE_BlockBuffer m_BlockBuffer;
    331   int32_t m_iAllocStep;
    332   int32_t& m_iDataLength;
    333   FX_WCHAR* m_pCurrentBlock;
    334   int32_t m_iIndexInBlock;
    335   int32_t m_iTextDataLength;
    336   FDE_XmlSyntaxResult m_syntaxParserResult;
    337   FDE_XmlSyntaxState m_syntaxParserState;
    338   FX_WCHAR m_wQuotationMark;
    339   int32_t m_iEntityStart;
    340   CFX_StackTemplate<uint32_t> m_SkipStack;
    341   FX_WCHAR m_SkipChar;
    342 };
    343 
    344 #endif  // XFA_FDE_XML_FDE_XML_IMP_H_
    345