Home | History | Annotate | Download | only in xml
      1 // Copyright 2017 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #ifndef CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
      8 #define CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
      9 
     10 #include <stack>
     11 #include <vector>
     12 
     13 #include "core/fxcrt/cfx_blockbuffer.h"
     14 #include "core/fxcrt/cfx_seekablestreamproxy.h"
     15 #include "core/fxcrt/fx_string.h"
     16 #include "core/fxcrt/retain_ptr.h"
     17 #include "core/fxcrt/xml/cfx_xmlnode.h"
     18 
     19 enum class FX_XmlSyntaxResult {
          // Result codes for the XML syntax parser declared in this header:
          // CFX_XMLSyntaxParser::DoSyntaxParse() returns one of these values, and
          // the parser keeps a member of this type (m_syntaxParserResult).
          //
          // NOTE(review): going by the names only, TargetName, TagName, AttriName,
          // AttriValue, Text and TargetData appear to pair with the parser's
          // GetTargetName(), GetTagName(), GetAttributeName(),
          // GetAttributeValue(), GetTextData() and GetTargetData() accessors.
          // There is no dedicated accessor for CData in this header. Confirm the
          // exact pairing against the implementation file.
     20   None,
     21   InstructionOpen,
     22   InstructionClose,
     23   ElementOpen,
     24   ElementBreak,
     25   ElementClose,
     26   TargetName,
     27   TagName,
     28   AttriName,
     29   AttriValue,
     30   Text,
     31   CData,
     32   TargetData,
     33   Error,
     34   EndOfString
     35 };
     36 
     37 class CFX_XMLSyntaxParser {
          // XML syntax parser constructed over a CFX_SeekableStreamProxy.
          // DoSyntaxParse() returns an FX_XmlSyntaxResult, and the Get*() text
          // accessors below return text obtained from m_BlockBuffer. Only
          // declarations and trivial inline accessors are visible in this header;
          // the parsing logic itself is defined elsewhere.
     38  public:
          // NOTE(review): presumably reports whether |ch| may appear in an XML
          // name, with |bFirstChar| selecting the rule for the first character of
          // the name -- confirm against the definition.
     39   static bool IsXMLNameChar(wchar_t ch, bool bFirstChar);
     40 
          // Explicit, so a RetainPtr<CFX_SeekableStreamProxy> never converts to a
          // parser implicitly.
     41   explicit CFX_XMLSyntaxParser(
     42       const RetainPtr<CFX_SeekableStreamProxy>& pStream);
     43   ~CFX_XMLSyntaxParser();
     44 
          // Defined out of line. The returned code is one of FX_XmlSyntaxResult.
     45   FX_XmlSyntaxResult DoSyntaxParse();
     46 
          // Defined out of line; the meaning of the returned int32_t is not
          // visible in this header.
     47   int32_t GetStatus() const;
          // Returns the sum of m_ParsedChars and m_Start.
     48   FX_FILESIZE GetCurrentPos() const { return m_ParsedChars + m_Start; }
          // Defined out of line.
     49   FX_FILESIZE GetCurrentBinaryPos() const;
          // Plain getters for m_iCurrentNodeNum and m_iLastNodeNum.
     50   int32_t GetCurrentNodeNumber() const { return m_iCurrentNodeNum; }
     51   int32_t GetLastNodeNumber() const { return m_iLastNodeNum; }
     52 
          // The six accessors below have identical bodies: each returns
          // m_BlockBuffer.GetTextData(0, m_iTextDataLength). They differ only in
          // name, so all of them read the same buffer contents.
          // NOTE(review): presumably the arguments are (start, length) and the
          // caller picks the accessor matching the FX_XmlSyntaxResult it just
          // received from DoSyntaxParse() -- confirm against CFX_BlockBuffer and
          // the call sites.
     53   WideString GetTargetName() const {
     54     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     55   }
     56 
     57   WideString GetTagName() const {
     58     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     59   }
     60 
     61   WideString GetAttributeName() const {
     62     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     63   }
     64 
     65   WideString GetAttributeValue() const {
     66     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     67   }
     68 
     69   WideString GetTextData() const {
     70     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     71   }
     72 
     73   WideString GetTargetData() const {
     74     return m_BlockBuffer.GetTextData(0, m_iTextDataLength);
     75   }
     76 
     77  protected:
          // Internal parser states; this is the type of m_syntaxParserState.
     78   enum class FDE_XmlSyntaxState {
     79     Text,
     80     Node,
     81     Target,
     82     Tag,
     83     AttriName,
     84     AttriEqualSign,
     85     AttriQuotation,
     86     AttriValue,
     87     Entity,
     88     EntityDecimal,
     89     EntityHex,
     90     CloseInstruction,
     91     BreakElement,
     92     CloseElement,
     93     SkipDeclNode,
     94     DeclCharData,
     95     SkipComment,
     96     SkipCommentOrDecl,
     97     SkipCData,
     98     TargetData
     99   };
    100 
          // Defined out of line.
    101   void ParseTextChar(wchar_t ch);
    102 
          // Data members. None of them has an in-class initializer.
          // NOTE(review): confirm that the constructor initializes every member
          // before first use.
    103   RetainPtr<CFX_SeekableStreamProxy> m_pStream;
    104   size_t m_iXMLPlaneSize;
    105   FX_FILESIZE m_iCurrentPos;
          // Returned by GetCurrentNodeNumber() / GetLastNodeNumber().
    106   int32_t m_iCurrentNodeNum;
    107   int32_t m_iLastNodeNum;
    108   int32_t m_iParsedBytes;
          // Added to m_Start by GetCurrentPos().
    109   FX_FILESIZE m_ParsedChars;
    110   std::vector<wchar_t> m_Buffer;
    111   size_t m_iBufferChars;
    112   bool m_bEOS;
    113   FX_FILESIZE m_Start;  // Start position in m_Buffer
    114   FX_FILESIZE m_End;    // End position in m_Buffer
    115   FX_XMLNODE m_CurNode;
    116   std::stack<FX_XMLNODE> m_XMLNodeStack;
          // Source of the text returned by every Get*() text accessor above.
    117   CFX_BlockBuffer m_BlockBuffer;
    118   int32_t m_iAllocStep;
    119   wchar_t* m_pCurrentBlock;  // Pointer into CFX_BlockBuffer
    120   int32_t m_iIndexInBlock;
          // Second argument passed to m_BlockBuffer.GetTextData() by the
          // accessors.
    121   int32_t m_iTextDataLength;
    122   FX_XmlSyntaxResult m_syntaxParserResult;
    123   FDE_XmlSyntaxState m_syntaxParserState;
    124   wchar_t m_wQuotationMark;
    125   int32_t m_iEntityStart;
    126   std::stack<wchar_t> m_SkipStack;
    127   wchar_t m_SkipChar;
    128 };
    129 
    130 #endif  // CORE_FXCRT_XML_CFX_XMLSYNTAXPARSER_H_
    131