Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
      8 
      9 CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot,
     10                                const CFX_RetainPtr<IFGAS_Stream>& pStream)
     11     : m_nElementStart(0),
     12       m_dwCheckStatus(0),
     13       m_dwCurrentCheckStatus(0),
     14       m_pRoot(pRoot),
     15       m_pStream(pStream),
     16       m_pParser(new CFDE_XMLSyntaxParser),
     17       m_pParent(pRoot),
     18       m_pChild(nullptr),
     19       m_NodeStack(16),
     20       m_syntaxParserResult(FDE_XmlSyntaxResult::None) {
     21   ASSERT(m_pParent && m_pStream);
     22   m_NodeStack.Push(m_pParent);
     23   m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024);
     24 }
     25 
     26 CXFA_XMLParser::~CXFA_XMLParser() {
     27   m_NodeStack.RemoveAll(false);
     28   m_ws1.clear();
     29   m_ws2.clear();
     30 }
     31 
     32 int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) {
     33   if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error)
     34     return -1;
     35   if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString)
     36     return 100;
     37 
     38   int32_t iCount = 0;
     39   while (true) {
     40     m_syntaxParserResult = m_pParser->DoSyntaxParse();
     41     switch (m_syntaxParserResult) {
     42       case FDE_XmlSyntaxResult::InstructionOpen:
     43         break;
     44       case FDE_XmlSyntaxResult::InstructionClose:
     45         if (m_pChild) {
     46           if (m_pChild->GetType() != FDE_XMLNODE_Instruction) {
     47             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
     48             break;
     49           }
     50         }
     51         m_pChild = m_pParent;
     52         break;
     53       case FDE_XmlSyntaxResult::ElementOpen:
     54         if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) {
     55           m_nElementStart = m_pParser->GetCurrentPos() - 1;
     56         }
     57         break;
     58       case FDE_XmlSyntaxResult::ElementBreak:
     59         break;
     60       case FDE_XmlSyntaxResult::ElementClose:
     61         if (m_pChild->GetType() != FDE_XMLNODE_Element) {
     62           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
     63           break;
     64         }
     65         m_pParser->GetTagName(m_ws1);
     66         static_cast<CFDE_XMLElement*>(m_pChild)->GetTagName(m_ws2);
     67         if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) {
     68           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
     69           break;
     70         }
     71         m_NodeStack.Pop();
     72         if (m_NodeStack.GetSize() < 1) {
     73           m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
     74           break;
     75         } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) {
     76           m_nSize[m_dwCurrentCheckStatus - 1] =
     77               m_pParser->GetCurrentBinaryPos() -
     78               m_nStart[m_dwCurrentCheckStatus - 1];
     79           m_dwCurrentCheckStatus = 0;
     80         }
     81 
     82         m_pParent = static_cast<CFDE_XMLNode*>(*m_NodeStack.GetTopElement());
     83         m_pChild = m_pParent;
     84         iCount++;
     85         break;
     86       case FDE_XmlSyntaxResult::TargetName:
     87         m_pParser->GetTargetName(m_ws1);
     88         if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") {
     89           m_pChild = new CFDE_XMLInstruction(m_ws1);
     90           m_pParent->InsertChildNode(m_pChild);
     91         } else {
     92           m_pChild = nullptr;
     93         }
     94         m_ws1.clear();
     95         break;
     96       case FDE_XmlSyntaxResult::TagName:
     97         m_pParser->GetTagName(m_ws1);
     98         m_pChild = new CFDE_XMLElement(m_ws1);
     99         m_pParent->InsertChildNode(m_pChild);
    100         m_NodeStack.Push(m_pChild);
    101         m_pParent = m_pChild;
    102 
    103         if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) {
    104           CFX_WideString wsTag;
    105           static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(wsTag);
    106           if (wsTag == L"template") {
    107             m_dwCheckStatus |= 0x01;
    108             m_dwCurrentCheckStatus = 0x01;
    109             m_nStart[0] = m_pParser->GetCurrentBinaryPos() -
    110                           (m_pParser->GetCurrentPos() - m_nElementStart);
    111           } else if (wsTag == L"datasets") {
    112             m_dwCheckStatus |= 0x02;
    113             m_dwCurrentCheckStatus = 0x02;
    114             m_nStart[1] = m_pParser->GetCurrentBinaryPos() -
    115                           (m_pParser->GetCurrentPos() - m_nElementStart);
    116           }
    117         }
    118         break;
    119       case FDE_XmlSyntaxResult::AttriName:
    120         m_pParser->GetAttributeName(m_ws1);
    121         break;
    122       case FDE_XmlSyntaxResult::AttriValue:
    123         if (m_pChild) {
    124           m_pParser->GetAttributeName(m_ws2);
    125           if (m_pChild->GetType() == FDE_XMLNODE_Element) {
    126             static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2);
    127           }
    128         }
    129         m_ws1.clear();
    130         break;
    131       case FDE_XmlSyntaxResult::Text:
    132         m_pParser->GetTextData(m_ws1);
    133         m_pChild = new CFDE_XMLText(m_ws1);
    134         m_pParent->InsertChildNode(m_pChild);
    135         m_pChild = m_pParent;
    136         break;
    137       case FDE_XmlSyntaxResult::CData:
    138         m_pParser->GetTextData(m_ws1);
    139         m_pChild = new CFDE_XMLCharData(m_ws1);
    140         m_pParent->InsertChildNode(m_pChild);
    141         m_pChild = m_pParent;
    142         break;
    143       case FDE_XmlSyntaxResult::TargetData:
    144         if (m_pChild) {
    145           if (m_pChild->GetType() != FDE_XMLNODE_Instruction) {
    146             m_syntaxParserResult = FDE_XmlSyntaxResult::Error;
    147             break;
    148           }
    149           if (!m_ws1.IsEmpty()) {
    150             static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1);
    151           }
    152           m_pParser->GetTargetData(m_ws1);
    153           static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1);
    154         }
    155         m_ws1.clear();
    156         break;
    157       default:
    158         break;
    159     }
    160     if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
    161         m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) {
    162       break;
    163     }
    164     if (pPause && iCount > 500 && pPause->NeedToPauseNow()) {
    165       break;
    166     }
    167   }
    168   return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error ||
    169           m_NodeStack.GetSize() != 1)
    170              ? -1
    171              : m_pParser->GetStatus();
    172 }
    173