1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/fxfa/parser/cxfa_xml_parser.h" 8 9 CXFA_XMLParser::CXFA_XMLParser(CFDE_XMLNode* pRoot, 10 const CFX_RetainPtr<IFGAS_Stream>& pStream) 11 : m_nElementStart(0), 12 m_dwCheckStatus(0), 13 m_dwCurrentCheckStatus(0), 14 m_pRoot(pRoot), 15 m_pStream(pStream), 16 m_pParser(new CFDE_XMLSyntaxParser), 17 m_pParent(pRoot), 18 m_pChild(nullptr), 19 m_NodeStack(16), 20 m_syntaxParserResult(FDE_XmlSyntaxResult::None) { 21 ASSERT(m_pParent && m_pStream); 22 m_NodeStack.Push(m_pParent); 23 m_pParser->Init(m_pStream, 32 * 1024, 1024 * 1024); 24 } 25 26 CXFA_XMLParser::~CXFA_XMLParser() { 27 m_NodeStack.RemoveAll(false); 28 m_ws1.clear(); 29 m_ws2.clear(); 30 } 31 32 int32_t CXFA_XMLParser::DoParser(IFX_Pause* pPause) { 33 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) 34 return -1; 35 if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) 36 return 100; 37 38 int32_t iCount = 0; 39 while (true) { 40 m_syntaxParserResult = m_pParser->DoSyntaxParse(); 41 switch (m_syntaxParserResult) { 42 case FDE_XmlSyntaxResult::InstructionOpen: 43 break; 44 case FDE_XmlSyntaxResult::InstructionClose: 45 if (m_pChild) { 46 if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { 47 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 48 break; 49 } 50 } 51 m_pChild = m_pParent; 52 break; 53 case FDE_XmlSyntaxResult::ElementOpen: 54 if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 2) { 55 m_nElementStart = m_pParser->GetCurrentPos() - 1; 56 } 57 break; 58 case FDE_XmlSyntaxResult::ElementBreak: 59 break; 60 case FDE_XmlSyntaxResult::ElementClose: 61 if (m_pChild->GetType() != FDE_XMLNODE_Element) { 62 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 63 break; 64 } 65 m_pParser->GetTagName(m_ws1); 66 static_cast<CFDE_XMLElement*>(m_pChild)->GetTagName(m_ws2); 67 if (m_ws1.GetLength() > 0 && m_ws1 != m_ws2) { 68 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 69 break; 70 } 71 m_NodeStack.Pop(); 72 if (m_NodeStack.GetSize() < 1) { 73 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 74 break; 75 } else if (m_dwCurrentCheckStatus != 0 && m_NodeStack.GetSize() == 2) { 76 m_nSize[m_dwCurrentCheckStatus - 1] = 77 m_pParser->GetCurrentBinaryPos() - 78 m_nStart[m_dwCurrentCheckStatus - 1]; 79 m_dwCurrentCheckStatus = 0; 80 } 81 82 m_pParent = static_cast<CFDE_XMLNode*>(*m_NodeStack.GetTopElement()); 83 m_pChild = m_pParent; 84 iCount++; 85 break; 86 case FDE_XmlSyntaxResult::TargetName: 87 m_pParser->GetTargetName(m_ws1); 88 if (m_ws1 == L"originalXFAVersion" || m_ws1 == L"acrobat") { 89 m_pChild = new CFDE_XMLInstruction(m_ws1); 90 m_pParent->InsertChildNode(m_pChild); 91 } else { 92 m_pChild = nullptr; 93 } 94 m_ws1.clear(); 95 break; 96 case FDE_XmlSyntaxResult::TagName: 97 m_pParser->GetTagName(m_ws1); 98 m_pChild = new CFDE_XMLElement(m_ws1); 99 m_pParent->InsertChildNode(m_pChild); 100 m_NodeStack.Push(m_pChild); 101 m_pParent = m_pChild; 102 103 if (m_dwCheckStatus != 0x03 && m_NodeStack.GetSize() == 3) { 104 CFX_WideString wsTag; 105 static_cast<CFDE_XMLElement*>(m_pChild)->GetLocalTagName(wsTag); 106 if (wsTag == L"template") { 107 m_dwCheckStatus |= 0x01; 108 m_dwCurrentCheckStatus = 0x01; 109 m_nStart[0] = m_pParser->GetCurrentBinaryPos() - 110 (m_pParser->GetCurrentPos() - m_nElementStart); 111 } else if (wsTag == L"datasets") { 112 m_dwCheckStatus |= 0x02; 113 m_dwCurrentCheckStatus = 0x02; 114 m_nStart[1] = m_pParser->GetCurrentBinaryPos() - 115 (m_pParser->GetCurrentPos() - m_nElementStart); 116 } 117 } 118 break; 119 case FDE_XmlSyntaxResult::AttriName: 120 m_pParser->GetAttributeName(m_ws1); 121 break; 122 case FDE_XmlSyntaxResult::AttriValue: 123 if (m_pChild) { 124 m_pParser->GetAttributeName(m_ws2); 125 if (m_pChild->GetType() == FDE_XMLNODE_Element) { 126 static_cast<CFDE_XMLElement*>(m_pChild)->SetString(m_ws1, m_ws2); 127 } 128 } 129 m_ws1.clear(); 130 break; 131 case FDE_XmlSyntaxResult::Text: 132 m_pParser->GetTextData(m_ws1); 133 m_pChild = new CFDE_XMLText(m_ws1); 134 m_pParent->InsertChildNode(m_pChild); 135 m_pChild = m_pParent; 136 break; 137 case FDE_XmlSyntaxResult::CData: 138 m_pParser->GetTextData(m_ws1); 139 m_pChild = new CFDE_XMLCharData(m_ws1); 140 m_pParent->InsertChildNode(m_pChild); 141 m_pChild = m_pParent; 142 break; 143 case FDE_XmlSyntaxResult::TargetData: 144 if (m_pChild) { 145 if (m_pChild->GetType() != FDE_XMLNODE_Instruction) { 146 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 147 break; 148 } 149 if (!m_ws1.IsEmpty()) { 150 static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); 151 } 152 m_pParser->GetTargetData(m_ws1); 153 static_cast<CFDE_XMLInstruction*>(m_pChild)->AppendData(m_ws1); 154 } 155 m_ws1.clear(); 156 break; 157 default: 158 break; 159 } 160 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || 161 m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { 162 break; 163 } 164 if (pPause && iCount > 500 && pPause->NeedToPauseNow()) { 165 break; 166 } 167 } 168 return (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || 169 m_NodeStack.GetSize() != 1) 170 ? -1 171 : m_pParser->GetStatus(); 172 } 173