1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/fxfa/parser/cxfa_simple_parser.h" 8 9 #include <utility> 10 #include <vector> 11 12 #include "core/fxcrt/cfx_checksumcontext.h" 13 #include "core/fxcrt/cfx_seekablestreamproxy.h" 14 #include "core/fxcrt/cfx_widetextbuf.h" 15 #include "core/fxcrt/fx_codepage.h" 16 #include "core/fxcrt/fx_extension.h" 17 #include "core/fxcrt/xml/cfx_xmlchardata.h" 18 #include "core/fxcrt/xml/cfx_xmldoc.h" 19 #include "core/fxcrt/xml/cfx_xmlelement.h" 20 #include "core/fxcrt/xml/cfx_xmlinstruction.h" 21 #include "core/fxcrt/xml/cfx_xmlnode.h" 22 #include "core/fxcrt/xml/cfx_xmlparser.h" 23 #include "core/fxcrt/xml/cfx_xmltext.h" 24 #include "fxjs/xfa/cjx_object.h" 25 #include "third_party/base/logging.h" 26 #include "third_party/base/ptr_util.h" 27 #include "xfa/fxfa/fxfa.h" 28 #include "xfa/fxfa/parser/cxfa_document.h" 29 #include "xfa/fxfa/parser/cxfa_node.h" 30 #include "xfa/fxfa/parser/cxfa_subform.h" 31 #include "xfa/fxfa/parser/cxfa_template.h" 32 #include "xfa/fxfa/parser/xfa_basic_data.h" 33 #include "xfa/fxfa/parser/xfa_utils.h" 34 35 namespace { 36 37 struct PacketInfo { 38 uint32_t hash; 39 const wchar_t* name; 40 XFA_PacketType packet_type; 41 const wchar_t* uri; 42 uint32_t flags; 43 }; 44 const PacketInfo PacketData[] = { 45 {0x0, nullptr, XFA_PacketType::User, nullptr, 46 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY}, 47 {0x811929d, L"sourceSet", XFA_PacketType::SourceSet, 48 L"http://www.xfa.org/schema/xfa-source-set/", 49 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 50 {0xb843dba, L"pdf", XFA_PacketType::Pdf, L"http://ns.adobe.com/xdp/pdf/", 51 XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 52 {0xc56afbf, L"xdc", XFA_PacketType::Xdc, L"http://www.xfa.org/schema/xdc/", 53 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 54 {0xc56afcc, L"xdp", XFA_PacketType::Xdp, L"http://ns.adobe.com/xdp/", 55 XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 56 {0x132a8fbc, L"xmpmeta", XFA_PacketType::Xmpmeta, 57 L"http://ns.adobe.com/xmpmeta/", 58 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY}, 59 {0x48d004a8, L"xfdf", XFA_PacketType::Xfdf, L"http://ns.adobe.com/xfdf/", 60 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 61 {0x4e1e39b6, L"config", XFA_PacketType::Config, 62 L"http://www.xfa.org/schema/xci/", 63 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 64 {0x5473b6dc, L"localeSet", XFA_PacketType::LocaleSet, 65 L"http://www.xfa.org/schema/xfa-locale-set/", 66 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 67 {0x6038580a, L"stylesheet", XFA_PacketType::Stylesheet, 68 L"http://www.w3.org/1999/XSL/Transform", 69 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY}, 70 {0x803550fc, L"template", XFA_PacketType::Template, 71 L"http://www.xfa.org/schema/xfa-template/", 72 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 73 {0x8b036f32, L"signature", XFA_PacketType::Signature, 74 L"http://www.w3.org/2000/09/xmldsig#", 75 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 76 {0x99b95079, L"datasets", XFA_PacketType::Datasets, 77 L"http://www.xfa.org/schema/xfa-data/", 78 XFA_XDPPACKET_FLAGS_PREFIXMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 79 {0xcd309ff4, L"form", XFA_PacketType::Form, 80 L"http://www.xfa.org/schema/xfa-form/", 81 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 82 {0xe14c801c, L"connectionSet", XFA_PacketType::ConnectionSet, 83 L"http://www.xfa.org/schema/xfa-connection-set/", 84 XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE}, 85 }; 86 87 const PacketInfo* GetPacketByIndex(XFA_PacketType ePacket) { 88 return PacketData + static_cast<uint8_t>(ePacket); 89 } 90 91 const PacketInfo* GetPacketByName(const WideStringView& wsName) { 92 if (wsName.IsEmpty()) 93 return nullptr; 94 95 uint32_t hash = FX_HashCode_GetW(wsName, false); 96 auto* elem = std::lower_bound( 97 std::begin(PacketData), std::end(PacketData), hash, 98 [](const PacketInfo& a, uint32_t hash) { return a.hash < hash; }); 99 if (elem != std::end(PacketData) && elem->hash == hash) 100 return elem; 101 return nullptr; 102 } 103 104 CFX_XMLNode* GetDocumentNode(CFX_XMLDoc* pXMLDoc, 105 bool bVerifyWellFormness = false) { 106 if (!pXMLDoc) 107 return nullptr; 108 109 for (CFX_XMLNode* pXMLNode = 110 pXMLDoc->GetRoot()->GetNodeItem(CFX_XMLNode::FirstChild); 111 pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling)) { 112 if (pXMLNode->GetType() != FX_XMLNODE_Element) 113 continue; 114 115 if (!bVerifyWellFormness) 116 return pXMLNode; 117 118 for (CFX_XMLNode* pNextNode = 119 pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling); 120 pNextNode; 121 pNextNode = pNextNode->GetNodeItem(CFX_XMLNode::NextSibling)) { 122 if (pNextNode->GetType() == FX_XMLNODE_Element) 123 return nullptr; 124 } 125 return pXMLNode; 126 } 127 return nullptr; 128 } 129 130 WideString GetElementTagNamespaceURI(CFX_XMLElement* pElement) { 131 WideString wsNodeStr = pElement->GetNamespacePrefix(); 132 WideString wsNamespaceURI; 133 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNodeStr, 134 &wsNamespaceURI)) { 135 return WideString(); 136 } 137 return wsNamespaceURI; 138 } 139 140 bool MatchNodeName(CFX_XMLNode* pNode, 141 const WideStringView& wsLocalTagName, 142 const WideStringView& wsNamespaceURIPrefix, 143 uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) { 144 if (!pNode || pNode->GetType() != FX_XMLNODE_Element) 145 return false; 146 147 CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pNode); 148 WideString wsNodeStr = pElement->GetLocalTagName(); 149 if (wsNodeStr != wsLocalTagName) 150 return false; 151 152 wsNodeStr = GetElementTagNamespaceURI(pElement); 153 if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH) 154 return true; 155 if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) { 156 return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) == 157 wsNamespaceURIPrefix; 158 } 159 160 return wsNodeStr == wsNamespaceURIPrefix; 161 } 162 163 bool GetAttributeLocalName(const WideStringView& wsAttributeName, 164 WideString& wsLocalAttrName) { 165 WideString wsAttrName(wsAttributeName); 166 auto pos = wsAttrName.Find(L':', 0); 167 if (!pos.has_value()) { 168 wsLocalAttrName = wsAttrName; 169 return false; 170 } 171 wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - pos.value() - 1); 172 return true; 173 } 174 175 bool ResolveAttribute(CFX_XMLElement* pElement, 176 const WideString& wsAttrName, 177 WideString& wsLocalAttrName, 178 WideString& wsNamespaceURI) { 179 WideString wsNSPrefix; 180 if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) { 181 wsNSPrefix = wsAttrName.Left(wsAttrName.GetLength() - 182 wsLocalAttrName.GetLength() - 1); 183 } 184 if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" || 185 wsNSPrefix == L"xml") { 186 return false; 187 } 188 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix, 189 &wsNamespaceURI)) { 190 wsNamespaceURI.clear(); 191 return false; 192 } 193 return true; 194 } 195 196 bool FindAttributeWithNS(CFX_XMLElement* pElement, 197 const WideStringView& wsLocalAttributeName, 198 const WideStringView& wsNamespaceURIPrefix, 199 WideString& wsValue, 200 bool bMatchNSAsPrefix = false) { 201 if (!pElement) 202 return false; 203 204 WideString wsAttrNS; 205 for (auto it : pElement->GetAttributes()) { 206 auto pos = it.first.Find(L':', 0); 207 WideString wsNSPrefix; 208 if (!pos.has_value()) { 209 if (wsLocalAttributeName != it.first) 210 continue; 211 } else { 212 if (wsLocalAttributeName != 213 it.first.Right(it.first.GetLength() - pos.value() - 1)) { 214 continue; 215 } 216 wsNSPrefix = it.first.Left(pos.value()); 217 } 218 219 if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix, 220 &wsAttrNS)) { 221 continue; 222 } 223 if (bMatchNSAsPrefix) { 224 if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) != 225 wsNamespaceURIPrefix) { 226 continue; 227 } 228 } else { 229 if (wsAttrNS != wsNamespaceURIPrefix) 230 continue; 231 } 232 wsValue = it.second; 233 return true; 234 } 235 return false; 236 } 237 238 CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) { 239 const PacketInfo* datasets_packet = 240 GetPacketByIndex(XFA_PacketType::Datasets); 241 if (MatchNodeName(pXMLDocumentNode, datasets_packet->name, 242 datasets_packet->uri, datasets_packet->flags)) { 243 return pXMLDocumentNode; 244 } 245 246 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp); 247 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 248 packet->flags)) { 249 return nullptr; 250 } 251 252 for (CFX_XMLNode* pDatasetsNode = 253 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild); 254 pDatasetsNode; 255 pDatasetsNode = pDatasetsNode->GetNodeItem(CFX_XMLNode::NextSibling)) { 256 if (MatchNodeName(pDatasetsNode, datasets_packet->name, 257 datasets_packet->uri, datasets_packet->flags)) { 258 return pDatasetsNode; 259 } 260 } 261 return nullptr; 262 } 263 264 bool IsStringAllWhitespace(WideString wsText) { 265 wsText.TrimRight(L"\x20\x9\xD\xA"); 266 return wsText.IsEmpty(); 267 } 268 269 void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) { 270 for (CFX_XMLNode* pXMLChild = 271 pRootXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 272 pXMLChild; 273 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 274 switch (pXMLChild->GetType()) { 275 case FX_XMLNODE_Element: { 276 WideString wsTextData = 277 static_cast<CFX_XMLElement*>(pXMLChild)->GetTextData(); 278 wsTextData += L"\n"; 279 wsOutput += wsTextData; 280 break; 281 } 282 case FX_XMLNODE_Text: 283 case FX_XMLNODE_CharData: { 284 WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText(); 285 if (IsStringAllWhitespace(wsText)) 286 continue; 287 288 wsOutput = wsText; 289 break; 290 } 291 default: 292 NOTREACHED(); 293 break; 294 } 295 } 296 } 297 298 WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) { 299 if (!pXMLNode) 300 return L""; 301 302 WideString wsPlainText; 303 switch (pXMLNode->GetType()) { 304 case FX_XMLNODE_Element: { 305 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); 306 WideString wsTag = pXMLElement->GetLocalTagName(); 307 uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true); 308 if (uTag == 0x0001f714) { 309 wsPlainText += L"\n"; 310 } else if (uTag == 0x00000070) { 311 if (!wsPlainText.IsEmpty()) { 312 wsPlainText += L"\n"; 313 } 314 } else if (uTag == 0xa48ac63) { 315 if (!wsPlainText.IsEmpty() && 316 wsPlainText[wsPlainText.GetLength() - 1] != '\n') { 317 wsPlainText += L"\n"; 318 } 319 } 320 break; 321 } 322 case FX_XMLNODE_Text: 323 case FX_XMLNODE_CharData: { 324 WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText(); 325 wsPlainText += wsContent; 326 break; 327 } 328 default: 329 break; 330 } 331 for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 332 pChildXML; 333 pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) { 334 wsPlainText += GetPlainTextFromRichText(pChildXML); 335 } 336 337 return wsPlainText; 338 } 339 340 } // namespace 341 342 bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) { 343 return pRichTextXMLNode && GetElementTagNamespaceURI(pRichTextXMLNode) == 344 L"http://www.w3.org/1999/xhtml"; 345 } 346 347 CXFA_SimpleParser::CXFA_SimpleParser() : m_bDocumentParser(true) {} 348 349 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory) 350 : m_pFactory(pFactory), m_bDocumentParser(false) {} 351 352 CXFA_SimpleParser::~CXFA_SimpleParser() {} 353 354 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) { 355 ASSERT(m_bDocumentParser); 356 m_pFactory = pFactory; 357 } 358 359 int32_t CXFA_SimpleParser::StartParse( 360 const RetainPtr<IFX_SeekableStream>& pStream, 361 XFA_PacketType ePacketID) { 362 CloseParser(); 363 m_pFileRead = pStream; 364 m_pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false); 365 uint16_t wCodePage = m_pStream->GetCodePage(); 366 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && 367 wCodePage != FX_CODEPAGE_UTF8) { 368 m_pStream->SetCodePage(FX_CODEPAGE_UTF8); 369 } 370 m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>(); 371 auto pNewParser = 372 pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream); 373 m_pXMLParser = pNewParser.get(); 374 if (!m_pXMLDoc->LoadXML(std::move(pNewParser))) 375 return XFA_PARSESTATUS_StatusErr; 376 377 m_bParseStarted = true; 378 m_ePacketID = ePacketID; 379 return XFA_PARSESTATUS_Ready; 380 } 381 382 int32_t CXFA_SimpleParser::DoParse() { 383 if (!m_pXMLDoc || !m_bParseStarted) 384 return XFA_PARSESTATUS_StatusErr; 385 386 int32_t iRet = m_pXMLDoc->DoLoad(); 387 if (iRet < 0) 388 return XFA_PARSESTATUS_SyntaxErr; 389 if (iRet < 100) 390 return iRet / 2; 391 392 m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID); 393 m_pXMLParser.Release(); 394 m_pXMLDoc->CloseXML(); 395 m_pStream.Reset(); 396 397 if (!m_pRootNode) 398 return XFA_PARSESTATUS_StatusErr; 399 400 return XFA_PARSESTATUS_Done; 401 } 402 403 CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) { 404 CloseParser(); 405 m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>(); 406 407 auto pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>( 408 const_cast<uint8_t*>(wsXML.raw_str()), wsXML.GetLength()); 409 auto pParser = 410 pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), pStream); 411 pParser->m_dwCheckStatus = 0x03; 412 if (!m_pXMLDoc->LoadXML(std::move(pParser))) 413 return nullptr; 414 415 int32_t iRet = m_pXMLDoc->DoLoad(); 416 if (iRet < 0 || iRet >= 100) 417 m_pXMLDoc->CloseXML(); 418 return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get()); 419 } 420 421 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode, 422 CFX_XMLNode* pXMLNode) { 423 XFA_PacketType ePacketID = pXFANode->GetPacketType(); 424 if (ePacketID == XFA_PacketType::Datasets) { 425 if (pXFANode->GetElementType() == XFA_Element::DataValue) { 426 for (CFX_XMLNode* pXMLChild = 427 pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 428 pXMLChild; 429 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 430 FX_XMLNODETYPE eNodeType = pXMLChild->GetType(); 431 if (eNodeType == FX_XMLNODE_Instruction) 432 continue; 433 434 if (eNodeType == FX_XMLNODE_Element) { 435 CXFA_Node* pXFAChild = m_pFactory->CreateNode( 436 XFA_PacketType::Datasets, XFA_Element::DataValue); 437 if (!pXFAChild) 438 return; 439 440 CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild); 441 WideString wsNodeStr = child->GetLocalTagName(); 442 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false, 443 false); 444 WideString wsChildValue = GetPlainTextFromRichText(child); 445 if (!wsChildValue.IsEmpty()) 446 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue, 447 false, false); 448 449 pXFANode->InsertChild(pXFAChild, nullptr); 450 pXFAChild->SetXMLMappingNode(pXMLChild); 451 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 452 break; 453 } 454 } 455 m_pRootNode = pXFANode; 456 } else { 457 m_pRootNode = DataLoader(pXFANode, pXMLNode, true); 458 } 459 } else if (pXFANode->IsContentNode()) { 460 ParseContentNode(pXFANode, pXMLNode, ePacketID); 461 m_pRootNode = pXFANode; 462 } else { 463 m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true); 464 } 465 } 466 467 CXFA_Node* CXFA_SimpleParser::GetRootNode() const { 468 return m_pRootNode; 469 } 470 471 CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const { 472 return m_pXMLDoc.get(); 473 } 474 475 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode, 476 XFA_PacketType ePacketID) { 477 switch (ePacketID) { 478 case XFA_PacketType::Xdp: 479 return ParseAsXDPPacket_XDP(pXMLDocumentNode); 480 case XFA_PacketType::Config: 481 return ParseAsXDPPacket_Config(pXMLDocumentNode); 482 case XFA_PacketType::Template: 483 return ParseAsXDPPacket_Template(pXMLDocumentNode); 484 case XFA_PacketType::Form: 485 return ParseAsXDPPacket_Form(pXMLDocumentNode); 486 case XFA_PacketType::Datasets: 487 return ParseAsXDPPacket_Data(pXMLDocumentNode); 488 case XFA_PacketType::Xdc: 489 return ParseAsXDPPacket_Xdc(pXMLDocumentNode); 490 case XFA_PacketType::LocaleSet: 491 return ParseAsXDPPacket_LocaleConnectionSourceSet( 492 pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet); 493 case XFA_PacketType::ConnectionSet: 494 return ParseAsXDPPacket_LocaleConnectionSourceSet( 495 pXMLDocumentNode, XFA_PacketType::ConnectionSet, 496 XFA_Element::ConnectionSet); 497 case XFA_PacketType::SourceSet: 498 return ParseAsXDPPacket_LocaleConnectionSourceSet( 499 pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet); 500 default: 501 return ParseAsXDPPacket_User(pXMLDocumentNode); 502 } 503 } 504 505 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP( 506 CFX_XMLNode* pXMLDocumentNode) { 507 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp); 508 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 509 packet->flags)) { 510 return nullptr; 511 } 512 513 CXFA_Node* pXFARootNode = 514 m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa); 515 if (!pXFARootNode) 516 return nullptr; 517 518 m_pRootNode = pXFARootNode; 519 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa", false, false); 520 521 CFX_XMLElement* pElement = static_cast<CFX_XMLElement*>(pXMLDocumentNode); 522 for (auto it : pElement->GetAttributes()) { 523 if (it.first == L"uuid") 524 pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second, false, 525 false); 526 else if (it.first == L"timeStamp") 527 pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second, 528 false, false); 529 } 530 531 CFX_XMLNode* pXMLConfigDOMRoot = nullptr; 532 CXFA_Node* pXFAConfigDOMRoot = nullptr; 533 for (CFX_XMLNode* pChildItem = 534 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild); 535 pChildItem; 536 pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) { 537 const PacketInfo* pPacketInfo = GetPacketByIndex(XFA_PacketType::Config); 538 if (!MatchNodeName(pChildItem, pPacketInfo->name, pPacketInfo->uri, 539 pPacketInfo->flags)) { 540 continue; 541 } 542 if (pXFARootNode->GetFirstChildByName(pPacketInfo->hash)) 543 return nullptr; 544 545 pXMLConfigDOMRoot = pChildItem; 546 pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot); 547 if (pXFAConfigDOMRoot) 548 pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr); 549 } 550 551 CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr; 552 CFX_XMLNode* pXMLFormDOMRoot = nullptr; 553 CFX_XMLNode* pXMLTemplateDOMRoot = nullptr; 554 for (CFX_XMLNode* pChildItem = 555 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild); 556 pChildItem; 557 pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) { 558 if (!pChildItem || pChildItem->GetType() != FX_XMLNODE_Element) 559 continue; 560 if (pChildItem == pXMLConfigDOMRoot) 561 continue; 562 563 CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pChildItem); 564 WideString wsPacketName = pElement->GetLocalTagName(); 565 const PacketInfo* pPacketInfo = 566 GetPacketByName(wsPacketName.AsStringView()); 567 if (pPacketInfo && pPacketInfo->uri) { 568 if (!MatchNodeName(pElement, pPacketInfo->name, pPacketInfo->uri, 569 pPacketInfo->flags)) { 570 pPacketInfo = nullptr; 571 } 572 } 573 XFA_PacketType ePacket = 574 pPacketInfo ? pPacketInfo->packet_type : XFA_PacketType::User; 575 if (ePacket == XFA_PacketType::Xdp) 576 continue; 577 if (ePacket == XFA_PacketType::Datasets) { 578 if (pXMLDatasetsDOMRoot) 579 return nullptr; 580 581 pXMLDatasetsDOMRoot = pElement; 582 } else if (ePacket == XFA_PacketType::Form) { 583 if (pXMLFormDOMRoot) 584 return nullptr; 585 586 pXMLFormDOMRoot = pElement; 587 } else if (ePacket == XFA_PacketType::Template) { 588 // Found a duplicate template packet. 589 if (pXMLTemplateDOMRoot) 590 return nullptr; 591 592 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); 593 if (pPacketNode) { 594 pXMLTemplateDOMRoot = pElement; 595 pXFARootNode->InsertChild(pPacketNode, nullptr); 596 } 597 } else { 598 CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket); 599 if (pPacketNode) { 600 if (pPacketInfo && 601 (pPacketInfo->flags & XFA_XDPPACKET_FLAGS_SUPPORTONE) && 602 pXFARootNode->GetFirstChildByName(pPacketInfo->hash)) { 603 return nullptr; 604 } 605 pXFARootNode->InsertChild(pPacketNode, nullptr); 606 } 607 } 608 } 609 610 // No template is found. 611 if (!pXMLTemplateDOMRoot) 612 return nullptr; 613 614 if (pXMLDatasetsDOMRoot) { 615 CXFA_Node* pPacketNode = 616 ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets); 617 if (pPacketNode) 618 pXFARootNode->InsertChild(pPacketNode, nullptr); 619 } 620 if (pXMLFormDOMRoot) { 621 CXFA_Node* pPacketNode = 622 ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form); 623 if (pPacketNode) 624 pXFARootNode->InsertChild(pPacketNode, nullptr); 625 } 626 627 pXFARootNode->SetXMLMappingNode(pXMLDocumentNode); 628 return pXFARootNode; 629 } 630 631 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config( 632 CFX_XMLNode* pXMLDocumentNode) { 633 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Config); 634 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 635 packet->flags)) { 636 return nullptr; 637 } 638 CXFA_Node* pNode = 639 m_pFactory->CreateNode(XFA_PacketType::Config, XFA_Element::Config); 640 if (!pNode) 641 return nullptr; 642 643 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false); 644 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true)) 645 return nullptr; 646 647 pNode->SetXMLMappingNode(pXMLDocumentNode); 648 return pNode; 649 } 650 651 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Template( 652 CFX_XMLNode* pXMLDocumentNode) { 653 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Template); 654 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 655 packet->flags)) { 656 return nullptr; 657 } 658 659 CXFA_Node* pNode = 660 m_pFactory->CreateNode(XFA_PacketType::Template, XFA_Element::Template); 661 if (!pNode) 662 return nullptr; 663 664 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false); 665 if (m_bDocumentParser) { 666 CFX_XMLElement* pXMLDocumentElement = 667 static_cast<CFX_XMLElement*>(pXMLDocumentNode); 668 WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI(); 669 if (wsNamespaceURI.IsEmpty()) 670 wsNamespaceURI = pXMLDocumentElement->GetString(L"xmlns:xfa"); 671 672 pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI); 673 } 674 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true)) 675 return nullptr; 676 677 pNode->SetXMLMappingNode(pXMLDocumentNode); 678 return pNode; 679 } 680 681 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Form( 682 CFX_XMLNode* pXMLDocumentNode) { 683 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Form); 684 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 685 packet->flags)) { 686 return nullptr; 687 } 688 689 CFX_XMLElement* pXMLDocumentElement = 690 static_cast<CFX_XMLElement*>(pXMLDocumentNode); 691 WideString wsChecksum = pXMLDocumentElement->GetString(L"checksum"); 692 if (wsChecksum.GetLength() != 28 || m_pXMLParser->m_dwCheckStatus != 0x03) { 693 return nullptr; 694 } 695 696 auto pChecksum = pdfium::MakeUnique<CFX_ChecksumContext>(); 697 pChecksum->StartChecksum(); 698 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0], 699 m_pXMLParser->m_nSize[0]); 700 pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1], 701 m_pXMLParser->m_nSize[1]); 702 pChecksum->FinishChecksum(); 703 ByteString bsCheck = pChecksum->GetChecksum(); 704 if (bsCheck != wsChecksum.UTF8Encode()) 705 return nullptr; 706 707 CXFA_Node* pNode = 708 m_pFactory->CreateNode(XFA_PacketType::Form, XFA_Element::Form); 709 if (!pNode) 710 return nullptr; 711 712 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false); 713 pNode->JSObject()->SetAttribute(XFA_Attribute::Checksum, 714 wsChecksum.AsStringView(), false); 715 CXFA_Template* pTemplateRoot = 716 m_pRootNode->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template); 717 CXFA_Subform* pTemplateChosen = 718 pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>( 719 XFA_Element::Subform) 720 : nullptr; 721 bool bUseAttribute = true; 722 if (pTemplateChosen && 723 pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) != 724 XFA_AttributeEnum::Auto) { 725 bUseAttribute = false; 726 } 727 if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form, 728 bUseAttribute)) 729 return nullptr; 730 731 pNode->SetXMLMappingNode(pXMLDocumentNode); 732 return pNode; 733 } 734 735 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data( 736 CFX_XMLNode* pXMLDocumentNode) { 737 CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode); 738 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Datasets); 739 if (pDatasetsXMLNode) { 740 CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets, 741 XFA_Element::DataModel); 742 if (!pNode) 743 return nullptr; 744 745 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, 746 false); 747 if (!DataLoader(pNode, pDatasetsXMLNode, false)) 748 return nullptr; 749 750 pNode->SetXMLMappingNode(pDatasetsXMLNode); 751 return pNode; 752 } 753 754 CFX_XMLNode* pDataXMLNode = nullptr; 755 if (MatchNodeName(pXMLDocumentNode, L"data", packet->uri, packet->flags)) { 756 static_cast<CFX_XMLElement*>(pXMLDocumentNode) 757 ->RemoveAttribute(L"xmlns:xfa"); 758 pDataXMLNode = pXMLDocumentNode; 759 } else { 760 CFX_XMLElement* pDataElement = new CFX_XMLElement(L"xfa:data"); 761 CFX_XMLNode* pParentXMLNode = 762 pXMLDocumentNode->GetNodeItem(CFX_XMLNode::Parent); 763 if (pParentXMLNode) 764 pParentXMLNode->RemoveChildNode(pXMLDocumentNode); 765 766 ASSERT(pXMLDocumentNode->GetType() == FX_XMLNODE_Element); 767 if (pXMLDocumentNode->GetType() == FX_XMLNODE_Element) { 768 static_cast<CFX_XMLElement*>(pXMLDocumentNode) 769 ->RemoveAttribute(L"xmlns:xfa"); 770 } 771 pDataElement->InsertChildNode(pXMLDocumentNode); 772 pDataXMLNode = pDataElement; 773 } 774 775 if (pDataXMLNode) { 776 CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets, 777 XFA_Element::DataGroup); 778 if (!pNode) { 779 if (pDataXMLNode != pXMLDocumentNode) 780 delete pDataXMLNode; 781 return nullptr; 782 } 783 WideString wsLocalName = 784 static_cast<CFX_XMLElement*>(pDataXMLNode)->GetLocalTagName(); 785 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName, false, false); 786 if (!DataLoader(pNode, pDataXMLNode, true)) 787 return nullptr; 788 789 pNode->SetXMLMappingNode(pDataXMLNode); 790 if (pDataXMLNode != pXMLDocumentNode) 791 pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false); 792 return pNode; 793 } 794 return nullptr; 795 } 796 797 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet( 798 CFX_XMLNode* pXMLDocumentNode, 799 XFA_PacketType packet_type, 800 XFA_Element element) { 801 const PacketInfo* packet = GetPacketByIndex(packet_type); 802 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 803 packet->flags)) { 804 return nullptr; 805 } 806 807 CXFA_Node* pNode = m_pFactory->CreateNode(packet_type, element); 808 if (!pNode) 809 return nullptr; 810 811 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false); 812 if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true)) 813 return nullptr; 814 815 pNode->SetXMLMappingNode(pXMLDocumentNode); 816 return pNode; 817 } 818 819 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc( 820 CFX_XMLNode* pXMLDocumentNode) { 821 const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdc); 822 if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri, 823 packet->flags)) 824 return nullptr; 825 826 CXFA_Node* pNode = 827 m_pFactory->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc); 828 if (!pNode) 829 return nullptr; 830 831 pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false); 832 pNode->SetXMLMappingNode(pXMLDocumentNode); 833 return pNode; 834 } 835 836 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User( 837 CFX_XMLNode* pXMLDocumentNode) { 838 CXFA_Node* pNode = 839 m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet); 840 if (!pNode) 841 return nullptr; 842 843 WideString wsName = 844 static_cast<CFX_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(); 845 pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false); 846 if (!UserPacketLoader(pNode, pXMLDocumentNode)) 847 return nullptr; 848 849 pNode->SetXMLMappingNode(pXMLDocumentNode); 850 return pNode; 851 } 852 853 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode, 854 CFX_XMLNode* pXMLDoc) { 855 return pXFANode; 856 } 857 858 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode, 859 CFX_XMLNode* pXMLDoc, 860 bool bDoTransform) { 861 ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets); 862 return pXFANode; 863 } 864 865 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode, 866 CFX_XMLNode* pXMLDoc, 867 XFA_PacketType ePacketID, 868 bool bUseAttribute) { 869 bool bOneOfPropertyFound = false; 870 for (CFX_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFX_XMLNode::FirstChild); 871 pXMLChild; 872 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 873 switch (pXMLChild->GetType()) { 874 case FX_XMLNODE_Element: { 875 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild); 876 WideString wsTagName = pXMLElement->GetLocalTagName(); 877 XFA_Element eType = CXFA_Node::NameToElement(wsTagName); 878 if (eType == XFA_Element::Unknown) 879 continue; 880 881 if (pXFANode->HasPropertyFlags( 882 eType, 883 XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) { 884 if (bOneOfPropertyFound) 885 break; 886 bOneOfPropertyFound = true; 887 } 888 889 CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType); 890 if (!pXFAChild) 891 return nullptr; 892 if (ePacketID == XFA_PacketType::Config) { 893 pXFAChild->JSObject()->SetAttribute(XFA_Attribute::Name, 894 wsTagName.AsStringView(), false); 895 } 896 897 bool IsNeedValue = true; 898 for (auto it : pXMLElement->GetAttributes()) { 899 WideString wsAttrName; 900 GetAttributeLocalName(it.first.AsStringView(), wsAttrName); 901 if (wsAttrName == L"nil" && it.second == L"true") 902 IsNeedValue = false; 903 904 XFA_Attribute attr = 905 CXFA_Node::NameToAttribute(wsAttrName.AsStringView()); 906 if (attr == XFA_Attribute::Unknown) 907 continue; 908 909 if (!bUseAttribute && attr != XFA_Attribute::Name && 910 attr != XFA_Attribute::Save) { 911 continue; 912 } 913 pXFAChild->JSObject()->SetAttribute(attr, it.second.AsStringView(), 914 false); 915 } 916 pXFANode->InsertChild(pXFAChild, nullptr); 917 if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) { 918 if (ePacketID == XFA_PacketType::Config) 919 ParseContentNode(pXFAChild, pXMLElement, ePacketID); 920 else 921 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); 922 923 break; 924 } 925 switch (pXFAChild->GetObjectType()) { 926 case XFA_ObjectType::ContentNode: 927 case XFA_ObjectType::TextNode: 928 case XFA_ObjectType::NodeC: 929 case XFA_ObjectType::NodeV: 930 if (IsNeedValue) 931 ParseContentNode(pXFAChild, pXMLElement, ePacketID); 932 break; 933 default: 934 NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute); 935 break; 936 } 937 } break; 938 case FX_XMLNODE_Instruction: 939 ParseInstruction(pXFANode, static_cast<CFX_XMLInstruction*>(pXMLChild), 940 ePacketID); 941 break; 942 default: 943 break; 944 } 945 } 946 return pXFANode; 947 } 948 949 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode, 950 CFX_XMLNode* pXMLNode, 951 XFA_PacketType ePacketID) { 952 XFA_Element element = XFA_Element::Sharptext; 953 if (pXFANode->GetElementType() == XFA_Element::ExData) { 954 WideString wsContentType = 955 pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType); 956 if (wsContentType == L"text/html") 957 element = XFA_Element::SharpxHTML; 958 else if (wsContentType == L"text/xml") 959 element = XFA_Element::Sharpxml; 960 } 961 if (element == XFA_Element::SharpxHTML) 962 pXFANode->SetXMLMappingNode(pXMLNode); 963 964 WideString wsValue; 965 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 966 pXMLChild; 967 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 968 FX_XMLNODETYPE eNodeType = pXMLChild->GetType(); 969 if (eNodeType == FX_XMLNODE_Instruction) 970 continue; 971 972 if (element == XFA_Element::SharpxHTML) { 973 if (eNodeType != FX_XMLNODE_Element) 974 break; 975 976 if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) 977 wsValue += 978 GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild)); 979 } else if (element == XFA_Element::Sharpxml) { 980 if (eNodeType != FX_XMLNODE_Element) 981 break; 982 983 ConvertXMLToPlainText(static_cast<CFX_XMLElement*>(pXMLChild), wsValue); 984 } else { 985 if (eNodeType == FX_XMLNODE_Element) 986 break; 987 if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) 988 wsValue = static_cast<CFX_XMLText*>(pXMLChild)->GetText(); 989 } 990 break; 991 } 992 if (!wsValue.IsEmpty()) { 993 if (pXFANode->IsContentNode()) { 994 CXFA_Node* pContentRawDataNode = 995 m_pFactory->CreateNode(ePacketID, element); 996 ASSERT(pContentRawDataNode); 997 pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, 998 false, false); 999 pXFANode->InsertChild(pContentRawDataNode, nullptr); 1000 } else { 1001 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false, 1002 false); 1003 } 1004 } 1005 } 1006 1007 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode, 1008 CFX_XMLNode* pXMLNode, 1009 XFA_PacketType ePacketID) { 1010 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 1011 pXMLChild; 1012 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 1013 switch (pXMLChild->GetType()) { 1014 case FX_XMLNODE_Element: { 1015 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild); 1016 { 1017 WideString wsNamespaceURI = GetElementTagNamespaceURI(pXMLElement); 1018 if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" || 1019 wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" || 1020 wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") { 1021 continue; 1022 } 1023 } 1024 1025 XFA_Element eNodeType = XFA_Element::DataModel; 1026 if (eNodeType == XFA_Element::DataModel) { 1027 WideString wsDataNodeAttr; 1028 if (FindAttributeWithNS(pXMLElement, L"dataNode", 1029 L"http://www.xfa.org/schema/xfa-data/1.0/", 1030 wsDataNodeAttr)) { 1031 if (wsDataNodeAttr == L"dataGroup") 1032 eNodeType = XFA_Element::DataGroup; 1033 else if (wsDataNodeAttr == L"dataValue") 1034 eNodeType = XFA_Element::DataValue; 1035 } 1036 } 1037 WideString wsContentType; 1038 if (eNodeType == XFA_Element::DataModel) { 1039 if (FindAttributeWithNS(pXMLElement, L"contentType", 1040 L"http://www.xfa.org/schema/xfa-data/1.0/", 1041 wsContentType)) { 1042 if (!wsContentType.IsEmpty()) 1043 eNodeType = XFA_Element::DataValue; 1044 } 1045 } 1046 if (eNodeType == XFA_Element::DataModel) { 1047 for (CFX_XMLNode* pXMLDataChild = 1048 pXMLElement->GetNodeItem(CFX_XMLNode::FirstChild); 1049 pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem( 1050 CFX_XMLNode::NextSibling)) { 1051 if (pXMLDataChild->GetType() == FX_XMLNODE_Element) { 1052 if (!XFA_RecognizeRichText( 1053 static_cast<CFX_XMLElement*>(pXMLDataChild))) { 1054 eNodeType = XFA_Element::DataGroup; 1055 break; 1056 } 1057 } 1058 } 1059 } 1060 if (eNodeType == XFA_Element::DataModel) 1061 eNodeType = XFA_Element::DataValue; 1062 1063 CXFA_Node* pXFAChild = 1064 m_pFactory->CreateNode(XFA_PacketType::Datasets, eNodeType); 1065 if (!pXFAChild) 1066 return; 1067 1068 pXFAChild->JSObject()->SetCData( 1069 XFA_Attribute::Name, pXMLElement->GetLocalTagName(), false, false); 1070 bool bNeedValue = true; 1071 1072 for (auto it : pXMLElement->GetAttributes()) { 1073 WideString wsName; 1074 WideString wsNS; 1075 if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) { 1076 continue; 1077 } 1078 if (wsName == L"nil" && it.second == L"true") { 1079 bNeedValue = false; 1080 continue; 1081 } 1082 if (wsNS == L"http://www.xfa.com/schema/xfa-package/" || 1083 wsNS == L"http://www.xfa.org/schema/xfa-package/" || 1084 wsNS == L"http://www.w3.org/2001/XMLSchema-instance" || 1085 wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") { 1086 continue; 1087 } 1088 CXFA_Node* pXFAMetaData = m_pFactory->CreateNode( 1089 XFA_PacketType::Datasets, XFA_Element::DataValue); 1090 if (!pXFAMetaData) 1091 return; 1092 1093 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, 1094 false); 1095 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName, 1096 it.first, false, false); 1097 pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second, 1098 false, false); 1099 pXFAMetaData->JSObject()->SetEnum(XFA_Attribute::Contains, 1100 XFA_AttributeEnum::MetaData, false); 1101 pXFAChild->InsertChild(pXFAMetaData, nullptr); 1102 pXFAMetaData->SetXMLMappingNode(pXMLElement); 1103 pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false); 1104 } 1105 1106 if (!bNeedValue) { 1107 WideString wsNilName(L"xsi:nil"); 1108 pXMLElement->RemoveAttribute(wsNilName.c_str()); 1109 } 1110 pXFANode->InsertChild(pXFAChild, nullptr); 1111 if (eNodeType == XFA_Element::DataGroup) 1112 ParseDataGroup(pXFAChild, pXMLElement, ePacketID); 1113 else if (bNeedValue) 1114 ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets); 1115 1116 pXFAChild->SetXMLMappingNode(pXMLElement); 1117 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1118 continue; 1119 } 1120 case FX_XMLNODE_CharData: 1121 case FX_XMLNODE_Text: { 1122 CFX_XMLText* pXMLText = static_cast<CFX_XMLText*>(pXMLChild); 1123 WideString wsText = pXMLText->GetText(); 1124 if (IsStringAllWhitespace(wsText)) 1125 continue; 1126 1127 CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets, 1128 XFA_Element::DataValue); 1129 if (!pXFAChild) 1130 return; 1131 1132 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText, false, 1133 false); 1134 pXFANode->InsertChild(pXFAChild, nullptr); 1135 pXFAChild->SetXMLMappingNode(pXMLText); 1136 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1137 continue; 1138 } 1139 default: 1140 continue; 1141 } 1142 } 1143 } 1144 1145 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode, 1146 CFX_XMLNode* pXMLNode, 1147 XFA_PacketType ePacketID) { 1148 CFX_WideTextBuf wsValueTextBuf; 1149 CFX_WideTextBuf wsCurValueTextBuf; 1150 bool bMarkAsCompound = false; 1151 CFX_XMLNode* pXMLCurValueNode = nullptr; 1152 for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild); 1153 pXMLChild; 1154 pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) { 1155 FX_XMLNODETYPE eNodeType = pXMLChild->GetType(); 1156 if (eNodeType == FX_XMLNODE_Instruction) 1157 continue; 1158 1159 if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) { 1160 WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText(); 1161 if (!pXMLCurValueNode) 1162 pXMLCurValueNode = pXMLChild; 1163 1164 wsCurValueTextBuf << wsText; 1165 } else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) { 1166 WideString wsText = 1167 GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild)); 1168 if (!pXMLCurValueNode) 1169 pXMLCurValueNode = pXMLChild; 1170 1171 wsCurValueTextBuf << wsText; 1172 } else { 1173 bMarkAsCompound = true; 1174 if (pXMLCurValueNode) { 1175 WideString wsCurValue = wsCurValueTextBuf.MakeString(); 1176 if (!wsCurValue.IsEmpty()) { 1177 CXFA_Node* pXFAChild = 1178 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1179 if (!pXFAChild) 1180 return; 1181 1182 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false, 1183 false); 1184 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, 1185 false, false); 1186 pXFANode->InsertChild(pXFAChild, nullptr); 1187 pXFAChild->SetXMLMappingNode(pXMLCurValueNode); 1188 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1189 wsValueTextBuf << wsCurValue; 1190 wsCurValueTextBuf.Clear(); 1191 } 1192 pXMLCurValueNode = nullptr; 1193 } 1194 CXFA_Node* pXFAChild = 1195 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1196 if (!pXFAChild) 1197 return; 1198 1199 WideString wsNodeStr = 1200 static_cast<CFX_XMLElement*>(pXMLChild)->GetLocalTagName(); 1201 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false, 1202 false); 1203 ParseDataValue(pXFAChild, pXMLChild, ePacketID); 1204 pXFANode->InsertChild(pXFAChild, nullptr); 1205 pXFAChild->SetXMLMappingNode(pXMLChild); 1206 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1207 WideString wsCurValue = 1208 pXFAChild->JSObject()->GetCData(XFA_Attribute::Value); 1209 wsValueTextBuf << wsCurValue; 1210 } 1211 } 1212 if (pXMLCurValueNode) { 1213 WideString wsCurValue = wsCurValueTextBuf.MakeString(); 1214 if (!wsCurValue.IsEmpty()) { 1215 if (bMarkAsCompound) { 1216 CXFA_Node* pXFAChild = 1217 m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue); 1218 if (!pXFAChild) 1219 return; 1220 1221 pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false, false); 1222 pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false, 1223 false); 1224 pXFANode->InsertChild(pXFAChild, nullptr); 1225 pXFAChild->SetXMLMappingNode(pXMLCurValueNode); 1226 pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false); 1227 } 1228 wsValueTextBuf << wsCurValue; 1229 wsCurValueTextBuf.Clear(); 1230 } 1231 pXMLCurValueNode = nullptr; 1232 } 1233 WideString wsNodeValue = wsValueTextBuf.MakeString(); 1234 pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsNodeValue, false, 1235 false); 1236 } 1237 1238 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode, 1239 CFX_XMLInstruction* pXMLInstruction, 1240 XFA_PacketType ePacketID) { 1241 if (!m_bDocumentParser) 1242 return; 1243 1244 WideString wsTargetName = pXMLInstruction->GetName(); 1245 const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData(); 1246 if (wsTargetName == L"originalXFAVersion") { 1247 if (target_data.size() > 1 && 1248 (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) != 1249 XFA_VERSION_UNKNOWN) && 1250 target_data[1] == L"v2.7-scripting:1") { 1251 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true); 1252 } 1253 } else if (wsTargetName == L"acrobat") { 1254 if (target_data.size() > 1 && target_data[0] == L"JavaScript" && 1255 target_data[1] == L"strictScoping") { 1256 pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true); 1257 } 1258 } 1259 } 1260 1261 void CXFA_SimpleParser::CloseParser() { 1262 m_pXMLDoc.reset(); 1263 m_pStream.Reset(); 1264 } 1265