Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
      8 
      9 #include <utility>
     10 #include <vector>
     11 
     12 #include "core/fxcrt/cfx_checksumcontext.h"
     13 #include "core/fxcrt/cfx_seekablestreamproxy.h"
     14 #include "core/fxcrt/cfx_widetextbuf.h"
     15 #include "core/fxcrt/fx_codepage.h"
     16 #include "core/fxcrt/fx_extension.h"
     17 #include "core/fxcrt/xml/cfx_xmlchardata.h"
     18 #include "core/fxcrt/xml/cfx_xmldoc.h"
     19 #include "core/fxcrt/xml/cfx_xmlelement.h"
     20 #include "core/fxcrt/xml/cfx_xmlinstruction.h"
     21 #include "core/fxcrt/xml/cfx_xmlnode.h"
     22 #include "core/fxcrt/xml/cfx_xmlparser.h"
     23 #include "core/fxcrt/xml/cfx_xmltext.h"
     24 #include "fxjs/xfa/cjx_object.h"
     25 #include "third_party/base/logging.h"
     26 #include "third_party/base/ptr_util.h"
     27 #include "xfa/fxfa/fxfa.h"
     28 #include "xfa/fxfa/parser/cxfa_document.h"
     29 #include "xfa/fxfa/parser/cxfa_node.h"
     30 #include "xfa/fxfa/parser/cxfa_subform.h"
     31 #include "xfa/fxfa/parser/cxfa_template.h"
     32 #include "xfa/fxfa/parser/xfa_basic_data.h"
     33 #include "xfa/fxfa/parser/xfa_utils.h"
     34 
     35 namespace {
     36 
     37 struct PacketInfo {
     38   uint32_t hash;
     39   const wchar_t* name;
     40   XFA_PacketType packet_type;
     41   const wchar_t* uri;
     42   uint32_t flags;
     43 };
     44 const PacketInfo PacketData[] = {
     45     {0x0, nullptr, XFA_PacketType::User, nullptr,
     46      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
     47     {0x811929d, L"sourceSet", XFA_PacketType::SourceSet,
     48      L"http://www.xfa.org/schema/xfa-source-set/",
     49      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     50     {0xb843dba, L"pdf", XFA_PacketType::Pdf, L"http://ns.adobe.com/xdp/pdf/",
     51      XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     52     {0xc56afbf, L"xdc", XFA_PacketType::Xdc, L"http://www.xfa.org/schema/xdc/",
     53      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     54     {0xc56afcc, L"xdp", XFA_PacketType::Xdp, L"http://ns.adobe.com/xdp/",
     55      XFA_XDPPACKET_FLAGS_COMPLETEMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     56     {0x132a8fbc, L"xmpmeta", XFA_PacketType::Xmpmeta,
     57      L"http://ns.adobe.com/xmpmeta/",
     58      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
     59     {0x48d004a8, L"xfdf", XFA_PacketType::Xfdf, L"http://ns.adobe.com/xfdf/",
     60      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     61     {0x4e1e39b6, L"config", XFA_PacketType::Config,
     62      L"http://www.xfa.org/schema/xci/",
     63      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     64     {0x5473b6dc, L"localeSet", XFA_PacketType::LocaleSet,
     65      L"http://www.xfa.org/schema/xfa-locale-set/",
     66      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     67     {0x6038580a, L"stylesheet", XFA_PacketType::Stylesheet,
     68      L"http://www.w3.org/1999/XSL/Transform",
     69      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTMANY},
     70     {0x803550fc, L"template", XFA_PacketType::Template,
     71      L"http://www.xfa.org/schema/xfa-template/",
     72      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     73     {0x8b036f32, L"signature", XFA_PacketType::Signature,
     74      L"http://www.w3.org/2000/09/xmldsig#",
     75      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     76     {0x99b95079, L"datasets", XFA_PacketType::Datasets,
     77      L"http://www.xfa.org/schema/xfa-data/",
     78      XFA_XDPPACKET_FLAGS_PREFIXMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     79     {0xcd309ff4, L"form", XFA_PacketType::Form,
     80      L"http://www.xfa.org/schema/xfa-form/",
     81      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     82     {0xe14c801c, L"connectionSet", XFA_PacketType::ConnectionSet,
     83      L"http://www.xfa.org/schema/xfa-connection-set/",
     84      XFA_XDPPACKET_FLAGS_NOMATCH | XFA_XDPPACKET_FLAGS_SUPPORTONE},
     85 };
     86 
     87 const PacketInfo* GetPacketByIndex(XFA_PacketType ePacket) {
     88   return PacketData + static_cast<uint8_t>(ePacket);
     89 }
     90 
     91 const PacketInfo* GetPacketByName(const WideStringView& wsName) {
     92   if (wsName.IsEmpty())
     93     return nullptr;
     94 
     95   uint32_t hash = FX_HashCode_GetW(wsName, false);
     96   auto* elem = std::lower_bound(
     97       std::begin(PacketData), std::end(PacketData), hash,
     98       [](const PacketInfo& a, uint32_t hash) { return a.hash < hash; });
     99   if (elem != std::end(PacketData) && elem->hash == hash)
    100     return elem;
    101   return nullptr;
    102 }
    103 
    104 CFX_XMLNode* GetDocumentNode(CFX_XMLDoc* pXMLDoc,
    105                              bool bVerifyWellFormness = false) {
    106   if (!pXMLDoc)
    107     return nullptr;
    108 
    109   for (CFX_XMLNode* pXMLNode =
    110            pXMLDoc->GetRoot()->GetNodeItem(CFX_XMLNode::FirstChild);
    111        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
    112     if (pXMLNode->GetType() != FX_XMLNODE_Element)
    113       continue;
    114 
    115     if (!bVerifyWellFormness)
    116       return pXMLNode;
    117 
    118     for (CFX_XMLNode* pNextNode =
    119              pXMLNode->GetNodeItem(CFX_XMLNode::NextSibling);
    120          pNextNode;
    121          pNextNode = pNextNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
    122       if (pNextNode->GetType() == FX_XMLNODE_Element)
    123         return nullptr;
    124     }
    125     return pXMLNode;
    126   }
    127   return nullptr;
    128 }
    129 
    130 WideString GetElementTagNamespaceURI(CFX_XMLElement* pElement) {
    131   WideString wsNodeStr = pElement->GetNamespacePrefix();
    132   WideString wsNamespaceURI;
    133   if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNodeStr,
    134                                                   &wsNamespaceURI)) {
    135     return WideString();
    136   }
    137   return wsNamespaceURI;
    138 }
    139 
    140 bool MatchNodeName(CFX_XMLNode* pNode,
    141                    const WideStringView& wsLocalTagName,
    142                    const WideStringView& wsNamespaceURIPrefix,
    143                    uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
    144   if (!pNode || pNode->GetType() != FX_XMLNODE_Element)
    145     return false;
    146 
    147   CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pNode);
    148   WideString wsNodeStr = pElement->GetLocalTagName();
    149   if (wsNodeStr != wsLocalTagName)
    150     return false;
    151 
    152   wsNodeStr = GetElementTagNamespaceURI(pElement);
    153   if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
    154     return true;
    155   if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
    156     return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
    157            wsNamespaceURIPrefix;
    158   }
    159 
    160   return wsNodeStr == wsNamespaceURIPrefix;
    161 }
    162 
    163 bool GetAttributeLocalName(const WideStringView& wsAttributeName,
    164                            WideString& wsLocalAttrName) {
    165   WideString wsAttrName(wsAttributeName);
    166   auto pos = wsAttrName.Find(L':', 0);
    167   if (!pos.has_value()) {
    168     wsLocalAttrName = wsAttrName;
    169     return false;
    170   }
    171   wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - pos.value() - 1);
    172   return true;
    173 }
    174 
    175 bool ResolveAttribute(CFX_XMLElement* pElement,
    176                       const WideString& wsAttrName,
    177                       WideString& wsLocalAttrName,
    178                       WideString& wsNamespaceURI) {
    179   WideString wsNSPrefix;
    180   if (GetAttributeLocalName(wsAttrName.AsStringView(), wsLocalAttrName)) {
    181     wsNSPrefix = wsAttrName.Left(wsAttrName.GetLength() -
    182                                  wsLocalAttrName.GetLength() - 1);
    183   }
    184   if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
    185       wsNSPrefix == L"xml") {
    186     return false;
    187   }
    188   if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
    189                                                   &wsNamespaceURI)) {
    190     wsNamespaceURI.clear();
    191     return false;
    192   }
    193   return true;
    194 }
    195 
    196 bool FindAttributeWithNS(CFX_XMLElement* pElement,
    197                          const WideStringView& wsLocalAttributeName,
    198                          const WideStringView& wsNamespaceURIPrefix,
    199                          WideString& wsValue,
    200                          bool bMatchNSAsPrefix = false) {
    201   if (!pElement)
    202     return false;
    203 
    204   WideString wsAttrNS;
    205   for (auto it : pElement->GetAttributes()) {
    206     auto pos = it.first.Find(L':', 0);
    207     WideString wsNSPrefix;
    208     if (!pos.has_value()) {
    209       if (wsLocalAttributeName != it.first)
    210         continue;
    211     } else {
    212       if (wsLocalAttributeName !=
    213           it.first.Right(it.first.GetLength() - pos.value() - 1)) {
    214         continue;
    215       }
    216       wsNSPrefix = it.first.Left(pos.value());
    217     }
    218 
    219     if (!XFA_FDEExtension_ResolveNamespaceQualifier(pElement, wsNSPrefix,
    220                                                     &wsAttrNS)) {
    221       continue;
    222     }
    223     if (bMatchNSAsPrefix) {
    224       if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
    225           wsNamespaceURIPrefix) {
    226         continue;
    227       }
    228     } else {
    229       if (wsAttrNS != wsNamespaceURIPrefix)
    230         continue;
    231     }
    232     wsValue = it.second;
    233     return true;
    234   }
    235   return false;
    236 }
    237 
    238 CFX_XMLNode* GetDataSetsFromXDP(CFX_XMLNode* pXMLDocumentNode) {
    239   const PacketInfo* datasets_packet =
    240       GetPacketByIndex(XFA_PacketType::Datasets);
    241   if (MatchNodeName(pXMLDocumentNode, datasets_packet->name,
    242                     datasets_packet->uri, datasets_packet->flags)) {
    243     return pXMLDocumentNode;
    244   }
    245 
    246   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
    247   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    248                      packet->flags)) {
    249     return nullptr;
    250   }
    251 
    252   for (CFX_XMLNode* pDatasetsNode =
    253            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
    254        pDatasetsNode;
    255        pDatasetsNode = pDatasetsNode->GetNodeItem(CFX_XMLNode::NextSibling)) {
    256     if (MatchNodeName(pDatasetsNode, datasets_packet->name,
    257                       datasets_packet->uri, datasets_packet->flags)) {
    258       return pDatasetsNode;
    259     }
    260   }
    261   return nullptr;
    262 }
    263 
    264 bool IsStringAllWhitespace(WideString wsText) {
    265   wsText.TrimRight(L"\x20\x9\xD\xA");
    266   return wsText.IsEmpty();
    267 }
    268 
    269 void ConvertXMLToPlainText(CFX_XMLElement* pRootXMLNode, WideString& wsOutput) {
    270   for (CFX_XMLNode* pXMLChild =
    271            pRootXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
    272        pXMLChild;
    273        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
    274     switch (pXMLChild->GetType()) {
    275       case FX_XMLNODE_Element: {
    276         WideString wsTextData =
    277             static_cast<CFX_XMLElement*>(pXMLChild)->GetTextData();
    278         wsTextData += L"\n";
    279         wsOutput += wsTextData;
    280         break;
    281       }
    282       case FX_XMLNODE_Text:
    283       case FX_XMLNODE_CharData: {
    284         WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
    285         if (IsStringAllWhitespace(wsText))
    286           continue;
    287 
    288         wsOutput = wsText;
    289         break;
    290       }
    291       default:
    292         NOTREACHED();
    293         break;
    294     }
    295   }
    296 }
    297 
    298 WideString GetPlainTextFromRichText(CFX_XMLNode* pXMLNode) {
    299   if (!pXMLNode)
    300     return L"";
    301 
    302   WideString wsPlainText;
    303   switch (pXMLNode->GetType()) {
    304     case FX_XMLNODE_Element: {
    305       CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode);
    306       WideString wsTag = pXMLElement->GetLocalTagName();
    307       uint32_t uTag = FX_HashCode_GetW(wsTag.AsStringView(), true);
    308       if (uTag == 0x0001f714) {
    309         wsPlainText += L"\n";
    310       } else if (uTag == 0x00000070) {
    311         if (!wsPlainText.IsEmpty()) {
    312           wsPlainText += L"\n";
    313         }
    314       } else if (uTag == 0xa48ac63) {
    315         if (!wsPlainText.IsEmpty() &&
    316             wsPlainText[wsPlainText.GetLength() - 1] != '\n') {
    317           wsPlainText += L"\n";
    318         }
    319       }
    320       break;
    321     }
    322     case FX_XMLNODE_Text:
    323     case FX_XMLNODE_CharData: {
    324       WideString wsContent = static_cast<CFX_XMLText*>(pXMLNode)->GetText();
    325       wsPlainText += wsContent;
    326       break;
    327     }
    328     default:
    329       break;
    330   }
    331   for (CFX_XMLNode* pChildXML = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
    332        pChildXML;
    333        pChildXML = pChildXML->GetNodeItem(CFX_XMLNode::NextSibling)) {
    334     wsPlainText += GetPlainTextFromRichText(pChildXML);
    335   }
    336 
    337   return wsPlainText;
    338 }
    339 
    340 }  // namespace
    341 
    342 bool XFA_RecognizeRichText(CFX_XMLElement* pRichTextXMLNode) {
    343   return pRichTextXMLNode && GetElementTagNamespaceURI(pRichTextXMLNode) ==
    344                                  L"http://www.w3.org/1999/xhtml";
    345 }
    346 
    347 CXFA_SimpleParser::CXFA_SimpleParser() : m_bDocumentParser(true) {}
    348 
    349 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory)
    350     : m_pFactory(pFactory), m_bDocumentParser(false) {}
    351 
    352 CXFA_SimpleParser::~CXFA_SimpleParser() {}
    353 
    354 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
    355   ASSERT(m_bDocumentParser);
    356   m_pFactory = pFactory;
    357 }
    358 
    359 int32_t CXFA_SimpleParser::StartParse(
    360     const RetainPtr<IFX_SeekableStream>& pStream,
    361     XFA_PacketType ePacketID) {
    362   CloseParser();
    363   m_pFileRead = pStream;
    364   m_pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(pStream, false);
    365   uint16_t wCodePage = m_pStream->GetCodePage();
    366   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
    367       wCodePage != FX_CODEPAGE_UTF8) {
    368     m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
    369   }
    370   m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
    371   auto pNewParser =
    372       pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
    373   m_pXMLParser = pNewParser.get();
    374   if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
    375     return XFA_PARSESTATUS_StatusErr;
    376 
    377   m_bParseStarted = true;
    378   m_ePacketID = ePacketID;
    379   return XFA_PARSESTATUS_Ready;
    380 }
    381 
    382 int32_t CXFA_SimpleParser::DoParse() {
    383   if (!m_pXMLDoc || !m_bParseStarted)
    384     return XFA_PARSESTATUS_StatusErr;
    385 
    386   int32_t iRet = m_pXMLDoc->DoLoad();
    387   if (iRet < 0)
    388     return XFA_PARSESTATUS_SyntaxErr;
    389   if (iRet < 100)
    390     return iRet / 2;
    391 
    392   m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
    393   m_pXMLParser.Release();
    394   m_pXMLDoc->CloseXML();
    395   m_pStream.Reset();
    396 
    397   if (!m_pRootNode)
    398     return XFA_PARSESTATUS_StatusErr;
    399 
    400   return XFA_PARSESTATUS_Done;
    401 }
    402 
    403 CFX_XMLNode* CXFA_SimpleParser::ParseXMLData(const ByteString& wsXML) {
    404   CloseParser();
    405   m_pXMLDoc = pdfium::MakeUnique<CFX_XMLDoc>();
    406 
    407   auto pStream = pdfium::MakeRetain<CFX_SeekableStreamProxy>(
    408       const_cast<uint8_t*>(wsXML.raw_str()), wsXML.GetLength());
    409   auto pParser =
    410       pdfium::MakeUnique<CFX_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
    411   pParser->m_dwCheckStatus = 0x03;
    412   if (!m_pXMLDoc->LoadXML(std::move(pParser)))
    413     return nullptr;
    414 
    415   int32_t iRet = m_pXMLDoc->DoLoad();
    416   if (iRet < 0 || iRet >= 100)
    417     m_pXMLDoc->CloseXML();
    418   return iRet < 100 ? nullptr : GetDocumentNode(m_pXMLDoc.get());
    419 }
    420 
    421 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
    422                                          CFX_XMLNode* pXMLNode) {
    423   XFA_PacketType ePacketID = pXFANode->GetPacketType();
    424   if (ePacketID == XFA_PacketType::Datasets) {
    425     if (pXFANode->GetElementType() == XFA_Element::DataValue) {
    426       for (CFX_XMLNode* pXMLChild =
    427                pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
    428            pXMLChild;
    429            pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
    430         FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
    431         if (eNodeType == FX_XMLNODE_Instruction)
    432           continue;
    433 
    434         if (eNodeType == FX_XMLNODE_Element) {
    435           CXFA_Node* pXFAChild = m_pFactory->CreateNode(
    436               XFA_PacketType::Datasets, XFA_Element::DataValue);
    437           if (!pXFAChild)
    438             return;
    439 
    440           CFX_XMLElement* child = static_cast<CFX_XMLElement*>(pXMLChild);
    441           WideString wsNodeStr = child->GetLocalTagName();
    442           pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
    443                                           false);
    444           WideString wsChildValue = GetPlainTextFromRichText(child);
    445           if (!wsChildValue.IsEmpty())
    446             pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsChildValue,
    447                                             false, false);
    448 
    449           pXFANode->InsertChild(pXFAChild, nullptr);
    450           pXFAChild->SetXMLMappingNode(pXMLChild);
    451           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
    452           break;
    453         }
    454       }
    455       m_pRootNode = pXFANode;
    456     } else {
    457       m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
    458     }
    459   } else if (pXFANode->IsContentNode()) {
    460     ParseContentNode(pXFANode, pXMLNode, ePacketID);
    461     m_pRootNode = pXFANode;
    462   } else {
    463     m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
    464   }
    465 }
    466 
    467 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
    468   return m_pRootNode;
    469 }
    470 
    471 CFX_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
    472   return m_pXMLDoc.get();
    473 }
    474 
    475 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFX_XMLNode* pXMLDocumentNode,
    476                                                XFA_PacketType ePacketID) {
    477   switch (ePacketID) {
    478     case XFA_PacketType::Xdp:
    479       return ParseAsXDPPacket_XDP(pXMLDocumentNode);
    480     case XFA_PacketType::Config:
    481       return ParseAsXDPPacket_Config(pXMLDocumentNode);
    482     case XFA_PacketType::Template:
    483       return ParseAsXDPPacket_Template(pXMLDocumentNode);
    484     case XFA_PacketType::Form:
    485       return ParseAsXDPPacket_Form(pXMLDocumentNode);
    486     case XFA_PacketType::Datasets:
    487       return ParseAsXDPPacket_Data(pXMLDocumentNode);
    488     case XFA_PacketType::Xdc:
    489       return ParseAsXDPPacket_Xdc(pXMLDocumentNode);
    490     case XFA_PacketType::LocaleSet:
    491       return ParseAsXDPPacket_LocaleConnectionSourceSet(
    492           pXMLDocumentNode, XFA_PacketType::LocaleSet, XFA_Element::LocaleSet);
    493     case XFA_PacketType::ConnectionSet:
    494       return ParseAsXDPPacket_LocaleConnectionSourceSet(
    495           pXMLDocumentNode, XFA_PacketType::ConnectionSet,
    496           XFA_Element::ConnectionSet);
    497     case XFA_PacketType::SourceSet:
    498       return ParseAsXDPPacket_LocaleConnectionSourceSet(
    499           pXMLDocumentNode, XFA_PacketType::SourceSet, XFA_Element::SourceSet);
    500     default:
    501       return ParseAsXDPPacket_User(pXMLDocumentNode);
    502   }
    503 }
    504 
    505 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
    506     CFX_XMLNode* pXMLDocumentNode) {
    507   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdp);
    508   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    509                      packet->flags)) {
    510     return nullptr;
    511   }
    512 
    513   CXFA_Node* pXFARootNode =
    514       m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Xfa);
    515   if (!pXFARootNode)
    516     return nullptr;
    517 
    518   m_pRootNode = pXFARootNode;
    519   pXFARootNode->JSObject()->SetCData(XFA_Attribute::Name, L"xfa", false, false);
    520 
    521   CFX_XMLElement* pElement = static_cast<CFX_XMLElement*>(pXMLDocumentNode);
    522   for (auto it : pElement->GetAttributes()) {
    523     if (it.first == L"uuid")
    524       pXFARootNode->JSObject()->SetCData(XFA_Attribute::Uuid, it.second, false,
    525                                          false);
    526     else if (it.first == L"timeStamp")
    527       pXFARootNode->JSObject()->SetCData(XFA_Attribute::TimeStamp, it.second,
    528                                          false, false);
    529   }
    530 
    531   CFX_XMLNode* pXMLConfigDOMRoot = nullptr;
    532   CXFA_Node* pXFAConfigDOMRoot = nullptr;
    533   for (CFX_XMLNode* pChildItem =
    534            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
    535        pChildItem;
    536        pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
    537     const PacketInfo* pPacketInfo = GetPacketByIndex(XFA_PacketType::Config);
    538     if (!MatchNodeName(pChildItem, pPacketInfo->name, pPacketInfo->uri,
    539                        pPacketInfo->flags)) {
    540       continue;
    541     }
    542     if (pXFARootNode->GetFirstChildByName(pPacketInfo->hash))
    543       return nullptr;
    544 
    545     pXMLConfigDOMRoot = pChildItem;
    546     pXFAConfigDOMRoot = ParseAsXDPPacket_Config(pXMLConfigDOMRoot);
    547     if (pXFAConfigDOMRoot)
    548       pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
    549   }
    550 
    551   CFX_XMLNode* pXMLDatasetsDOMRoot = nullptr;
    552   CFX_XMLNode* pXMLFormDOMRoot = nullptr;
    553   CFX_XMLNode* pXMLTemplateDOMRoot = nullptr;
    554   for (CFX_XMLNode* pChildItem =
    555            pXMLDocumentNode->GetNodeItem(CFX_XMLNode::FirstChild);
    556        pChildItem;
    557        pChildItem = pChildItem->GetNodeItem(CFX_XMLNode::NextSibling)) {
    558     if (!pChildItem || pChildItem->GetType() != FX_XMLNODE_Element)
    559       continue;
    560     if (pChildItem == pXMLConfigDOMRoot)
    561       continue;
    562 
    563     CFX_XMLElement* pElement = reinterpret_cast<CFX_XMLElement*>(pChildItem);
    564     WideString wsPacketName = pElement->GetLocalTagName();
    565     const PacketInfo* pPacketInfo =
    566         GetPacketByName(wsPacketName.AsStringView());
    567     if (pPacketInfo && pPacketInfo->uri) {
    568       if (!MatchNodeName(pElement, pPacketInfo->name, pPacketInfo->uri,
    569                          pPacketInfo->flags)) {
    570         pPacketInfo = nullptr;
    571       }
    572     }
    573     XFA_PacketType ePacket =
    574         pPacketInfo ? pPacketInfo->packet_type : XFA_PacketType::User;
    575     if (ePacket == XFA_PacketType::Xdp)
    576       continue;
    577     if (ePacket == XFA_PacketType::Datasets) {
    578       if (pXMLDatasetsDOMRoot)
    579         return nullptr;
    580 
    581       pXMLDatasetsDOMRoot = pElement;
    582     } else if (ePacket == XFA_PacketType::Form) {
    583       if (pXMLFormDOMRoot)
    584         return nullptr;
    585 
    586       pXMLFormDOMRoot = pElement;
    587     } else if (ePacket == XFA_PacketType::Template) {
    588       // Found a duplicate template packet.
    589       if (pXMLTemplateDOMRoot)
    590         return nullptr;
    591 
    592       CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
    593       if (pPacketNode) {
    594         pXMLTemplateDOMRoot = pElement;
    595         pXFARootNode->InsertChild(pPacketNode, nullptr);
    596       }
    597     } else {
    598       CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
    599       if (pPacketNode) {
    600         if (pPacketInfo &&
    601             (pPacketInfo->flags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
    602             pXFARootNode->GetFirstChildByName(pPacketInfo->hash)) {
    603           return nullptr;
    604         }
    605         pXFARootNode->InsertChild(pPacketNode, nullptr);
    606       }
    607     }
    608   }
    609 
    610   // No template is found.
    611   if (!pXMLTemplateDOMRoot)
    612     return nullptr;
    613 
    614   if (pXMLDatasetsDOMRoot) {
    615     CXFA_Node* pPacketNode =
    616         ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_PacketType::Datasets);
    617     if (pPacketNode)
    618       pXFARootNode->InsertChild(pPacketNode, nullptr);
    619   }
    620   if (pXMLFormDOMRoot) {
    621     CXFA_Node* pPacketNode =
    622         ParseAsXDPPacket(pXMLFormDOMRoot, XFA_PacketType::Form);
    623     if (pPacketNode)
    624       pXFARootNode->InsertChild(pPacketNode, nullptr);
    625   }
    626 
    627   pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
    628   return pXFARootNode;
    629 }
    630 
    631 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
    632     CFX_XMLNode* pXMLDocumentNode) {
    633   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Config);
    634   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    635                      packet->flags)) {
    636     return nullptr;
    637   }
    638   CXFA_Node* pNode =
    639       m_pFactory->CreateNode(XFA_PacketType::Config, XFA_Element::Config);
    640   if (!pNode)
    641     return nullptr;
    642 
    643   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
    644   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Config, true))
    645     return nullptr;
    646 
    647   pNode->SetXMLMappingNode(pXMLDocumentNode);
    648   return pNode;
    649 }
    650 
    651 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Template(
    652     CFX_XMLNode* pXMLDocumentNode) {
    653   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Template);
    654   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    655                      packet->flags)) {
    656     return nullptr;
    657   }
    658 
    659   CXFA_Node* pNode =
    660       m_pFactory->CreateNode(XFA_PacketType::Template, XFA_Element::Template);
    661   if (!pNode)
    662     return nullptr;
    663 
    664   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
    665   if (m_bDocumentParser) {
    666     CFX_XMLElement* pXMLDocumentElement =
    667         static_cast<CFX_XMLElement*>(pXMLDocumentNode);
    668     WideString wsNamespaceURI = pXMLDocumentElement->GetNamespaceURI();
    669     if (wsNamespaceURI.IsEmpty())
    670       wsNamespaceURI = pXMLDocumentElement->GetString(L"xmlns:xfa");
    671 
    672     pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
    673   }
    674   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Template, true))
    675     return nullptr;
    676 
    677   pNode->SetXMLMappingNode(pXMLDocumentNode);
    678   return pNode;
    679 }
    680 
    681 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Form(
    682     CFX_XMLNode* pXMLDocumentNode) {
    683   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Form);
    684   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    685                      packet->flags)) {
    686     return nullptr;
    687   }
    688 
    689   CFX_XMLElement* pXMLDocumentElement =
    690       static_cast<CFX_XMLElement*>(pXMLDocumentNode);
    691   WideString wsChecksum = pXMLDocumentElement->GetString(L"checksum");
    692   if (wsChecksum.GetLength() != 28 || m_pXMLParser->m_dwCheckStatus != 0x03) {
    693     return nullptr;
    694   }
    695 
    696   auto pChecksum = pdfium::MakeUnique<CFX_ChecksumContext>();
    697   pChecksum->StartChecksum();
    698   pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
    699                             m_pXMLParser->m_nSize[0]);
    700   pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
    701                             m_pXMLParser->m_nSize[1]);
    702   pChecksum->FinishChecksum();
    703   ByteString bsCheck = pChecksum->GetChecksum();
    704   if (bsCheck != wsChecksum.UTF8Encode())
    705     return nullptr;
    706 
    707   CXFA_Node* pNode =
    708       m_pFactory->CreateNode(XFA_PacketType::Form, XFA_Element::Form);
    709   if (!pNode)
    710     return nullptr;
    711 
    712   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
    713   pNode->JSObject()->SetAttribute(XFA_Attribute::Checksum,
    714                                   wsChecksum.AsStringView(), false);
    715   CXFA_Template* pTemplateRoot =
    716       m_pRootNode->GetFirstChildByClass<CXFA_Template>(XFA_Element::Template);
    717   CXFA_Subform* pTemplateChosen =
    718       pTemplateRoot ? pTemplateRoot->GetFirstChildByClass<CXFA_Subform>(
    719                           XFA_Element::Subform)
    720                     : nullptr;
    721   bool bUseAttribute = true;
    722   if (pTemplateChosen &&
    723       pTemplateChosen->JSObject()->GetEnum(XFA_Attribute::RestoreState) !=
    724           XFA_AttributeEnum::Auto) {
    725     bUseAttribute = false;
    726   }
    727   if (!NormalLoader(pNode, pXMLDocumentNode, XFA_PacketType::Form,
    728                     bUseAttribute))
    729     return nullptr;
    730 
    731   pNode->SetXMLMappingNode(pXMLDocumentNode);
    732   return pNode;
    733 }
    734 
    735 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
    736     CFX_XMLNode* pXMLDocumentNode) {
    737   CFX_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
    738   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Datasets);
    739   if (pDatasetsXMLNode) {
    740     CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
    741                                               XFA_Element::DataModel);
    742     if (!pNode)
    743       return nullptr;
    744 
    745     pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false,
    746                                 false);
    747     if (!DataLoader(pNode, pDatasetsXMLNode, false))
    748       return nullptr;
    749 
    750     pNode->SetXMLMappingNode(pDatasetsXMLNode);
    751     return pNode;
    752   }
    753 
    754   CFX_XMLNode* pDataXMLNode = nullptr;
    755   if (MatchNodeName(pXMLDocumentNode, L"data", packet->uri, packet->flags)) {
    756     static_cast<CFX_XMLElement*>(pXMLDocumentNode)
    757         ->RemoveAttribute(L"xmlns:xfa");
    758     pDataXMLNode = pXMLDocumentNode;
    759   } else {
    760     CFX_XMLElement* pDataElement = new CFX_XMLElement(L"xfa:data");
    761     CFX_XMLNode* pParentXMLNode =
    762         pXMLDocumentNode->GetNodeItem(CFX_XMLNode::Parent);
    763     if (pParentXMLNode)
    764       pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
    765 
    766     ASSERT(pXMLDocumentNode->GetType() == FX_XMLNODE_Element);
    767     if (pXMLDocumentNode->GetType() == FX_XMLNODE_Element) {
    768       static_cast<CFX_XMLElement*>(pXMLDocumentNode)
    769           ->RemoveAttribute(L"xmlns:xfa");
    770     }
    771     pDataElement->InsertChildNode(pXMLDocumentNode);
    772     pDataXMLNode = pDataElement;
    773   }
    774 
    775   if (pDataXMLNode) {
    776     CXFA_Node* pNode = m_pFactory->CreateNode(XFA_PacketType::Datasets,
    777                                               XFA_Element::DataGroup);
    778     if (!pNode) {
    779       if (pDataXMLNode != pXMLDocumentNode)
    780         delete pDataXMLNode;
    781       return nullptr;
    782     }
    783     WideString wsLocalName =
    784         static_cast<CFX_XMLElement*>(pDataXMLNode)->GetLocalTagName();
    785     pNode->JSObject()->SetCData(XFA_Attribute::Name, wsLocalName, false, false);
    786     if (!DataLoader(pNode, pDataXMLNode, true))
    787       return nullptr;
    788 
    789     pNode->SetXMLMappingNode(pDataXMLNode);
    790     if (pDataXMLNode != pXMLDocumentNode)
    791       pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
    792     return pNode;
    793   }
    794   return nullptr;
    795 }
    796 
    797 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
    798     CFX_XMLNode* pXMLDocumentNode,
    799     XFA_PacketType packet_type,
    800     XFA_Element element) {
    801   const PacketInfo* packet = GetPacketByIndex(packet_type);
    802   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    803                      packet->flags)) {
    804     return nullptr;
    805   }
    806 
    807   CXFA_Node* pNode = m_pFactory->CreateNode(packet_type, element);
    808   if (!pNode)
    809     return nullptr;
    810 
    811   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
    812   if (!NormalLoader(pNode, pXMLDocumentNode, packet_type, true))
    813     return nullptr;
    814 
    815   pNode->SetXMLMappingNode(pXMLDocumentNode);
    816   return pNode;
    817 }
    818 
    819 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
    820     CFX_XMLNode* pXMLDocumentNode) {
    821   const PacketInfo* packet = GetPacketByIndex(XFA_PacketType::Xdc);
    822   if (!MatchNodeName(pXMLDocumentNode, packet->name, packet->uri,
    823                      packet->flags))
    824     return nullptr;
    825 
    826   CXFA_Node* pNode =
    827       m_pFactory->CreateNode(XFA_PacketType::Xdc, XFA_Element::Xdc);
    828   if (!pNode)
    829     return nullptr;
    830 
    831   pNode->JSObject()->SetCData(XFA_Attribute::Name, packet->name, false, false);
    832   pNode->SetXMLMappingNode(pXMLDocumentNode);
    833   return pNode;
    834 }
    835 
    836 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
    837     CFX_XMLNode* pXMLDocumentNode) {
    838   CXFA_Node* pNode =
    839       m_pFactory->CreateNode(XFA_PacketType::Xdp, XFA_Element::Packet);
    840   if (!pNode)
    841     return nullptr;
    842 
    843   WideString wsName =
    844       static_cast<CFX_XMLElement*>(pXMLDocumentNode)->GetLocalTagName();
    845   pNode->JSObject()->SetCData(XFA_Attribute::Name, wsName, false, false);
    846   if (!UserPacketLoader(pNode, pXMLDocumentNode))
    847     return nullptr;
    848 
    849   pNode->SetXMLMappingNode(pXMLDocumentNode);
    850   return pNode;
    851 }
    852 
    853 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
    854                                                CFX_XMLNode* pXMLDoc) {
    855   return pXFANode;
    856 }
    857 
    858 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
    859                                          CFX_XMLNode* pXMLDoc,
    860                                          bool bDoTransform) {
    861   ParseDataGroup(pXFANode, pXMLDoc, XFA_PacketType::Datasets);
    862   return pXFANode;
    863 }
    864 
    865 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
    866                                            CFX_XMLNode* pXMLDoc,
    867                                            XFA_PacketType ePacketID,
    868                                            bool bUseAttribute) {
    869   bool bOneOfPropertyFound = false;
    870   for (CFX_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFX_XMLNode::FirstChild);
    871        pXMLChild;
    872        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
    873     switch (pXMLChild->GetType()) {
    874       case FX_XMLNODE_Element: {
    875         CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
    876         WideString wsTagName = pXMLElement->GetLocalTagName();
    877         XFA_Element eType = CXFA_Node::NameToElement(wsTagName);
    878         if (eType == XFA_Element::Unknown)
    879           continue;
    880 
    881         if (pXFANode->HasPropertyFlags(
    882                 eType,
    883                 XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) {
    884           if (bOneOfPropertyFound)
    885             break;
    886           bOneOfPropertyFound = true;
    887         }
    888 
    889         CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
    890         if (!pXFAChild)
    891           return nullptr;
    892         if (ePacketID == XFA_PacketType::Config) {
    893           pXFAChild->JSObject()->SetAttribute(XFA_Attribute::Name,
    894                                               wsTagName.AsStringView(), false);
    895         }
    896 
    897         bool IsNeedValue = true;
    898         for (auto it : pXMLElement->GetAttributes()) {
    899           WideString wsAttrName;
    900           GetAttributeLocalName(it.first.AsStringView(), wsAttrName);
    901           if (wsAttrName == L"nil" && it.second == L"true")
    902             IsNeedValue = false;
    903 
    904           XFA_Attribute attr =
    905               CXFA_Node::NameToAttribute(wsAttrName.AsStringView());
    906           if (attr == XFA_Attribute::Unknown)
    907             continue;
    908 
    909           if (!bUseAttribute && attr != XFA_Attribute::Name &&
    910               attr != XFA_Attribute::Save) {
    911             continue;
    912           }
    913           pXFAChild->JSObject()->SetAttribute(attr, it.second.AsStringView(),
    914                                               false);
    915         }
    916         pXFANode->InsertChild(pXFAChild, nullptr);
    917         if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
    918           if (ePacketID == XFA_PacketType::Config)
    919             ParseContentNode(pXFAChild, pXMLElement, ePacketID);
    920           else
    921             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
    922 
    923           break;
    924         }
    925         switch (pXFAChild->GetObjectType()) {
    926           case XFA_ObjectType::ContentNode:
    927           case XFA_ObjectType::TextNode:
    928           case XFA_ObjectType::NodeC:
    929           case XFA_ObjectType::NodeV:
    930             if (IsNeedValue)
    931               ParseContentNode(pXFAChild, pXMLElement, ePacketID);
    932             break;
    933           default:
    934             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
    935             break;
    936         }
    937       } break;
    938       case FX_XMLNODE_Instruction:
    939         ParseInstruction(pXFANode, static_cast<CFX_XMLInstruction*>(pXMLChild),
    940                          ePacketID);
    941         break;
    942       default:
    943         break;
    944     }
    945   }
    946   return pXFANode;
    947 }
    948 
    949 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
    950                                          CFX_XMLNode* pXMLNode,
    951                                          XFA_PacketType ePacketID) {
    952   XFA_Element element = XFA_Element::Sharptext;
    953   if (pXFANode->GetElementType() == XFA_Element::ExData) {
    954     WideString wsContentType =
    955         pXFANode->JSObject()->GetCData(XFA_Attribute::ContentType);
    956     if (wsContentType == L"text/html")
    957       element = XFA_Element::SharpxHTML;
    958     else if (wsContentType == L"text/xml")
    959       element = XFA_Element::Sharpxml;
    960   }
    961   if (element == XFA_Element::SharpxHTML)
    962     pXFANode->SetXMLMappingNode(pXMLNode);
    963 
    964   WideString wsValue;
    965   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
    966        pXMLChild;
    967        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
    968     FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
    969     if (eNodeType == FX_XMLNODE_Instruction)
    970       continue;
    971 
    972     if (element == XFA_Element::SharpxHTML) {
    973       if (eNodeType != FX_XMLNODE_Element)
    974         break;
    975 
    976       if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild)))
    977         wsValue +=
    978             GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
    979     } else if (element == XFA_Element::Sharpxml) {
    980       if (eNodeType != FX_XMLNODE_Element)
    981         break;
    982 
    983       ConvertXMLToPlainText(static_cast<CFX_XMLElement*>(pXMLChild), wsValue);
    984     } else {
    985       if (eNodeType == FX_XMLNODE_Element)
    986         break;
    987       if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData)
    988         wsValue = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
    989     }
    990     break;
    991   }
    992   if (!wsValue.IsEmpty()) {
    993     if (pXFANode->IsContentNode()) {
    994       CXFA_Node* pContentRawDataNode =
    995           m_pFactory->CreateNode(ePacketID, element);
    996       ASSERT(pContentRawDataNode);
    997       pContentRawDataNode->JSObject()->SetCData(XFA_Attribute::Value, wsValue,
    998                                                 false, false);
    999       pXFANode->InsertChild(pContentRawDataNode, nullptr);
   1000     } else {
   1001       pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsValue, false,
   1002                                      false);
   1003     }
   1004   }
   1005 }
   1006 
   1007 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
   1008                                        CFX_XMLNode* pXMLNode,
   1009                                        XFA_PacketType ePacketID) {
   1010   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
   1011        pXMLChild;
   1012        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
   1013     switch (pXMLChild->GetType()) {
   1014       case FX_XMLNODE_Element: {
   1015         CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLChild);
   1016         {
   1017           WideString wsNamespaceURI = GetElementTagNamespaceURI(pXMLElement);
   1018           if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
   1019               wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
   1020               wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
   1021             continue;
   1022           }
   1023         }
   1024 
   1025         XFA_Element eNodeType = XFA_Element::DataModel;
   1026         if (eNodeType == XFA_Element::DataModel) {
   1027           WideString wsDataNodeAttr;
   1028           if (FindAttributeWithNS(pXMLElement, L"dataNode",
   1029                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
   1030                                   wsDataNodeAttr)) {
   1031             if (wsDataNodeAttr == L"dataGroup")
   1032               eNodeType = XFA_Element::DataGroup;
   1033             else if (wsDataNodeAttr == L"dataValue")
   1034               eNodeType = XFA_Element::DataValue;
   1035           }
   1036         }
   1037         WideString wsContentType;
   1038         if (eNodeType == XFA_Element::DataModel) {
   1039           if (FindAttributeWithNS(pXMLElement, L"contentType",
   1040                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
   1041                                   wsContentType)) {
   1042             if (!wsContentType.IsEmpty())
   1043               eNodeType = XFA_Element::DataValue;
   1044           }
   1045         }
   1046         if (eNodeType == XFA_Element::DataModel) {
   1047           for (CFX_XMLNode* pXMLDataChild =
   1048                    pXMLElement->GetNodeItem(CFX_XMLNode::FirstChild);
   1049                pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
   1050                                   CFX_XMLNode::NextSibling)) {
   1051             if (pXMLDataChild->GetType() == FX_XMLNODE_Element) {
   1052               if (!XFA_RecognizeRichText(
   1053                       static_cast<CFX_XMLElement*>(pXMLDataChild))) {
   1054                 eNodeType = XFA_Element::DataGroup;
   1055                 break;
   1056               }
   1057             }
   1058           }
   1059         }
   1060         if (eNodeType == XFA_Element::DataModel)
   1061           eNodeType = XFA_Element::DataValue;
   1062 
   1063         CXFA_Node* pXFAChild =
   1064             m_pFactory->CreateNode(XFA_PacketType::Datasets, eNodeType);
   1065         if (!pXFAChild)
   1066           return;
   1067 
   1068         pXFAChild->JSObject()->SetCData(
   1069             XFA_Attribute::Name, pXMLElement->GetLocalTagName(), false, false);
   1070         bool bNeedValue = true;
   1071 
   1072         for (auto it : pXMLElement->GetAttributes()) {
   1073           WideString wsName;
   1074           WideString wsNS;
   1075           if (!ResolveAttribute(pXMLElement, it.first, wsName, wsNS)) {
   1076             continue;
   1077           }
   1078           if (wsName == L"nil" && it.second == L"true") {
   1079             bNeedValue = false;
   1080             continue;
   1081           }
   1082           if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
   1083               wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
   1084               wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
   1085               wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
   1086             continue;
   1087           }
   1088           CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
   1089               XFA_PacketType::Datasets, XFA_Element::DataValue);
   1090           if (!pXFAMetaData)
   1091             return;
   1092 
   1093           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Name, wsName, false,
   1094                                              false);
   1095           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::QualifiedName,
   1096                                              it.first, false, false);
   1097           pXFAMetaData->JSObject()->SetCData(XFA_Attribute::Value, it.second,
   1098                                              false, false);
   1099           pXFAMetaData->JSObject()->SetEnum(XFA_Attribute::Contains,
   1100                                             XFA_AttributeEnum::MetaData, false);
   1101           pXFAChild->InsertChild(pXFAMetaData, nullptr);
   1102           pXFAMetaData->SetXMLMappingNode(pXMLElement);
   1103           pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
   1104         }
   1105 
   1106         if (!bNeedValue) {
   1107           WideString wsNilName(L"xsi:nil");
   1108           pXMLElement->RemoveAttribute(wsNilName.c_str());
   1109         }
   1110         pXFANode->InsertChild(pXFAChild, nullptr);
   1111         if (eNodeType == XFA_Element::DataGroup)
   1112           ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
   1113         else if (bNeedValue)
   1114           ParseDataValue(pXFAChild, pXMLChild, XFA_PacketType::Datasets);
   1115 
   1116         pXFAChild->SetXMLMappingNode(pXMLElement);
   1117         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1118         continue;
   1119       }
   1120       case FX_XMLNODE_CharData:
   1121       case FX_XMLNODE_Text: {
   1122         CFX_XMLText* pXMLText = static_cast<CFX_XMLText*>(pXMLChild);
   1123         WideString wsText = pXMLText->GetText();
   1124         if (IsStringAllWhitespace(wsText))
   1125           continue;
   1126 
   1127         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_PacketType::Datasets,
   1128                                                       XFA_Element::DataValue);
   1129         if (!pXFAChild)
   1130           return;
   1131 
   1132         pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsText, false,
   1133                                         false);
   1134         pXFANode->InsertChild(pXFAChild, nullptr);
   1135         pXFAChild->SetXMLMappingNode(pXMLText);
   1136         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1137         continue;
   1138       }
   1139       default:
   1140         continue;
   1141     }
   1142   }
   1143 }
   1144 
   1145 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
   1146                                        CFX_XMLNode* pXMLNode,
   1147                                        XFA_PacketType ePacketID) {
   1148   CFX_WideTextBuf wsValueTextBuf;
   1149   CFX_WideTextBuf wsCurValueTextBuf;
   1150   bool bMarkAsCompound = false;
   1151   CFX_XMLNode* pXMLCurValueNode = nullptr;
   1152   for (CFX_XMLNode* pXMLChild = pXMLNode->GetNodeItem(CFX_XMLNode::FirstChild);
   1153        pXMLChild;
   1154        pXMLChild = pXMLChild->GetNodeItem(CFX_XMLNode::NextSibling)) {
   1155     FX_XMLNODETYPE eNodeType = pXMLChild->GetType();
   1156     if (eNodeType == FX_XMLNODE_Instruction)
   1157       continue;
   1158 
   1159     if (eNodeType == FX_XMLNODE_Text || eNodeType == FX_XMLNODE_CharData) {
   1160       WideString wsText = static_cast<CFX_XMLText*>(pXMLChild)->GetText();
   1161       if (!pXMLCurValueNode)
   1162         pXMLCurValueNode = pXMLChild;
   1163 
   1164       wsCurValueTextBuf << wsText;
   1165     } else if (XFA_RecognizeRichText(static_cast<CFX_XMLElement*>(pXMLChild))) {
   1166       WideString wsText =
   1167           GetPlainTextFromRichText(static_cast<CFX_XMLElement*>(pXMLChild));
   1168       if (!pXMLCurValueNode)
   1169         pXMLCurValueNode = pXMLChild;
   1170 
   1171       wsCurValueTextBuf << wsText;
   1172     } else {
   1173       bMarkAsCompound = true;
   1174       if (pXMLCurValueNode) {
   1175         WideString wsCurValue = wsCurValueTextBuf.MakeString();
   1176         if (!wsCurValue.IsEmpty()) {
   1177           CXFA_Node* pXFAChild =
   1178               m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1179           if (!pXFAChild)
   1180             return;
   1181 
   1182           pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false,
   1183                                           false);
   1184           pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue,
   1185                                           false, false);
   1186           pXFANode->InsertChild(pXFAChild, nullptr);
   1187           pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
   1188           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1189           wsValueTextBuf << wsCurValue;
   1190           wsCurValueTextBuf.Clear();
   1191         }
   1192         pXMLCurValueNode = nullptr;
   1193       }
   1194       CXFA_Node* pXFAChild =
   1195           m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1196       if (!pXFAChild)
   1197         return;
   1198 
   1199       WideString wsNodeStr =
   1200           static_cast<CFX_XMLElement*>(pXMLChild)->GetLocalTagName();
   1201       pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, wsNodeStr, false,
   1202                                       false);
   1203       ParseDataValue(pXFAChild, pXMLChild, ePacketID);
   1204       pXFANode->InsertChild(pXFAChild, nullptr);
   1205       pXFAChild->SetXMLMappingNode(pXMLChild);
   1206       pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1207       WideString wsCurValue =
   1208           pXFAChild->JSObject()->GetCData(XFA_Attribute::Value);
   1209       wsValueTextBuf << wsCurValue;
   1210     }
   1211   }
   1212   if (pXMLCurValueNode) {
   1213     WideString wsCurValue = wsCurValueTextBuf.MakeString();
   1214     if (!wsCurValue.IsEmpty()) {
   1215       if (bMarkAsCompound) {
   1216         CXFA_Node* pXFAChild =
   1217             m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1218         if (!pXFAChild)
   1219           return;
   1220 
   1221         pXFAChild->JSObject()->SetCData(XFA_Attribute::Name, L"", false, false);
   1222         pXFAChild->JSObject()->SetCData(XFA_Attribute::Value, wsCurValue, false,
   1223                                         false);
   1224         pXFANode->InsertChild(pXFAChild, nullptr);
   1225         pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
   1226         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1227       }
   1228       wsValueTextBuf << wsCurValue;
   1229       wsCurValueTextBuf.Clear();
   1230     }
   1231     pXMLCurValueNode = nullptr;
   1232   }
   1233   WideString wsNodeValue = wsValueTextBuf.MakeString();
   1234   pXFANode->JSObject()->SetCData(XFA_Attribute::Value, wsNodeValue, false,
   1235                                  false);
   1236 }
   1237 
   1238 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
   1239                                          CFX_XMLInstruction* pXMLInstruction,
   1240                                          XFA_PacketType ePacketID) {
   1241   if (!m_bDocumentParser)
   1242     return;
   1243 
   1244   WideString wsTargetName = pXMLInstruction->GetName();
   1245   const std::vector<WideString>& target_data = pXMLInstruction->GetTargetData();
   1246   if (wsTargetName == L"originalXFAVersion") {
   1247     if (target_data.size() > 1 &&
   1248         (pXFANode->GetDocument()->RecognizeXFAVersionNumber(target_data[0]) !=
   1249          XFA_VERSION_UNKNOWN) &&
   1250         target_data[1] == L"v2.7-scripting:1") {
   1251       pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
   1252     }
   1253   } else if (wsTargetName == L"acrobat") {
   1254     if (target_data.size() > 1 && target_data[0] == L"JavaScript" &&
   1255         target_data[1] == L"strictScoping") {
   1256       pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
   1257     }
   1258   }
   1259 }
   1260 
   1261 void CXFA_SimpleParser::CloseParser() {
   1262   m_pXMLDoc.reset();
   1263   m_pStream.Reset();
   1264 }
   1265