Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "xfa/fxfa/parser/cxfa_simple_parser.h"
      8 
      9 #include <utility>
     10 
     11 #include "core/fxcrt/fx_ext.h"
     12 #include "third_party/base/ptr_util.h"
     13 #include "xfa/fgas/crt/fgas_codepage.h"
     14 #include "xfa/fxfa/fxfa.h"
     15 #include "xfa/fxfa/parser/cxfa_document.h"
     16 #include "xfa/fxfa/parser/cxfa_widetextread.h"
     17 #include "xfa/fxfa/parser/cxfa_xml_parser.h"
     18 #include "xfa/fxfa/parser/xfa_basic_data.h"
     19 #include "xfa/fxfa/parser/xfa_utils.h"
     20 #include "xfa/fxfa/xfa_checksum.h"
     21 
     22 namespace {
     23 
     24 CFDE_XMLNode* GetDocumentNode(CFDE_XMLDoc* pXMLDoc,
     25                               bool bVerifyWellFormness = false) {
     26   if (!pXMLDoc)
     27     return nullptr;
     28 
     29   for (CFDE_XMLNode* pXMLNode =
     30            pXMLDoc->GetRoot()->GetNodeItem(CFDE_XMLNode::FirstChild);
     31        pXMLNode; pXMLNode = pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
     32     if (pXMLNode->GetType() != FDE_XMLNODE_Element)
     33       continue;
     34 
     35     if (!bVerifyWellFormness)
     36       return pXMLNode;
     37 
     38     for (CFDE_XMLNode* pNextNode =
     39              pXMLNode->GetNodeItem(CFDE_XMLNode::NextSibling);
     40          pNextNode;
     41          pNextNode = pNextNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
     42       if (pNextNode->GetType() == FDE_XMLNODE_Element)
     43         return nullptr;
     44     }
     45     return pXMLNode;
     46   }
     47   return nullptr;
     48 }
     49 
     50 void GetElementTagNamespaceURI(CFDE_XMLElement* pElement,
     51                                CFX_WideString& wsNamespaceURI) {
     52   CFX_WideString wsNodeStr;
     53   pElement->GetNamespacePrefix(wsNodeStr);
     54   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
     55           pElement, wsNodeStr.AsStringC(), wsNamespaceURI)) {
     56     wsNamespaceURI.clear();
     57   }
     58 }
     59 
     60 bool MatchNodeName(CFDE_XMLNode* pNode,
     61                    const CFX_WideStringC& wsLocalTagName,
     62                    const CFX_WideStringC& wsNamespaceURIPrefix,
     63                    uint32_t eMatchFlags = XFA_XDPPACKET_FLAGS_NOMATCH) {
     64   if (!pNode || pNode->GetType() != FDE_XMLNODE_Element)
     65     return false;
     66 
     67   CFDE_XMLElement* pElement = reinterpret_cast<CFDE_XMLElement*>(pNode);
     68   CFX_WideString wsNodeStr;
     69   pElement->GetLocalTagName(wsNodeStr);
     70   if (wsNodeStr != wsLocalTagName)
     71     return false;
     72 
     73   GetElementTagNamespaceURI(pElement, wsNodeStr);
     74   if (eMatchFlags & XFA_XDPPACKET_FLAGS_NOMATCH)
     75     return true;
     76   if (eMatchFlags & XFA_XDPPACKET_FLAGS_PREFIXMATCH) {
     77     return wsNodeStr.Left(wsNamespaceURIPrefix.GetLength()) ==
     78            wsNamespaceURIPrefix;
     79   }
     80   return wsNodeStr == wsNamespaceURIPrefix;
     81 }
     82 
     83 bool GetAttributeLocalName(const CFX_WideStringC& wsAttributeName,
     84                            CFX_WideString& wsLocalAttrName) {
     85   CFX_WideString wsAttrName(wsAttributeName);
     86   FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
     87   if (iFind < 0) {
     88     wsLocalAttrName = wsAttrName;
     89     return false;
     90   }
     91   wsLocalAttrName = wsAttrName.Right(wsAttrName.GetLength() - iFind - 1);
     92   return true;
     93 }
     94 
     95 bool ResolveAttribute(CFDE_XMLElement* pElement,
     96                       const CFX_WideStringC& wsAttributeName,
     97                       CFX_WideString& wsLocalAttrName,
     98                       CFX_WideString& wsNamespaceURI) {
     99   CFX_WideString wsAttrName(wsAttributeName);
    100   CFX_WideString wsNSPrefix;
    101   if (GetAttributeLocalName(wsAttributeName, wsLocalAttrName)) {
    102     wsNSPrefix = wsAttrName.Left(wsAttributeName.GetLength() -
    103                                  wsLocalAttrName.GetLength() - 1);
    104   }
    105   if (wsLocalAttrName == L"xmlns" || wsNSPrefix == L"xmlns" ||
    106       wsNSPrefix == L"xml") {
    107     return false;
    108   }
    109   if (!XFA_FDEExtension_ResolveNamespaceQualifier(
    110           pElement, wsNSPrefix.AsStringC(), wsNamespaceURI)) {
    111     wsNamespaceURI.clear();
    112     return false;
    113   }
    114   return true;
    115 }
    116 
    117 bool FindAttributeWithNS(CFDE_XMLElement* pElement,
    118                          const CFX_WideStringC& wsLocalAttributeName,
    119                          const CFX_WideStringC& wsNamespaceURIPrefix,
    120                          CFX_WideString& wsValue,
    121                          bool bMatchNSAsPrefix = false) {
    122   if (!pElement)
    123     return false;
    124 
    125   CFX_WideString wsAttrName;
    126   CFX_WideString wsAttrValue;
    127   CFX_WideString wsAttrNS;
    128   for (int32_t iAttrCount = pElement->CountAttributes(), i = 0; i < iAttrCount;
    129        i++) {
    130     pElement->GetAttribute(i, wsAttrName, wsAttrValue);
    131     FX_STRSIZE iFind = wsAttrName.Find(L':', 0);
    132     CFX_WideString wsNSPrefix;
    133     if (iFind < 0) {
    134       if (wsLocalAttributeName != wsAttrName)
    135         continue;
    136     } else {
    137       if (wsLocalAttributeName !=
    138           wsAttrName.Right(wsAttrName.GetLength() - iFind - 1)) {
    139         continue;
    140       }
    141       wsNSPrefix = wsAttrName.Left(iFind);
    142     }
    143     if (!XFA_FDEExtension_ResolveNamespaceQualifier(
    144             pElement, wsNSPrefix.AsStringC(), wsAttrNS)) {
    145       continue;
    146     }
    147     if (bMatchNSAsPrefix) {
    148       if (wsAttrNS.Left(wsNamespaceURIPrefix.GetLength()) !=
    149           wsNamespaceURIPrefix) {
    150         continue;
    151       }
    152     } else {
    153       if (wsAttrNS != wsNamespaceURIPrefix)
    154         continue;
    155     }
    156     wsValue = wsAttrValue;
    157     return true;
    158   }
    159   return false;
    160 }
    161 
    162 CFDE_XMLNode* GetDataSetsFromXDP(CFDE_XMLNode* pXMLDocumentNode) {
    163   if (MatchNodeName(pXMLDocumentNode,
    164                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
    165                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
    166                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
    167     return pXMLDocumentNode;
    168   }
    169   if (!MatchNodeName(pXMLDocumentNode,
    170                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
    171                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
    172                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
    173     return nullptr;
    174   }
    175   for (CFDE_XMLNode* pDatasetsNode =
    176            pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    177        pDatasetsNode;
    178        pDatasetsNode = pDatasetsNode->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    179     if (!MatchNodeName(pDatasetsNode,
    180                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName,
    181                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
    182                        XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
    183       continue;
    184     }
    185     return pDatasetsNode;
    186   }
    187   return nullptr;
    188 }
    189 
    190 bool IsStringAllWhitespace(CFX_WideString wsText) {
    191   wsText.TrimRight(L"\x20\x9\xD\xA");
    192   return wsText.IsEmpty();
    193 }
    194 
    195 void ConvertXMLToPlainText(CFDE_XMLElement* pRootXMLNode,
    196                            CFX_WideString& wsOutput) {
    197   for (CFDE_XMLNode* pXMLChild =
    198            pRootXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    199        pXMLChild;
    200        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    201     switch (pXMLChild->GetType()) {
    202       case FDE_XMLNODE_Element: {
    203         CFX_WideString wsTextData;
    204         static_cast<CFDE_XMLElement*>(pXMLChild)->GetTextData(wsTextData);
    205         wsTextData += L"\n";
    206         wsOutput += wsTextData;
    207         break;
    208       }
    209       case FDE_XMLNODE_Text: {
    210         CFX_WideString wsText;
    211         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
    212         if (IsStringAllWhitespace(wsText))
    213           continue;
    214 
    215         wsOutput = wsText;
    216         break;
    217       }
    218       case FDE_XMLNODE_CharData: {
    219         CFX_WideString wsCharData;
    220         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsCharData);
    221         if (IsStringAllWhitespace(wsCharData))
    222           continue;
    223 
    224         wsOutput = wsCharData;
    225         break;
    226       }
    227       default:
    228         ASSERT(false);
    229         break;
    230     }
    231   }
    232 }
    233 
    234 const XFA_PACKETINFO* GetPacketByName(const CFX_WideStringC& wsName) {
    235   if (wsName.IsEmpty())
    236     return nullptr;
    237 
    238   uint32_t uHash = FX_HashCode_GetW(wsName, false);
    239   int32_t iStart = 0;
    240   int32_t iEnd = g_iXFAPacketCount - 1;
    241   do {
    242     int32_t iMid = (iStart + iEnd) / 2;
    243     const XFA_PACKETINFO* pInfo = g_XFAPacketData + iMid;
    244     if (uHash == pInfo->uHash)
    245       return pInfo;
    246     if (uHash < pInfo->uHash)
    247       iEnd = iMid - 1;
    248     else
    249       iStart = iMid + 1;
    250   } while (iStart <= iEnd);
    251   return nullptr;
    252 }
    253 
    254 }  // namespace
    255 
    256 bool XFA_RecognizeRichText(CFDE_XMLElement* pRichTextXMLNode) {
    257   if (pRichTextXMLNode) {
    258     CFX_WideString wsNamespaceURI;
    259     GetElementTagNamespaceURI(pRichTextXMLNode, wsNamespaceURI);
    260     if (wsNamespaceURI == L"http://www.w3.org/1999/xhtml")
    261       return true;
    262   }
    263   return false;
    264 }
    265 
    266 CXFA_SimpleParser::CXFA_SimpleParser(CXFA_Document* pFactory,
    267                                      bool bDocumentParser)
    268     : m_pXMLParser(nullptr),
    269       m_pXMLDoc(nullptr),
    270       m_pStream(nullptr),
    271       m_pFileRead(nullptr),
    272       m_pFactory(pFactory),
    273       m_pRootNode(nullptr),
    274       m_ePacketID(XFA_XDPPACKET_UNKNOWN),
    275       m_bDocumentParser(bDocumentParser) {}
    276 
    277 CXFA_SimpleParser::~CXFA_SimpleParser() {}
    278 
    279 void CXFA_SimpleParser::SetFactory(CXFA_Document* pFactory) {
    280   m_pFactory = pFactory;
    281 }
    282 
    283 int32_t CXFA_SimpleParser::StartParse(
    284     const CFX_RetainPtr<IFX_SeekableReadStream>& pStream,
    285     XFA_XDPPACKET ePacketID) {
    286   CloseParser();
    287   m_pFileRead = pStream;
    288   m_pStream = IFGAS_Stream::CreateStream(
    289       pStream, FX_STREAMACCESS_Read | FX_STREAMACCESS_Text);
    290   if (!m_pStream)
    291     return XFA_PARSESTATUS_StreamErr;
    292 
    293   uint16_t wCodePage = m_pStream->GetCodePage();
    294   if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE &&
    295       wCodePage != FX_CODEPAGE_UTF8) {
    296     m_pStream->SetCodePage(FX_CODEPAGE_UTF8);
    297   }
    298   m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
    299   auto pNewParser =
    300       pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), m_pStream);
    301   m_pXMLParser = pNewParser.get();
    302   if (!m_pXMLDoc->LoadXML(std::move(pNewParser)))
    303     return XFA_PARSESTATUS_StatusErr;
    304 
    305   m_ePacketID = ePacketID;
    306   return XFA_PARSESTATUS_Ready;
    307 }
    308 
    309 int32_t CXFA_SimpleParser::DoParse(IFX_Pause* pPause) {
    310   if (!m_pXMLDoc || m_ePacketID == XFA_XDPPACKET_UNKNOWN)
    311     return XFA_PARSESTATUS_StatusErr;
    312 
    313   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
    314   if (iRet < 0)
    315     return XFA_PARSESTATUS_SyntaxErr;
    316   if (iRet < 100)
    317     return iRet / 2;
    318 
    319   m_pRootNode = ParseAsXDPPacket(GetDocumentNode(m_pXMLDoc.get()), m_ePacketID);
    320   m_pXMLDoc->CloseXML();
    321   m_pStream.Reset();
    322   if (!m_pRootNode)
    323     return XFA_PARSESTATUS_StatusErr;
    324 
    325   return XFA_PARSESTATUS_Done;
    326 }
    327 
    328 int32_t CXFA_SimpleParser::ParseXMLData(const CFX_WideString& wsXML,
    329                                         CFDE_XMLNode*& pXMLNode,
    330                                         IFX_Pause* pPause) {
    331   CloseParser();
    332   pXMLNode = nullptr;
    333   m_pXMLDoc = pdfium::MakeUnique<CFDE_XMLDoc>();
    334   auto pStream = pdfium::MakeRetain<CXFA_WideTextRead>(wsXML);
    335   auto pParser =
    336       pdfium::MakeUnique<CXFA_XMLParser>(m_pXMLDoc->GetRoot(), pStream);
    337   pParser->m_dwCheckStatus = 0x03;
    338   if (!m_pXMLDoc->LoadXML(std::move(pParser)))
    339     return XFA_PARSESTATUS_StatusErr;
    340 
    341   int32_t iRet = m_pXMLDoc->DoLoad(pPause);
    342   if (iRet < 0 || iRet >= 100)
    343     m_pXMLDoc->CloseXML();
    344   if (iRet < 0)
    345     return XFA_PARSESTATUS_SyntaxErr;
    346   if (iRet < 100)
    347     return iRet / 2;
    348 
    349   pXMLNode = GetDocumentNode(m_pXMLDoc.get());
    350   return XFA_PARSESTATUS_Done;
    351 }
    352 
    353 void CXFA_SimpleParser::ConstructXFANode(CXFA_Node* pXFANode,
    354                                          CFDE_XMLNode* pXMLNode) {
    355   XFA_XDPPACKET ePacketID = (XFA_XDPPACKET)pXFANode->GetPacketID();
    356   if (ePacketID == XFA_XDPPACKET_Datasets) {
    357     if (pXFANode->GetElementType() == XFA_Element::DataValue) {
    358       for (CFDE_XMLNode* pXMLChild =
    359                pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    360            pXMLChild;
    361            pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    362         FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
    363         if (eNodeType == FDE_XMLNODE_Instruction)
    364           continue;
    365 
    366         if (eNodeType == FDE_XMLNODE_Element) {
    367           CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
    368                                                         XFA_Element::DataValue);
    369           if (!pXFAChild)
    370             return;
    371 
    372           CFX_WideString wsNodeStr;
    373           CFDE_XMLElement* child = static_cast<CFDE_XMLElement*>(pXMLChild);
    374           child->GetLocalTagName(wsNodeStr);
    375           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
    376           CFX_WideString wsChildValue;
    377           XFA_GetPlainTextFromRichText(child, wsChildValue);
    378           if (!wsChildValue.IsEmpty())
    379             pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsChildValue);
    380 
    381           pXFANode->InsertChild(pXFAChild);
    382           pXFAChild->SetXMLMappingNode(pXMLChild);
    383           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
    384           break;
    385         }
    386       }
    387       m_pRootNode = pXFANode;
    388     } else {
    389       m_pRootNode = DataLoader(pXFANode, pXMLNode, true);
    390     }
    391   } else if (pXFANode->IsContentNode()) {
    392     ParseContentNode(pXFANode, pXMLNode, ePacketID);
    393     m_pRootNode = pXFANode;
    394   } else {
    395     m_pRootNode = NormalLoader(pXFANode, pXMLNode, ePacketID, true);
    396   }
    397 }
    398 
    399 CXFA_Node* CXFA_SimpleParser::GetRootNode() const {
    400   return m_pRootNode;
    401 }
    402 
    403 CFDE_XMLDoc* CXFA_SimpleParser::GetXMLDoc() const {
    404   return m_pXMLDoc.get();
    405 }
    406 
    407 bool XFA_FDEExtension_ResolveNamespaceQualifier(
    408     CFDE_XMLElement* pNode,
    409     const CFX_WideStringC& wsQualifier,
    410     CFX_WideString& wsNamespaceURI) {
    411   if (!pNode)
    412     return false;
    413 
    414   CFDE_XMLNode* pFakeRoot = pNode->GetNodeItem(CFDE_XMLNode::Root);
    415   CFX_WideString wsNSAttribute;
    416   bool bRet = false;
    417   if (wsQualifier.IsEmpty()) {
    418     wsNSAttribute = L"xmlns";
    419     bRet = true;
    420   } else {
    421     wsNSAttribute = L"xmlns:" + wsQualifier;
    422   }
    423   for (; pNode != pFakeRoot; pNode = static_cast<CFDE_XMLElement*>(
    424                                  pNode->GetNodeItem(CFDE_XMLNode::Parent))) {
    425     if (pNode->GetType() != FDE_XMLNODE_Element)
    426       continue;
    427 
    428     if (pNode->HasAttribute(wsNSAttribute.c_str())) {
    429       pNode->GetString(wsNSAttribute.c_str(), wsNamespaceURI);
    430       return true;
    431     }
    432   }
    433   wsNamespaceURI.clear();
    434   return bRet;
    435 }
    436 
    437 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket(CFDE_XMLNode* pXMLDocumentNode,
    438                                                XFA_XDPPACKET ePacketID) {
    439   switch (ePacketID) {
    440     case XFA_XDPPACKET_UNKNOWN:
    441       return nullptr;
    442     case XFA_XDPPACKET_XDP:
    443       return ParseAsXDPPacket_XDP(pXMLDocumentNode, ePacketID);
    444     case XFA_XDPPACKET_Config:
    445       return ParseAsXDPPacket_Config(pXMLDocumentNode, ePacketID);
    446     case XFA_XDPPACKET_Template:
    447     case XFA_XDPPACKET_Form:
    448       return ParseAsXDPPacket_TemplateForm(pXMLDocumentNode, ePacketID);
    449     case XFA_XDPPACKET_Datasets:
    450       return ParseAsXDPPacket_Data(pXMLDocumentNode, ePacketID);
    451     case XFA_XDPPACKET_Xdc:
    452       return ParseAsXDPPacket_Xdc(pXMLDocumentNode, ePacketID);
    453     case XFA_XDPPACKET_LocaleSet:
    454     case XFA_XDPPACKET_ConnectionSet:
    455     case XFA_XDPPACKET_SourceSet:
    456       return ParseAsXDPPacket_LocaleConnectionSourceSet(pXMLDocumentNode,
    457                                                         ePacketID);
    458     default:
    459       return ParseAsXDPPacket_User(pXMLDocumentNode, ePacketID);
    460   }
    461 }
    462 
    463 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_XDP(
    464     CFDE_XMLNode* pXMLDocumentNode,
    465     XFA_XDPPACKET ePacketID) {
    466   if (!MatchNodeName(pXMLDocumentNode,
    467                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pName,
    468                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->pURI,
    469                      XFA_GetPacketByIndex(XFA_PACKET_XDP)->eFlags)) {
    470     return nullptr;
    471   }
    472   CXFA_Node* pXFARootNode =
    473       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Xfa);
    474   if (!pXFARootNode)
    475     return nullptr;
    476 
    477   m_pRootNode = pXFARootNode;
    478   pXFARootNode->SetCData(XFA_ATTRIBUTE_Name, L"xfa");
    479   {
    480     CFDE_XMLElement* pElement = static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
    481     int32_t iAttributeCount = pElement->CountAttributes();
    482     for (int32_t i = 0; i < iAttributeCount; i++) {
    483       CFX_WideString wsAttriName, wsAttriValue;
    484       pElement->GetAttribute(i, wsAttriName, wsAttriValue);
    485       if (wsAttriName == L"uuid")
    486         pXFARootNode->SetCData(XFA_ATTRIBUTE_Uuid, wsAttriValue);
    487       else if (wsAttriName == L"timeStamp")
    488         pXFARootNode->SetCData(XFA_ATTRIBUTE_TimeStamp, wsAttriValue);
    489     }
    490   }
    491 
    492   CFDE_XMLNode* pXMLConfigDOMRoot = nullptr;
    493   CXFA_Node* pXFAConfigDOMRoot = nullptr;
    494   {
    495     for (CFDE_XMLNode* pChildItem =
    496              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    497          pChildItem;
    498          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    499       const XFA_PACKETINFO* pPacketInfo =
    500           XFA_GetPacketByIndex(XFA_PACKET_Config);
    501       if (!MatchNodeName(pChildItem, pPacketInfo->pName, pPacketInfo->pURI,
    502                          pPacketInfo->eFlags)) {
    503         continue;
    504       }
    505       if (pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
    506         return nullptr;
    507       }
    508       pXMLConfigDOMRoot = pChildItem;
    509       pXFAConfigDOMRoot =
    510           ParseAsXDPPacket_Config(pXMLConfigDOMRoot, XFA_XDPPACKET_Config);
    511       pXFARootNode->InsertChild(pXFAConfigDOMRoot, nullptr);
    512     }
    513   }
    514 
    515   CFDE_XMLNode* pXMLDatasetsDOMRoot = nullptr;
    516   CFDE_XMLNode* pXMLFormDOMRoot = nullptr;
    517   CFDE_XMLNode* pXMLTemplateDOMRoot = nullptr;
    518   {
    519     for (CFDE_XMLNode* pChildItem =
    520              pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    521          pChildItem;
    522          pChildItem = pChildItem->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    523       if (!pChildItem || pChildItem->GetType() != FDE_XMLNODE_Element)
    524         continue;
    525       if (pChildItem == pXMLConfigDOMRoot)
    526         continue;
    527 
    528       CFDE_XMLElement* pElement =
    529           reinterpret_cast<CFDE_XMLElement*>(pChildItem);
    530       CFX_WideString wsPacketName;
    531       pElement->GetLocalTagName(wsPacketName);
    532       const XFA_PACKETINFO* pPacketInfo =
    533           GetPacketByName(wsPacketName.AsStringC());
    534       if (pPacketInfo && pPacketInfo->pURI) {
    535         if (!MatchNodeName(pElement, pPacketInfo->pName, pPacketInfo->pURI,
    536                            pPacketInfo->eFlags)) {
    537           pPacketInfo = nullptr;
    538         }
    539       }
    540       XFA_XDPPACKET ePacket =
    541           pPacketInfo ? pPacketInfo->eName : XFA_XDPPACKET_USER;
    542       if (ePacket == XFA_XDPPACKET_XDP)
    543         continue;
    544       if (ePacket == XFA_XDPPACKET_Datasets) {
    545         if (pXMLDatasetsDOMRoot)
    546           return nullptr;
    547 
    548         pXMLDatasetsDOMRoot = pElement;
    549       } else if (ePacket == XFA_XDPPACKET_Form) {
    550         if (pXMLFormDOMRoot)
    551           return nullptr;
    552 
    553         pXMLFormDOMRoot = pElement;
    554       } else if (ePacket == XFA_XDPPACKET_Template) {
    555         if (pXMLTemplateDOMRoot) {
    556           // Found a duplicate template packet.
    557           return nullptr;
    558         }
    559         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
    560         if (pPacketNode) {
    561           pXMLTemplateDOMRoot = pElement;
    562           pXFARootNode->InsertChild(pPacketNode);
    563         }
    564       } else {
    565         CXFA_Node* pPacketNode = ParseAsXDPPacket(pElement, ePacket);
    566         if (pPacketNode) {
    567           if (pPacketInfo &&
    568               (pPacketInfo->eFlags & XFA_XDPPACKET_FLAGS_SUPPORTONE) &&
    569               pXFARootNode->GetFirstChildByName(pPacketInfo->uHash)) {
    570             return nullptr;
    571           }
    572           pXFARootNode->InsertChild(pPacketNode);
    573         }
    574       }
    575     }
    576   }
    577 
    578   if (!pXMLTemplateDOMRoot) {
    579     // No template is found.
    580     return nullptr;
    581   }
    582   if (pXMLDatasetsDOMRoot) {
    583     CXFA_Node* pPacketNode =
    584         ParseAsXDPPacket(pXMLDatasetsDOMRoot, XFA_XDPPACKET_Datasets);
    585     if (pPacketNode)
    586       pXFARootNode->InsertChild(pPacketNode);
    587   }
    588   if (pXMLFormDOMRoot) {
    589     CXFA_Node* pPacketNode =
    590         ParseAsXDPPacket(pXMLFormDOMRoot, XFA_XDPPACKET_Form);
    591     if (pPacketNode)
    592       pXFARootNode->InsertChild(pPacketNode);
    593   }
    594   pXFARootNode->SetXMLMappingNode(pXMLDocumentNode);
    595   return pXFARootNode;
    596 }
    597 
    598 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Config(
    599     CFDE_XMLNode* pXMLDocumentNode,
    600     XFA_XDPPACKET ePacketID) {
    601   if (!MatchNodeName(pXMLDocumentNode,
    602                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pName,
    603                      XFA_GetPacketByIndex(XFA_PACKET_Config)->pURI,
    604                      XFA_GetPacketByIndex(XFA_PACKET_Config)->eFlags)) {
    605     return nullptr;
    606   }
    607   CXFA_Node* pNode =
    608       m_pFactory->CreateNode(XFA_XDPPACKET_Config, XFA_Element::Config);
    609   if (!pNode)
    610     return nullptr;
    611 
    612   pNode->SetCData(XFA_ATTRIBUTE_Name,
    613                   XFA_GetPacketByIndex(XFA_PACKET_Config)->pName);
    614   if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
    615     return nullptr;
    616 
    617   pNode->SetXMLMappingNode(pXMLDocumentNode);
    618   return pNode;
    619 }
    620 
    621 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_TemplateForm(
    622     CFDE_XMLNode* pXMLDocumentNode,
    623     XFA_XDPPACKET ePacketID) {
    624   CXFA_Node* pNode = nullptr;
    625   if (ePacketID == XFA_XDPPACKET_Template) {
    626     if (MatchNodeName(pXMLDocumentNode,
    627                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName,
    628                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pURI,
    629                       XFA_GetPacketByIndex(XFA_PACKET_Template)->eFlags)) {
    630       pNode =
    631           m_pFactory->CreateNode(XFA_XDPPACKET_Template, XFA_Element::Template);
    632       if (!pNode)
    633         return nullptr;
    634 
    635       pNode->SetCData(XFA_ATTRIBUTE_Name,
    636                       XFA_GetPacketByIndex(XFA_PACKET_Template)->pName);
    637       if (m_bDocumentParser) {
    638         CFX_WideString wsNamespaceURI;
    639         CFDE_XMLElement* pXMLDocumentElement =
    640             static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
    641         pXMLDocumentElement->GetNamespaceURI(wsNamespaceURI);
    642         if (wsNamespaceURI.IsEmpty())
    643           pXMLDocumentElement->GetString(L"xmlns:xfa", wsNamespaceURI);
    644 
    645         pNode->GetDocument()->RecognizeXFAVersionNumber(wsNamespaceURI);
    646       }
    647       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
    648         return nullptr;
    649     }
    650   } else if (ePacketID == XFA_XDPPACKET_Form) {
    651     if (MatchNodeName(pXMLDocumentNode,
    652                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName,
    653                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pURI,
    654                       XFA_GetPacketByIndex(XFA_PACKET_Form)->eFlags)) {
    655       CFDE_XMLElement* pXMLDocumentElement =
    656           static_cast<CFDE_XMLElement*>(pXMLDocumentNode);
    657       CFX_WideString wsChecksum;
    658       pXMLDocumentElement->GetString(L"checksum", wsChecksum);
    659       if (wsChecksum.GetLength() != 28 ||
    660           m_pXMLParser->m_dwCheckStatus != 0x03) {
    661         return nullptr;
    662       }
    663       std::unique_ptr<CXFA_ChecksumContext> pChecksum(new CXFA_ChecksumContext);
    664       pChecksum->StartChecksum();
    665       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[0],
    666                                 m_pXMLParser->m_nSize[0]);
    667       pChecksum->UpdateChecksum(m_pFileRead, m_pXMLParser->m_nStart[1],
    668                                 m_pXMLParser->m_nSize[1]);
    669       pChecksum->FinishChecksum();
    670       CFX_ByteString bsCheck = pChecksum->GetChecksum();
    671       if (bsCheck != wsChecksum.UTF8Encode())
    672         return nullptr;
    673 
    674       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_Form, XFA_Element::Form);
    675       if (!pNode)
    676         return nullptr;
    677 
    678       pNode->SetCData(XFA_ATTRIBUTE_Name,
    679                       XFA_GetPacketByIndex(XFA_PACKET_Form)->pName);
    680       pNode->SetAttribute(XFA_ATTRIBUTE_Checksum, wsChecksum.AsStringC());
    681       CXFA_Node* pTemplateRoot =
    682           m_pRootNode->GetFirstChildByClass(XFA_Element::Template);
    683       CXFA_Node* pTemplateChosen =
    684           pTemplateRoot
    685               ? pTemplateRoot->GetFirstChildByClass(XFA_Element::Subform)
    686               : nullptr;
    687       bool bUseAttribute = true;
    688       if (pTemplateChosen &&
    689           pTemplateChosen->GetEnum(XFA_ATTRIBUTE_RestoreState) !=
    690               XFA_ATTRIBUTEENUM_Auto) {
    691         bUseAttribute = false;
    692       }
    693       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, bUseAttribute))
    694         return nullptr;
    695     }
    696   }
    697   if (pNode)
    698     pNode->SetXMLMappingNode(pXMLDocumentNode);
    699 
    700   return pNode;
    701 }
    702 
    703 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Data(
    704     CFDE_XMLNode* pXMLDocumentNode,
    705     XFA_XDPPACKET ePacketID) {
    706   CFDE_XMLNode* pDatasetsXMLNode = GetDataSetsFromXDP(pXMLDocumentNode);
    707   if (pDatasetsXMLNode) {
    708     CXFA_Node* pNode =
    709         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataModel);
    710     if (!pNode)
    711       return nullptr;
    712 
    713     pNode->SetCData(XFA_ATTRIBUTE_Name,
    714                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pName);
    715     if (!DataLoader(pNode, pDatasetsXMLNode, false))
    716       return nullptr;
    717 
    718     pNode->SetXMLMappingNode(pDatasetsXMLNode);
    719     return pNode;
    720   }
    721 
    722   CFDE_XMLNode* pDataXMLNode = nullptr;
    723   if (MatchNodeName(pXMLDocumentNode, L"data",
    724                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->pURI,
    725                     XFA_GetPacketByIndex(XFA_PACKET_Datasets)->eFlags)) {
    726     static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
    727         ->RemoveAttribute(L"xmlns:xfa");
    728     pDataXMLNode = pXMLDocumentNode;
    729   } else {
    730     CFDE_XMLElement* pDataElement = new CFDE_XMLElement(L"xfa:data");
    731     CFDE_XMLNode* pParentXMLNode =
    732         pXMLDocumentNode->GetNodeItem(CFDE_XMLNode::Parent);
    733     if (pParentXMLNode)
    734       pParentXMLNode->RemoveChildNode(pXMLDocumentNode);
    735 
    736     ASSERT(pXMLDocumentNode->GetType() == FDE_XMLNODE_Element);
    737     if (pXMLDocumentNode->GetType() == FDE_XMLNODE_Element) {
    738       static_cast<CFDE_XMLElement*>(pXMLDocumentNode)
    739           ->RemoveAttribute(L"xmlns:xfa");
    740     }
    741     pDataElement->InsertChildNode(pXMLDocumentNode);
    742     pDataXMLNode = pDataElement;
    743   }
    744 
    745   if (pDataXMLNode) {
    746     CXFA_Node* pNode =
    747         m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, XFA_Element::DataGroup);
    748     if (!pNode) {
    749       if (pDataXMLNode != pXMLDocumentNode)
    750         delete pDataXMLNode;
    751       return nullptr;
    752     }
    753     CFX_WideString wsLocalName;
    754     static_cast<CFDE_XMLElement*>(pDataXMLNode)->GetLocalTagName(wsLocalName);
    755     pNode->SetCData(XFA_ATTRIBUTE_Name, wsLocalName);
    756     if (!DataLoader(pNode, pDataXMLNode, true))
    757       return nullptr;
    758 
    759     pNode->SetXMLMappingNode(pDataXMLNode);
    760     if (pDataXMLNode != pXMLDocumentNode)
    761       pNode->SetFlag(XFA_NodeFlag_OwnXMLNode, false);
    762     return pNode;
    763   }
    764   return nullptr;
    765 }
    766 
    767 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_LocaleConnectionSourceSet(
    768     CFDE_XMLNode* pXMLDocumentNode,
    769     XFA_XDPPACKET ePacketID) {
    770   CXFA_Node* pNode = nullptr;
    771   if (ePacketID == XFA_XDPPACKET_LocaleSet) {
    772     if (MatchNodeName(pXMLDocumentNode,
    773                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName,
    774                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pURI,
    775                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->eFlags)) {
    776       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_LocaleSet,
    777                                      XFA_Element::LocaleSet);
    778       if (!pNode)
    779         return nullptr;
    780 
    781       pNode->SetCData(XFA_ATTRIBUTE_Name,
    782                       XFA_GetPacketByIndex(XFA_PACKET_LocaleSet)->pName);
    783       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
    784         return nullptr;
    785     }
    786   } else if (ePacketID == XFA_XDPPACKET_ConnectionSet) {
    787     if (MatchNodeName(pXMLDocumentNode,
    788                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName,
    789                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pURI,
    790                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->eFlags)) {
    791       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_ConnectionSet,
    792                                      XFA_Element::ConnectionSet);
    793       if (!pNode)
    794         return nullptr;
    795 
    796       pNode->SetCData(XFA_ATTRIBUTE_Name,
    797                       XFA_GetPacketByIndex(XFA_PACKET_ConnectionSet)->pName);
    798       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
    799         return nullptr;
    800     }
    801   } else if (ePacketID == XFA_XDPPACKET_SourceSet) {
    802     if (MatchNodeName(pXMLDocumentNode,
    803                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName,
    804                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pURI,
    805                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->eFlags)) {
    806       pNode = m_pFactory->CreateNode(XFA_XDPPACKET_SourceSet,
    807                                      XFA_Element::SourceSet);
    808       if (!pNode)
    809         return nullptr;
    810 
    811       pNode->SetCData(XFA_ATTRIBUTE_Name,
    812                       XFA_GetPacketByIndex(XFA_PACKET_SourceSet)->pName);
    813       if (!NormalLoader(pNode, pXMLDocumentNode, ePacketID, true))
    814         return nullptr;
    815     }
    816   }
    817   if (pNode)
    818     pNode->SetXMLMappingNode(pXMLDocumentNode);
    819   return pNode;
    820 }
    821 
    822 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_Xdc(
    823     CFDE_XMLNode* pXMLDocumentNode,
    824     XFA_XDPPACKET ePacketID) {
    825   if (!MatchNodeName(pXMLDocumentNode,
    826                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName,
    827                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pURI,
    828                      XFA_GetPacketByIndex(XFA_PACKET_Xdc)->eFlags))
    829     return nullptr;
    830 
    831   CXFA_Node* pNode =
    832       m_pFactory->CreateNode(XFA_XDPPACKET_Xdc, XFA_Element::Xdc);
    833   if (!pNode)
    834     return nullptr;
    835 
    836   pNode->SetCData(XFA_ATTRIBUTE_Name,
    837                   XFA_GetPacketByIndex(XFA_PACKET_Xdc)->pName);
    838   pNode->SetXMLMappingNode(pXMLDocumentNode);
    839   return pNode;
    840 }
    841 
    842 CXFA_Node* CXFA_SimpleParser::ParseAsXDPPacket_User(
    843     CFDE_XMLNode* pXMLDocumentNode,
    844     XFA_XDPPACKET ePacketID) {
    845   CXFA_Node* pNode =
    846       m_pFactory->CreateNode(XFA_XDPPACKET_XDP, XFA_Element::Packet);
    847   if (!pNode)
    848     return nullptr;
    849 
    850   CFX_WideString wsName;
    851   static_cast<CFDE_XMLElement*>(pXMLDocumentNode)->GetLocalTagName(wsName);
    852   pNode->SetCData(XFA_ATTRIBUTE_Name, wsName);
    853   if (!UserPacketLoader(pNode, pXMLDocumentNode))
    854     return nullptr;
    855 
    856   pNode->SetXMLMappingNode(pXMLDocumentNode);
    857   return pNode;
    858 }
    859 
    // Loader for user-defined packets. Nothing is read from |pXMLDoc|; the
    // function simply hands |pXFANode| back to the caller, so the result is
    // non-null whenever |pXFANode| is non-null.
    860 CXFA_Node* CXFA_SimpleParser::UserPacketLoader(CXFA_Node* pXFANode,
    861                                                CFDE_XMLNode* pXMLDoc) {
    862   return pXFANode;
    863 }
    864 
    // Loads the children of |pXMLDoc| into |pXFANode| via ParseDataGroup(),
    // always using the Datasets packet, and returns |pXFANode|.
    // |bDoTransform| is accepted but not used by this implementation.
    865 CXFA_Node* CXFA_SimpleParser::DataLoader(CXFA_Node* pXFANode,
    866                                          CFDE_XMLNode* pXMLDoc,
    867                                          bool bDoTransform) {
    868   ParseDataGroup(pXFANode, pXMLDoc, XFA_XDPPACKET_Datasets);
    869   return pXFANode;
    870 }
    871 
    872 CXFA_Node* CXFA_SimpleParser::NormalLoader(CXFA_Node* pXFANode,
    873                                            CFDE_XMLNode* pXMLDoc,
    874                                            XFA_XDPPACKET ePacketID,
    875                                            bool bUseAttribute) {
    876   bool bOneOfPropertyFound = false;
    877   for (CFDE_XMLNode* pXMLChild = pXMLDoc->GetNodeItem(CFDE_XMLNode::FirstChild);
    878        pXMLChild;
    879        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    880     switch (pXMLChild->GetType()) {
    881       case FDE_XMLNODE_Element: {
    882         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
    883         CFX_WideString wsTagName;
    884         pXMLElement->GetLocalTagName(wsTagName);
    885         XFA_Element eType = XFA_GetElementTypeForName(wsTagName.AsStringC());
    886         if (eType == XFA_Element::Unknown)
    887           continue;
    888 
    889         const XFA_PROPERTY* pPropertyInfo = XFA_GetPropertyOfElement(
    890             pXFANode->GetElementType(), eType, ePacketID);
    891         if (pPropertyInfo &&
    892             ((pPropertyInfo->uFlags &
    893               (XFA_PROPERTYFLAG_OneOf | XFA_PROPERTYFLAG_DefaultOneOf)) != 0)) {
    894           if (bOneOfPropertyFound)
    895             break;
    896 
    897           bOneOfPropertyFound = true;
    898         }
    899         CXFA_Node* pXFAChild = m_pFactory->CreateNode(ePacketID, eType);
    900         if (!pXFAChild)
    901           return nullptr;
    902         if (ePacketID == XFA_XDPPACKET_Config)
    903           pXFAChild->SetAttribute(XFA_ATTRIBUTE_Name, wsTagName.AsStringC());
    904 
    905         bool IsNeedValue = true;
    906         for (int32_t i = 0, count = pXMLElement->CountAttributes(); i < count;
    907              i++) {
    908           CFX_WideString wsAttrQualifiedName;
    909           CFX_WideString wsAttrName;
    910           CFX_WideString wsAttrValue;
    911           pXMLElement->GetAttribute(i, wsAttrQualifiedName, wsAttrValue);
    912           GetAttributeLocalName(wsAttrQualifiedName.AsStringC(), wsAttrName);
    913           if (wsAttrName == L"nil" && wsAttrValue == L"true") {
    914             IsNeedValue = false;
    915           }
    916           const XFA_ATTRIBUTEINFO* lpAttrInfo =
    917               XFA_GetAttributeByName(wsAttrName.AsStringC());
    918           if (!lpAttrInfo)
    919             continue;
    920 
    921           if (!bUseAttribute && lpAttrInfo->eName != XFA_ATTRIBUTE_Name &&
    922               lpAttrInfo->eName != XFA_ATTRIBUTE_Save) {
    923             continue;
    924           }
    925           pXFAChild->SetAttribute(lpAttrInfo->eName, wsAttrValue.AsStringC());
    926         }
    927         pXFANode->InsertChild(pXFAChild);
    928         if (eType == XFA_Element::Validate || eType == XFA_Element::Locale) {
    929           if (ePacketID == XFA_XDPPACKET_Config)
    930             ParseContentNode(pXFAChild, pXMLElement, ePacketID);
    931           else
    932             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
    933 
    934           break;
    935         }
    936         switch (pXFAChild->GetObjectType()) {
    937           case XFA_ObjectType::ContentNode:
    938           case XFA_ObjectType::TextNode:
    939           case XFA_ObjectType::NodeC:
    940           case XFA_ObjectType::NodeV:
    941             if (IsNeedValue)
    942               ParseContentNode(pXFAChild, pXMLElement, ePacketID);
    943             break;
    944           default:
    945             NormalLoader(pXFAChild, pXMLElement, ePacketID, bUseAttribute);
    946             break;
    947         }
    948       } break;
    949       case FDE_XMLNODE_Instruction:
    950         ParseInstruction(pXFANode, static_cast<CFDE_XMLInstruction*>(pXMLChild),
    951                          ePacketID);
    952         break;
    953       default:
    954         break;
    955     }
    956   }
    957   return pXFANode;
    958 }
    959 
    960 void CXFA_SimpleParser::ParseContentNode(CXFA_Node* pXFANode,
    961                                          CFDE_XMLNode* pXMLNode,
    962                                          XFA_XDPPACKET ePacketID) {
    963   XFA_Element element = XFA_Element::Sharptext;
    964   if (pXFANode->GetElementType() == XFA_Element::ExData) {
    965     CFX_WideStringC wsContentType =
    966         pXFANode->GetCData(XFA_ATTRIBUTE_ContentType);
    967     if (wsContentType == L"text/html")
    968       element = XFA_Element::SharpxHTML;
    969     else if (wsContentType == L"text/xml")
    970       element = XFA_Element::Sharpxml;
    971   }
    972   if (element == XFA_Element::SharpxHTML)
    973     pXFANode->SetXMLMappingNode(pXMLNode);
    974 
    975   CFX_WideString wsValue;
    976   for (CFDE_XMLNode* pXMLChild =
    977            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
    978        pXMLChild;
    979        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
    980     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
    981     if (eNodeType == FDE_XMLNODE_Instruction)
    982       continue;
    983 
    984     if (element == XFA_Element::SharpxHTML) {
    985       if (eNodeType != FDE_XMLNODE_Element)
    986         break;
    987 
    988       if (XFA_RecognizeRichText(static_cast<CFDE_XMLElement*>(pXMLChild)))
    989         XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
    990                                      wsValue);
    991     } else if (element == XFA_Element::Sharpxml) {
    992       if (eNodeType != FDE_XMLNODE_Element)
    993         break;
    994 
    995       ConvertXMLToPlainText(static_cast<CFDE_XMLElement*>(pXMLChild), wsValue);
    996     } else {
    997       if (eNodeType == FDE_XMLNODE_Element)
    998         break;
    999       if (eNodeType == FDE_XMLNODE_Text)
   1000         static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsValue);
   1001       else if (eNodeType == FDE_XMLNODE_CharData)
   1002         static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsValue);
   1003     }
   1004     break;
   1005   }
   1006   if (!wsValue.IsEmpty()) {
   1007     if (pXFANode->IsContentNode()) {
   1008       CXFA_Node* pContentRawDataNode =
   1009           m_pFactory->CreateNode(ePacketID, element);
   1010       ASSERT(pContentRawDataNode);
   1011       pContentRawDataNode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
   1012       pXFANode->InsertChild(pContentRawDataNode);
   1013     } else {
   1014       pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsValue);
   1015     }
   1016   }
   1017 }
   1018 
   1019 void CXFA_SimpleParser::ParseDataGroup(CXFA_Node* pXFANode,
   1020                                        CFDE_XMLNode* pXMLNode,
   1021                                        XFA_XDPPACKET ePacketID) {
   1022   for (CFDE_XMLNode* pXMLChild =
   1023            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
   1024        pXMLChild;
   1025        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
   1026     switch (pXMLChild->GetType()) {
   1027       case FDE_XMLNODE_Element: {
   1028         CFDE_XMLElement* pXMLElement = static_cast<CFDE_XMLElement*>(pXMLChild);
   1029         {
   1030           CFX_WideString wsNamespaceURI;
   1031           GetElementTagNamespaceURI(pXMLElement, wsNamespaceURI);
   1032           if (wsNamespaceURI == L"http://www.xfa.com/schema/xfa-package/" ||
   1033               wsNamespaceURI == L"http://www.xfa.org/schema/xfa-package/" ||
   1034               wsNamespaceURI == L"http://www.w3.org/2001/XMLSchema-instance") {
   1035             continue;
   1036           }
   1037         }
   1038 
   1039         XFA_Element eNodeType = XFA_Element::DataModel;
   1040         if (eNodeType == XFA_Element::DataModel) {
   1041           CFX_WideString wsDataNodeAttr;
   1042           if (FindAttributeWithNS(pXMLElement, L"dataNode",
   1043                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
   1044                                   wsDataNodeAttr)) {
   1045             if (wsDataNodeAttr == L"dataGroup")
   1046               eNodeType = XFA_Element::DataGroup;
   1047             else if (wsDataNodeAttr == L"dataValue")
   1048               eNodeType = XFA_Element::DataValue;
   1049           }
   1050         }
   1051         CFX_WideString wsContentType;
   1052         if (eNodeType == XFA_Element::DataModel) {
   1053           if (FindAttributeWithNS(pXMLElement, L"contentType",
   1054                                   L"http://www.xfa.org/schema/xfa-data/1.0/",
   1055                                   wsContentType)) {
   1056             if (!wsContentType.IsEmpty())
   1057               eNodeType = XFA_Element::DataValue;
   1058           }
   1059         }
   1060         if (eNodeType == XFA_Element::DataModel) {
   1061           for (CFDE_XMLNode* pXMLDataChild =
   1062                    pXMLElement->GetNodeItem(CFDE_XMLNode::FirstChild);
   1063                pXMLDataChild; pXMLDataChild = pXMLDataChild->GetNodeItem(
   1064                                   CFDE_XMLNode::NextSibling)) {
   1065             if (pXMLDataChild->GetType() == FDE_XMLNODE_Element) {
   1066               if (!XFA_RecognizeRichText(
   1067                       static_cast<CFDE_XMLElement*>(pXMLDataChild))) {
   1068                 eNodeType = XFA_Element::DataGroup;
   1069                 break;
   1070               }
   1071             }
   1072           }
   1073         }
   1074         if (eNodeType == XFA_Element::DataModel)
   1075           eNodeType = XFA_Element::DataValue;
   1076 
   1077         CXFA_Node* pXFAChild =
   1078             m_pFactory->CreateNode(XFA_XDPPACKET_Datasets, eNodeType);
   1079         if (!pXFAChild)
   1080           return;
   1081 
   1082         CFX_WideString wsNodeName;
   1083         pXMLElement->GetLocalTagName(wsNodeName);
   1084         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeName);
   1085         bool bNeedValue = true;
   1086         for (int32_t i = 0; i < pXMLElement->CountAttributes(); ++i) {
   1087           CFX_WideString wsQualifiedName;
   1088           CFX_WideString wsValue;
   1089           CFX_WideString wsName;
   1090           CFX_WideString wsNS;
   1091           pXMLElement->GetAttribute(i, wsQualifiedName, wsValue);
   1092           if (!ResolveAttribute(pXMLElement, wsQualifiedName.AsStringC(),
   1093                                 wsName, wsNS)) {
   1094             continue;
   1095           }
   1096           if (wsName == L"nil" && wsValue == L"true") {
   1097             bNeedValue = false;
   1098             continue;
   1099           }
   1100           if (wsNS == L"http://www.xfa.com/schema/xfa-package/" ||
   1101               wsNS == L"http://www.xfa.org/schema/xfa-package/" ||
   1102               wsNS == L"http://www.w3.org/2001/XMLSchema-instance" ||
   1103               wsNS == L"http://www.xfa.org/schema/xfa-data/1.0/") {
   1104             continue;
   1105           }
   1106           CXFA_Node* pXFAMetaData = m_pFactory->CreateNode(
   1107               XFA_XDPPACKET_Datasets, XFA_Element::DataValue);
   1108           if (!pXFAMetaData)
   1109             return;
   1110 
   1111           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Name, wsName);
   1112           pXFAMetaData->SetCData(XFA_ATTRIBUTE_QualifiedName, wsQualifiedName);
   1113           pXFAMetaData->SetCData(XFA_ATTRIBUTE_Value, wsValue);
   1114           pXFAMetaData->SetEnum(XFA_ATTRIBUTE_Contains,
   1115                                 XFA_ATTRIBUTEENUM_MetaData);
   1116           pXFAChild->InsertChild(pXFAMetaData);
   1117           pXFAMetaData->SetXMLMappingNode(pXMLElement);
   1118           pXFAMetaData->SetFlag(XFA_NodeFlag_Initialized, false);
   1119         }
   1120 
   1121         if (!bNeedValue) {
   1122           CFX_WideString wsNilName(L"xsi:nil");
   1123           pXMLElement->RemoveAttribute(wsNilName.c_str());
   1124         }
   1125         pXFANode->InsertChild(pXFAChild);
   1126         if (eNodeType == XFA_Element::DataGroup)
   1127           ParseDataGroup(pXFAChild, pXMLElement, ePacketID);
   1128         else if (bNeedValue)
   1129           ParseDataValue(pXFAChild, pXMLChild, XFA_XDPPACKET_Datasets);
   1130 
   1131         pXFAChild->SetXMLMappingNode(pXMLElement);
   1132         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1133         continue;
   1134       }
   1135       case FDE_XMLNODE_CharData: {
   1136         CFDE_XMLCharData* pXMLCharData =
   1137             static_cast<CFDE_XMLCharData*>(pXMLChild);
   1138         CFX_WideString wsCharData;
   1139         pXMLCharData->GetCharData(wsCharData);
   1140         if (IsStringAllWhitespace(wsCharData))
   1141           continue;
   1142 
   1143         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
   1144                                                       XFA_Element::DataValue);
   1145         if (!pXFAChild)
   1146           return;
   1147 
   1148         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCharData);
   1149         pXFANode->InsertChild(pXFAChild);
   1150         pXFAChild->SetXMLMappingNode(pXMLCharData);
   1151         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1152         continue;
   1153       }
   1154       case FDE_XMLNODE_Text: {
   1155         CFDE_XMLText* pXMLText = static_cast<CFDE_XMLText*>(pXMLChild);
   1156         CFX_WideString wsText;
   1157         pXMLText->GetText(wsText);
   1158         if (IsStringAllWhitespace(wsText))
   1159           continue;
   1160 
   1161         CXFA_Node* pXFAChild = m_pFactory->CreateNode(XFA_XDPPACKET_Datasets,
   1162                                                       XFA_Element::DataValue);
   1163         if (!pXFAChild)
   1164           return;
   1165 
   1166         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsText);
   1167         pXFANode->InsertChild(pXFAChild);
   1168         pXFAChild->SetXMLMappingNode(pXMLText);
   1169         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1170         continue;
   1171       }
   1172       default:
   1173         continue;
   1174     }
   1175   }
   1176 }
   1177 
   1178 void CXFA_SimpleParser::ParseDataValue(CXFA_Node* pXFANode,
   1179                                        CFDE_XMLNode* pXMLNode,
   1180                                        XFA_XDPPACKET ePacketID) {
   1181   CFX_WideTextBuf wsValueTextBuf;
   1182   CFX_WideTextBuf wsCurValueTextBuf;
   1183   bool bMarkAsCompound = false;
   1184   CFDE_XMLNode* pXMLCurValueNode = nullptr;
   1185   for (CFDE_XMLNode* pXMLChild =
   1186            pXMLNode->GetNodeItem(CFDE_XMLNode::FirstChild);
   1187        pXMLChild;
   1188        pXMLChild = pXMLChild->GetNodeItem(CFDE_XMLNode::NextSibling)) {
   1189     FDE_XMLNODETYPE eNodeType = pXMLChild->GetType();
   1190     if (eNodeType == FDE_XMLNODE_Instruction)
   1191       continue;
   1192 
   1193     CFX_WideString wsText;
   1194     if (eNodeType == FDE_XMLNODE_Text) {
   1195       static_cast<CFDE_XMLText*>(pXMLChild)->GetText(wsText);
   1196       if (!pXMLCurValueNode)
   1197         pXMLCurValueNode = pXMLChild;
   1198 
   1199       wsCurValueTextBuf << wsText;
   1200     } else if (eNodeType == FDE_XMLNODE_CharData) {
   1201       static_cast<CFDE_XMLCharData*>(pXMLChild)->GetCharData(wsText);
   1202       if (!pXMLCurValueNode)
   1203         pXMLCurValueNode = pXMLChild;
   1204 
   1205       wsCurValueTextBuf << wsText;
   1206     } else if (XFA_RecognizeRichText(
   1207                    static_cast<CFDE_XMLElement*>(pXMLChild))) {
   1208       XFA_GetPlainTextFromRichText(static_cast<CFDE_XMLElement*>(pXMLChild),
   1209                                    wsText);
   1210       if (!pXMLCurValueNode)
   1211         pXMLCurValueNode = pXMLChild;
   1212 
   1213       wsCurValueTextBuf << wsText;
   1214     } else {
   1215       bMarkAsCompound = true;
   1216       if (pXMLCurValueNode) {
   1217         CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
   1218         if (!wsCurValue.IsEmpty()) {
   1219           CXFA_Node* pXFAChild =
   1220               m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1221           if (!pXFAChild)
   1222             return;
   1223 
   1224           pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
   1225           pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
   1226           pXFANode->InsertChild(pXFAChild);
   1227           pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
   1228           pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1229           wsValueTextBuf << wsCurValue;
   1230           wsCurValueTextBuf.Clear();
   1231         }
   1232         pXMLCurValueNode = nullptr;
   1233       }
   1234       CXFA_Node* pXFAChild =
   1235           m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1236       if (!pXFAChild)
   1237         return;
   1238 
   1239       CFX_WideString wsNodeStr;
   1240       static_cast<CFDE_XMLElement*>(pXMLChild)->GetLocalTagName(wsNodeStr);
   1241       pXFAChild->SetCData(XFA_ATTRIBUTE_Name, wsNodeStr);
   1242       ParseDataValue(pXFAChild, pXMLChild, ePacketID);
   1243       pXFANode->InsertChild(pXFAChild);
   1244       pXFAChild->SetXMLMappingNode(pXMLChild);
   1245       pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1246       CFX_WideStringC wsCurValue = pXFAChild->GetCData(XFA_ATTRIBUTE_Value);
   1247       wsValueTextBuf << wsCurValue;
   1248     }
   1249   }
   1250   if (pXMLCurValueNode) {
   1251     CFX_WideString wsCurValue = wsCurValueTextBuf.MakeString();
   1252     if (!wsCurValue.IsEmpty()) {
   1253       if (bMarkAsCompound) {
   1254         CXFA_Node* pXFAChild =
   1255             m_pFactory->CreateNode(ePacketID, XFA_Element::DataValue);
   1256         if (!pXFAChild)
   1257           return;
   1258 
   1259         pXFAChild->SetCData(XFA_ATTRIBUTE_Name, L"");
   1260         pXFAChild->SetCData(XFA_ATTRIBUTE_Value, wsCurValue);
   1261         pXFANode->InsertChild(pXFAChild);
   1262         pXFAChild->SetXMLMappingNode(pXMLCurValueNode);
   1263         pXFAChild->SetFlag(XFA_NodeFlag_Initialized, false);
   1264       }
   1265       wsValueTextBuf << wsCurValue;
   1266       wsCurValueTextBuf.Clear();
   1267     }
   1268     pXMLCurValueNode = nullptr;
   1269   }
   1270   CFX_WideString wsNodeValue = wsValueTextBuf.MakeString();
   1271   pXFANode->SetCData(XFA_ATTRIBUTE_Value, wsNodeValue);
   1272 }
   1273 
   1274 void CXFA_SimpleParser::ParseInstruction(CXFA_Node* pXFANode,
   1275                                          CFDE_XMLInstruction* pXMLInstruction,
   1276                                          XFA_XDPPACKET ePacketID) {
   1277   if (!m_bDocumentParser)
   1278     return;
   1279 
   1280   CFX_WideString wsTargetName;
   1281   pXMLInstruction->GetTargetName(wsTargetName);
   1282   if (wsTargetName == L"originalXFAVersion") {
   1283     CFX_WideString wsData;
   1284     if (pXMLInstruction->GetData(0, wsData) &&
   1285         (pXFANode->GetDocument()->RecognizeXFAVersionNumber(wsData) !=
   1286          XFA_VERSION_UNKNOWN)) {
   1287       wsData.clear();
   1288       if (pXMLInstruction->GetData(1, wsData) &&
   1289           wsData == L"v2.7-scripting:1") {
   1290         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_Scripting, true);
   1291       }
   1292     }
   1293   } else if (wsTargetName == L"acrobat") {
   1294     CFX_WideString wsData;
   1295     if (pXMLInstruction->GetData(0, wsData) && wsData == L"JavaScript") {
   1296       if (pXMLInstruction->GetData(1, wsData) && wsData == L"strictScoping") {
   1297         pXFANode->GetDocument()->SetFlag(XFA_DOCFLAG_StrictScoping, true);
   1298       }
   1299     }
   1300   }
   1301 }
   1302 
   // Resets the parser's XML document (m_pXMLDoc) and then its input stream
   // (m_pStream).
   1303 void CXFA_SimpleParser::CloseParser() {
   1304   m_pXMLDoc.reset();
   1305   m_pStream.Reset();
   1306 }
   1307