1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/fxfa/parser/xfa_utils.h" 8 9 #include <algorithm> 10 #include <vector> 11 12 #include "core/fxcrt/cfx_memorystream.h" 13 #include "core/fxcrt/cfx_widetextbuf.h" 14 #include "core/fxcrt/fx_codepage.h" 15 #include "core/fxcrt/fx_extension.h" 16 #include "core/fxcrt/xml/cfx_xmlchardata.h" 17 #include "core/fxcrt/xml/cfx_xmlelement.h" 18 #include "core/fxcrt/xml/cfx_xmlnode.h" 19 #include "core/fxcrt/xml/cfx_xmltext.h" 20 #include "fxjs/xfa/cjx_object.h" 21 #include "xfa/fxfa/parser/cxfa_document.h" 22 #include "xfa/fxfa/parser/cxfa_localemgr.h" 23 #include "xfa/fxfa/parser/cxfa_localevalue.h" 24 #include "xfa/fxfa/parser/cxfa_measurement.h" 25 #include "xfa/fxfa/parser/cxfa_node.h" 26 #include "xfa/fxfa/parser/cxfa_ui.h" 27 #include "xfa/fxfa/parser/cxfa_value.h" 28 #include "xfa/fxfa/parser/xfa_basic_data.h" 29 30 namespace { 31 32 constexpr const wchar_t kFormNS[] = L"http://www.xfa.org/schema/xfa-form/"; 33 34 const double fraction_scales[] = {0.1, 35 0.01, 36 0.001, 37 0.0001, 38 0.00001, 39 0.000001, 40 0.0000001, 41 0.00000001, 42 0.000000001, 43 0.0000000001, 44 0.00000000001, 45 0.000000000001, 46 0.0000000000001, 47 0.00000000000001, 48 0.000000000000001, 49 0.0000000000000001}; 50 51 WideString ExportEncodeAttribute(const WideString& str) { 52 CFX_WideTextBuf textBuf; 53 int32_t iLen = str.GetLength(); 54 for (int32_t i = 0; i < iLen; i++) { 55 switch (str[i]) { 56 case '&': 57 textBuf << L"&"; 58 break; 59 case '<': 60 textBuf << L"<"; 61 break; 62 case '>': 63 textBuf << L">"; 64 break; 65 case '\'': 66 textBuf << L"'"; 67 break; 68 case '\"': 69 textBuf << L"""; 70 break; 71 default: 72 textBuf.AppendChar(str[i]); 73 } 74 } 75 return textBuf.MakeString(); 76 } 77 78 bool IsXMLValidChar(wchar_t ch) { 79 return ch == 0x09 || ch == 0x0A || ch == 0x0D || 80 (ch >= 0x20 && ch <= 0xD7FF) || (ch >= 0xE000 && ch <= 0xFFFD); 81 } 82 83 WideString ExportEncodeContent(const WideString& str) { 84 CFX_WideTextBuf textBuf; 85 int32_t iLen = str.GetLength(); 86 for (int32_t i = 0; i < iLen; i++) { 87 wchar_t ch = str[i]; 88 if (!IsXMLValidChar(ch)) 89 continue; 90 91 if (ch == '&') { 92 textBuf << L"&"; 93 } else if (ch == '<') { 94 textBuf << L"<"; 95 } else if (ch == '>') { 96 textBuf << L">"; 97 } else if (ch == '\'') { 98 textBuf << L"'"; 99 } else if (ch == '\"') { 100 textBuf << L"""; 101 } else if (ch == ' ') { 102 if (i && str[i - 1] != ' ') { 103 textBuf.AppendChar(' '); 104 } else { 105 textBuf << L" "; 106 } 107 } else { 108 textBuf.AppendChar(str[i]); 109 } 110 } 111 return textBuf.MakeString(); 112 } 113 114 bool AttributeSaveInDataModel(CXFA_Node* pNode, XFA_Attribute eAttribute) { 115 bool bSaveInDataModel = false; 116 if (pNode->GetElementType() != XFA_Element::Image) 117 return bSaveInDataModel; 118 119 CXFA_Node* pValueNode = pNode->GetParent(); 120 if (!pValueNode || pValueNode->GetElementType() != XFA_Element::Value) 121 return bSaveInDataModel; 122 123 CXFA_Node* pFieldNode = pValueNode->GetParent(); 124 if (pFieldNode && pFieldNode->GetBindData() && 125 eAttribute == XFA_Attribute::Href) { 126 bSaveInDataModel = true; 127 } 128 return bSaveInDataModel; 129 } 130 131 bool ContentNodeNeedtoExport(CXFA_Node* pContentNode) { 132 Optional<WideString> wsContent = 133 pContentNode->JSObject()->TryContent(false, false); 134 if (!wsContent) 135 return false; 136 137 ASSERT(pContentNode->IsContentNode()); 138 CXFA_Node* pParentNode = pContentNode->GetParent(); 139 if (!pParentNode || pParentNode->GetElementType() != XFA_Element::Value) 140 return true; 141 142 CXFA_Node* pGrandParentNode = pParentNode->GetParent(); 143 if (!pGrandParentNode || !pGrandParentNode->IsContainerNode()) 144 return true; 145 if (pGrandParentNode->GetBindData()) 146 return false; 147 148 XFA_Element eUIType = pGrandParentNode->GetWidgetAcc()->GetUIType(); 149 if (eUIType == XFA_Element::PasswordEdit) 150 return false; 151 return true; 152 } 153 154 void SaveAttribute(CXFA_Node* pNode, 155 XFA_Attribute eName, 156 const WideString& wsName, 157 bool bProto, 158 WideString& wsOutput) { 159 if (!bProto && !pNode->JSObject()->HasAttribute(eName)) 160 return; 161 162 Optional<WideString> value = pNode->JSObject()->TryAttribute(eName, false); 163 if (!value) 164 return; 165 166 wsOutput += L" "; 167 wsOutput += wsName; 168 wsOutput += L"=\""; 169 wsOutput += ExportEncodeAttribute(*value); 170 wsOutput += L"\""; 171 } 172 173 void RegenerateFormFile_Changed(CXFA_Node* pNode, 174 CFX_WideTextBuf& buf, 175 bool bSaveXML) { 176 WideString wsAttrs; 177 for (size_t i = 0;; ++i) { 178 XFA_Attribute attr = pNode->GetAttribute(i); 179 if (attr == XFA_Attribute::Unknown) 180 break; 181 182 if (attr == XFA_Attribute::Name || 183 (AttributeSaveInDataModel(pNode, attr) && !bSaveXML)) { 184 continue; 185 } 186 WideString wsAttr; 187 SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), bSaveXML, 188 wsAttr); 189 wsAttrs += wsAttr; 190 } 191 192 WideString wsChildren; 193 switch (pNode->GetObjectType()) { 194 case XFA_ObjectType::ContentNode: { 195 if (!bSaveXML && !ContentNodeNeedtoExport(pNode)) 196 break; 197 198 CXFA_Node* pRawValueNode = pNode->GetFirstChild(); 199 while (pRawValueNode && 200 pRawValueNode->GetElementType() != XFA_Element::SharpxHTML && 201 pRawValueNode->GetElementType() != XFA_Element::Sharptext && 202 pRawValueNode->GetElementType() != XFA_Element::Sharpxml) { 203 pRawValueNode = pRawValueNode->GetNextSibling(); 204 } 205 if (!pRawValueNode) 206 break; 207 208 Optional<WideString> contentType = 209 pNode->JSObject()->TryAttribute(XFA_Attribute::ContentType, false); 210 if (pRawValueNode->GetElementType() == XFA_Element::SharpxHTML && 211 (contentType && *contentType == L"text/html")) { 212 CFX_XMLNode* pExDataXML = pNode->GetXMLMappingNode(); 213 if (!pExDataXML) 214 break; 215 216 CFX_XMLNode* pRichTextXML = 217 pExDataXML->GetNodeItem(CFX_XMLNode::FirstChild); 218 if (!pRichTextXML) 219 break; 220 221 auto pMemStream = pdfium::MakeRetain<CFX_MemoryStream>(true); 222 auto pTempStream = 223 pdfium::MakeRetain<CFX_SeekableStreamProxy>(pMemStream, true); 224 225 pTempStream->SetCodePage(FX_CODEPAGE_UTF8); 226 pRichTextXML->SaveXMLNode(pTempStream); 227 wsChildren += WideString::FromUTF8( 228 ByteStringView(pMemStream->GetBuffer(), pMemStream->GetSize())); 229 } else if (pRawValueNode->GetElementType() == XFA_Element::Sharpxml && 230 (contentType && *contentType == L"text/xml")) { 231 Optional<WideString> rawValue = pRawValueNode->JSObject()->TryAttribute( 232 XFA_Attribute::Value, false); 233 if (!rawValue || rawValue->IsEmpty()) 234 break; 235 236 std::vector<WideString> wsSelTextArray; 237 size_t iStart = 0; 238 auto iEnd = rawValue->Find(L'\n', iStart); 239 iEnd = !iEnd.has_value() ? rawValue->GetLength() : iEnd; 240 while (iEnd.has_value() && iEnd >= iStart) { 241 wsSelTextArray.push_back( 242 rawValue->Mid(iStart, iEnd.value() - iStart)); 243 iStart = iEnd.value() + 1; 244 if (iStart >= rawValue->GetLength()) 245 break; 246 iEnd = rawValue->Find(L'\n', iStart); 247 } 248 249 CXFA_Node* pParentNode = pNode->GetParent(); 250 ASSERT(pParentNode); 251 CXFA_Node* pGrandparentNode = pParentNode->GetParent(); 252 ASSERT(pGrandparentNode); 253 WideString bodyTagName; 254 bodyTagName = 255 pGrandparentNode->JSObject()->GetCData(XFA_Attribute::Name); 256 if (bodyTagName.IsEmpty()) 257 bodyTagName = L"ListBox1"; 258 259 buf << L"<"; 260 buf << bodyTagName; 261 buf << L" xmlns=\"\"\n>"; 262 for (int32_t i = 0; i < pdfium::CollectionSize<int32_t>(wsSelTextArray); 263 i++) { 264 buf << L"<value\n>"; 265 buf << ExportEncodeContent(wsSelTextArray[i]); 266 buf << L"</value\n>"; 267 } 268 buf << L"</"; 269 buf << bodyTagName; 270 buf << L"\n>"; 271 wsChildren += buf.AsStringView(); 272 buf.Clear(); 273 } else { 274 WideString wsValue = 275 pRawValueNode->JSObject()->GetCData(XFA_Attribute::Value); 276 wsChildren += ExportEncodeContent(wsValue); 277 } 278 break; 279 } 280 case XFA_ObjectType::TextNode: 281 case XFA_ObjectType::NodeC: 282 case XFA_ObjectType::NodeV: { 283 WideString wsValue = pNode->JSObject()->GetCData(XFA_Attribute::Value); 284 wsChildren += ExportEncodeContent(wsValue); 285 break; 286 } 287 default: 288 if (pNode->GetElementType() == XFA_Element::Items) { 289 CXFA_Node* pTemplateNode = pNode->GetTemplateNodeIfExists(); 290 if (!pTemplateNode || 291 pTemplateNode->CountChildren(XFA_Element::Unknown, false) != 292 pNode->CountChildren(XFA_Element::Unknown, false)) { 293 bSaveXML = true; 294 } 295 } 296 CFX_WideTextBuf newBuf; 297 CXFA_Node* pChildNode = pNode->GetFirstChild(); 298 while (pChildNode) { 299 RegenerateFormFile_Changed(pChildNode, newBuf, bSaveXML); 300 wsChildren += newBuf.AsStringView(); 301 newBuf.Clear(); 302 pChildNode = pChildNode->GetNextSibling(); 303 } 304 if (!bSaveXML && !wsChildren.IsEmpty() && 305 pNode->GetElementType() == XFA_Element::Items) { 306 wsChildren.clear(); 307 bSaveXML = true; 308 CXFA_Node* pChild = pNode->GetFirstChild(); 309 while (pChild) { 310 RegenerateFormFile_Changed(pChild, newBuf, bSaveXML); 311 wsChildren += newBuf.AsStringView(); 312 newBuf.Clear(); 313 pChild = pChild->GetNextSibling(); 314 } 315 } 316 break; 317 } 318 319 if (!wsChildren.IsEmpty() || !wsAttrs.IsEmpty() || 320 pNode->JSObject()->HasAttribute(XFA_Attribute::Name)) { 321 WideStringView wsElement = pNode->GetClassName(); 322 WideString wsName; 323 SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsName); 324 buf << L"<"; 325 buf << wsElement; 326 buf << wsName; 327 buf << wsAttrs; 328 if (wsChildren.IsEmpty()) { 329 buf << L"\n/>"; 330 } else { 331 buf << L"\n>"; 332 buf << wsChildren; 333 buf << L"</"; 334 buf << wsElement; 335 buf << L"\n>"; 336 } 337 } 338 } 339 340 void RegenerateFormFile_Container( 341 CXFA_Node* pNode, 342 const RetainPtr<CFX_SeekableStreamProxy>& pStream, 343 bool bSaveXML) { 344 XFA_Element eType = pNode->GetElementType(); 345 if (eType == XFA_Element::Field || eType == XFA_Element::Draw || 346 !pNode->IsContainerNode()) { 347 CFX_WideTextBuf buf; 348 RegenerateFormFile_Changed(pNode, buf, bSaveXML); 349 size_t nLen = buf.GetLength(); 350 if (nLen > 0) 351 pStream->WriteString(buf.AsStringView()); 352 return; 353 } 354 355 WideStringView wsElement(pNode->GetClassName()); 356 pStream->WriteString(L"<"); 357 pStream->WriteString(wsElement); 358 359 WideString wsOutput; 360 SaveAttribute(pNode, XFA_Attribute::Name, L"name", true, wsOutput); 361 362 WideString wsAttrs; 363 for (size_t i = 0;; ++i) { 364 XFA_Attribute attr = pNode->GetAttribute(i); 365 if (attr == XFA_Attribute::Unknown) 366 break; 367 if (attr == XFA_Attribute::Name) 368 continue; 369 370 WideString wsAttr; 371 SaveAttribute(pNode, attr, CXFA_Node::AttributeToName(attr), false, wsAttr); 372 wsOutput += wsAttr; 373 } 374 375 if (!wsOutput.IsEmpty()) 376 pStream->WriteString(wsOutput.AsStringView()); 377 378 CXFA_Node* pChildNode = pNode->GetFirstChild(); 379 if (pChildNode) { 380 pStream->WriteString(L"\n>"); 381 while (pChildNode) { 382 RegenerateFormFile_Container(pChildNode, pStream, bSaveXML); 383 pChildNode = pChildNode->GetNextSibling(); 384 } 385 pStream->WriteString(L"</"); 386 pStream->WriteString(wsElement); 387 pStream->WriteString(L"\n>"); 388 } else { 389 pStream->WriteString(L"\n/>"); 390 } 391 } 392 393 WideString RecognizeXFAVersionNumber(CXFA_Node* pTemplateRoot) { 394 if (!pTemplateRoot) 395 return WideString(); 396 397 Optional<WideString> templateNS = pTemplateRoot->JSObject()->TryNamespace(); 398 if (!templateNS) 399 return WideString(); 400 401 XFA_VERSION eVersion = 402 pTemplateRoot->GetDocument()->RecognizeXFAVersionNumber(*templateNS); 403 if (eVersion == XFA_VERSION_UNKNOWN) 404 eVersion = XFA_VERSION_DEFAULT; 405 406 return WideString::Format(L"%i.%i", eVersion / 100, eVersion % 100); 407 } 408 409 } // namespace 410 411 double XFA_GetFractionalScale(uint32_t idx) { 412 return fraction_scales[idx]; 413 } 414 415 int XFA_GetMaxFractionalScale() { 416 return FX_ArraySize(fraction_scales); 417 } 418 419 CXFA_LocaleValue XFA_GetLocaleValue(CXFA_Node* pNode) { 420 CXFA_Value* pNodeValue = 421 pNode->GetChild<CXFA_Value>(0, XFA_Element::Value, false); 422 if (!pNodeValue) 423 return CXFA_LocaleValue(); 424 425 CXFA_Node* pValueChild = pNodeValue->GetFirstChild(); 426 if (!pValueChild) 427 return CXFA_LocaleValue(); 428 429 int32_t iVTType = XFA_VT_NULL; 430 switch (pValueChild->GetElementType()) { 431 case XFA_Element::Decimal: 432 iVTType = XFA_VT_DECIMAL; 433 break; 434 case XFA_Element::Float: 435 iVTType = XFA_VT_FLOAT; 436 break; 437 case XFA_Element::Date: 438 iVTType = XFA_VT_DATE; 439 break; 440 case XFA_Element::Time: 441 iVTType = XFA_VT_TIME; 442 break; 443 case XFA_Element::DateTime: 444 iVTType = XFA_VT_DATETIME; 445 break; 446 case XFA_Element::Boolean: 447 iVTType = XFA_VT_BOOLEAN; 448 break; 449 case XFA_Element::Integer: 450 iVTType = XFA_VT_INTEGER; 451 break; 452 case XFA_Element::Text: 453 iVTType = XFA_VT_TEXT; 454 break; 455 default: 456 iVTType = XFA_VT_NULL; 457 break; 458 } 459 return CXFA_LocaleValue(iVTType, pNode->GetRawValue(), 460 pNode->GetDocument()->GetLocalMgr()); 461 } 462 463 bool XFA_FDEExtension_ResolveNamespaceQualifier(CFX_XMLElement* pNode, 464 const WideString& wsQualifier, 465 WideString* wsNamespaceURI) { 466 if (!pNode) 467 return false; 468 469 CFX_XMLNode* pFakeRoot = pNode->GetNodeItem(CFX_XMLNode::Root); 470 WideString wsNSAttribute; 471 bool bRet = false; 472 if (wsQualifier.IsEmpty()) { 473 wsNSAttribute = L"xmlns"; 474 bRet = true; 475 } else { 476 wsNSAttribute = L"xmlns:" + wsQualifier; 477 } 478 for (CFX_XMLNode* pParent = pNode; pParent != pFakeRoot; 479 pParent = pParent->GetNodeItem(CFX_XMLNode::Parent)) { 480 if (pParent->GetType() != FX_XMLNODE_Element) 481 continue; 482 483 auto* pElement = static_cast<CFX_XMLElement*>(pParent); 484 if (pElement->HasAttribute(wsNSAttribute.c_str())) { 485 *wsNamespaceURI = pElement->GetString(wsNSAttribute.c_str()); 486 return true; 487 } 488 } 489 wsNamespaceURI->clear(); 490 return bRet; 491 } 492 493 void XFA_DataExporter_DealWithDataGroupNode(CXFA_Node* pDataNode) { 494 if (!pDataNode || pDataNode->GetElementType() == XFA_Element::DataValue) 495 return; 496 497 int32_t iChildNum = 0; 498 for (CXFA_Node* pChildNode = pDataNode->GetFirstChild(); pChildNode; 499 pChildNode = pChildNode->GetNextSibling()) { 500 iChildNum++; 501 XFA_DataExporter_DealWithDataGroupNode(pChildNode); 502 } 503 504 if (pDataNode->GetElementType() != XFA_Element::DataGroup) 505 return; 506 507 if (iChildNum > 0) { 508 CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); 509 ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); 510 CFX_XMLElement* pXMLElement = static_cast<CFX_XMLElement*>(pXMLNode); 511 if (pXMLElement->HasAttribute(L"xfa:dataNode")) 512 pXMLElement->RemoveAttribute(L"xfa:dataNode"); 513 514 return; 515 } 516 517 CFX_XMLNode* pXMLNode = pDataNode->GetXMLMappingNode(); 518 ASSERT(pXMLNode->GetType() == FX_XMLNODE_Element); 519 static_cast<CFX_XMLElement*>(pXMLNode)->SetString(L"xfa:dataNode", 520 L"dataGroup"); 521 } 522 523 void XFA_DataExporter_RegenerateFormFile( 524 CXFA_Node* pNode, 525 const RetainPtr<CFX_SeekableStreamProxy>& pStream, 526 const char* pChecksum, 527 bool bSaveXML) { 528 if (pNode->IsModelNode()) { 529 pStream->WriteString(L"<form"); 530 if (pChecksum) { 531 WideString wsChecksum = WideString::FromUTF8(pChecksum); 532 pStream->WriteString(L" checksum=\""); 533 pStream->WriteString(wsChecksum.AsStringView()); 534 pStream->WriteString(L"\""); 535 } 536 pStream->WriteString(L" xmlns=\""); 537 pStream->WriteString(WideStringView(kFormNS)); 538 539 WideString wsVersionNumber = RecognizeXFAVersionNumber( 540 ToNode(pNode->GetDocument()->GetXFAObject(XFA_HASHCODE_Template))); 541 if (wsVersionNumber.IsEmpty()) 542 wsVersionNumber = L"2.8"; 543 544 wsVersionNumber += L"/\"\n>"; 545 pStream->WriteString(wsVersionNumber.AsStringView()); 546 547 CXFA_Node* pChildNode = pNode->GetFirstChild(); 548 while (pChildNode) { 549 RegenerateFormFile_Container(pChildNode, pStream, false); 550 pChildNode = pChildNode->GetNextSibling(); 551 } 552 pStream->WriteString(L"</form\n>"); 553 } else { 554 RegenerateFormFile_Container(pNode, pStream, bSaveXML); 555 } 556 } 557 558 bool XFA_FieldIsMultiListBox(CXFA_Node* pFieldNode) { 559 if (!pFieldNode) 560 return false; 561 562 CXFA_Ui* pUIChild = pFieldNode->GetChild<CXFA_Ui>(0, XFA_Element::Ui, false); 563 if (!pUIChild) 564 return false; 565 566 CXFA_Node* pFirstChild = pUIChild->GetFirstChild(); 567 if (!pFirstChild || 568 pFirstChild->GetElementType() != XFA_Element::ChoiceList) { 569 return false; 570 } 571 572 return pFirstChild->JSObject()->GetEnum(XFA_Attribute::Open) == 573 XFA_AttributeEnum::MultiSelect; 574 } 575 576 int32_t XFA_MapRotation(int32_t nRotation) { 577 nRotation = nRotation % 360; 578 nRotation = nRotation < 0 ? nRotation + 360 : nRotation; 579 return nRotation; 580 } 581 582 const XFA_SCRIPTATTRIBUTEINFO* XFA_GetScriptAttributeByName( 583 XFA_Element eElement, 584 const WideStringView& wsAttributeName) { 585 if (wsAttributeName.IsEmpty()) 586 return nullptr; 587 588 int32_t iElementIndex = static_cast<int32_t>(eElement); 589 while (iElementIndex != -1) { 590 const XFA_SCRIPTHIERARCHY* scriptIndex = g_XFAScriptIndex + iElementIndex; 591 int32_t icount = scriptIndex->wAttributeCount; 592 if (icount == 0) { 593 iElementIndex = scriptIndex->wParentIndex; 594 continue; 595 } 596 uint32_t uHash = FX_HashCode_GetW(wsAttributeName, false); 597 int32_t iStart = scriptIndex->wAttributeStart, iEnd = iStart + icount - 1; 598 do { 599 int32_t iMid = (iStart + iEnd) / 2; 600 const XFA_SCRIPTATTRIBUTEINFO* pInfo = g_SomAttributeData + iMid; 601 if (uHash == pInfo->uHash) 602 return pInfo; 603 if (uHash < pInfo->uHash) 604 iEnd = iMid - 1; 605 else 606 iStart = iMid + 1; 607 } while (iStart <= iEnd); 608 iElementIndex = scriptIndex->wParentIndex; 609 } 610 return nullptr; 611 } 612