1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "xfa/fde/xml/fde_xml_imp.h" 8 9 #include <algorithm> 10 #include <utility> 11 12 #include "core/fxcrt/fx_ext.h" 13 #include "core/fxcrt/fx_safe_types.h" 14 #include "third_party/base/stl_util.h" 15 #include "xfa/fgas/crt/fgas_codepage.h" 16 17 namespace { 18 19 const uint32_t kMaxCharRange = 0x10ffff; 20 21 const uint16_t g_XMLValidCharRange[][2] = {{0x09, 0x09}, 22 {0x0A, 0x0A}, 23 {0x0D, 0x0D}, 24 {0x20, 0xD7FF}, 25 {0xE000, 0xFFFD}}; 26 27 bool FDE_IsXMLWhiteSpace(FX_WCHAR ch) { 28 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; 29 } 30 31 struct FDE_XMLNAMECHAR { 32 uint16_t wStart; 33 uint16_t wEnd; 34 bool bStartChar; 35 }; 36 37 const FDE_XMLNAMECHAR g_XMLNameChars[] = { 38 {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, 39 {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, 40 {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, 41 {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, 42 {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, 43 {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, 44 {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, 45 }; 46 47 bool FDE_IsXMLNameChar(FX_WCHAR ch, bool bFirstChar) { 48 int32_t iStart = 0; 49 int32_t iEnd = FX_ArraySize(g_XMLNameChars) - 1; 50 while (iStart <= iEnd) { 51 int32_t iMid = (iStart + iEnd) / 2; 52 if (ch < g_XMLNameChars[iMid].wStart) { 53 iEnd = iMid - 1; 54 } else if (ch > g_XMLNameChars[iMid].wEnd) { 55 iStart = iMid + 1; 56 } else { 57 return bFirstChar ? g_XMLNameChars[iMid].bStartChar : true; 58 } 59 } 60 return false; 61 } 62 63 } // namespace 64 65 bool FDE_IsXMLValidChar(FX_WCHAR ch) { 66 int32_t iStart = 0; 67 int32_t iEnd = FX_ArraySize(g_XMLValidCharRange) - 1; 68 while (iStart <= iEnd) { 69 int32_t iMid = (iStart + iEnd) / 2; 70 if (ch < g_XMLValidCharRange[iMid][0]) { 71 iEnd = iMid - 1; 72 } else if (ch > g_XMLValidCharRange[iMid][1]) { 73 iStart = iMid + 1; 74 } else { 75 return true; 76 } 77 } 78 return false; 79 } 80 81 CFDE_XMLNode::CFDE_XMLNode() 82 : m_pParent(nullptr), 83 m_pChild(nullptr), 84 m_pPrior(nullptr), 85 m_pNext(nullptr) {} 86 87 FDE_XMLNODETYPE CFDE_XMLNode::GetType() const { 88 return FDE_XMLNODE_Unknown; 89 } 90 91 CFDE_XMLNode::~CFDE_XMLNode() { 92 DeleteChildren(); 93 } 94 95 void CFDE_XMLNode::DeleteChildren() { 96 CFDE_XMLNode* pChild = m_pChild; 97 while (pChild) { 98 CFDE_XMLNode* pNext = pChild->m_pNext; 99 delete pChild; 100 pChild = pNext; 101 } 102 m_pChild = nullptr; 103 } 104 105 int32_t CFDE_XMLNode::CountChildNodes() const { 106 int32_t iCount = 0; 107 CFDE_XMLNode* pChild = m_pChild; 108 while (pChild) { 109 iCount++; 110 pChild = pChild->m_pNext; 111 } 112 return iCount; 113 } 114 115 CFDE_XMLNode* CFDE_XMLNode::GetChildNode(int32_t index) const { 116 CFDE_XMLNode* pChild = m_pChild; 117 while (pChild) { 118 if (index == 0) { 119 return pChild; 120 } 121 index--; 122 pChild = pChild->m_pNext; 123 } 124 return nullptr; 125 } 126 127 int32_t CFDE_XMLNode::GetChildNodeIndex(CFDE_XMLNode* pNode) const { 128 int32_t index = 0; 129 CFDE_XMLNode* pChild = m_pChild; 130 while (pChild) { 131 if (pChild == pNode) { 132 return index; 133 } 134 index++; 135 pChild = pChild->m_pNext; 136 } 137 return -1; 138 } 139 140 CFDE_XMLNode* CFDE_XMLNode::GetPath(const FX_WCHAR* pPath, 141 int32_t iLength, 142 bool bQualifiedName) const { 143 ASSERT(pPath); 144 if (iLength < 0) { 145 iLength = FXSYS_wcslen(pPath); 146 } 147 if (iLength == 0) { 148 return nullptr; 149 } 150 CFX_WideString csPath; 151 const FX_WCHAR* pStart = pPath; 152 const FX_WCHAR* pEnd = pPath + iLength; 153 FX_WCHAR ch; 154 while (pStart < pEnd) { 155 ch = *pStart++; 156 if (ch == L'/') { 157 break; 158 } else { 159 csPath += ch; 160 } 161 } 162 iLength -= pStart - pPath; 163 CFDE_XMLNode* pFind = nullptr; 164 if (csPath.GetLength() < 1) { 165 pFind = GetNodeItem(CFDE_XMLNode::Root); 166 } else if (csPath.Compare(L"..") == 0) { 167 pFind = m_pParent; 168 } else if (csPath.Compare(L".") == 0) { 169 pFind = (CFDE_XMLNode*)this; 170 } else { 171 CFX_WideString wsTag; 172 CFDE_XMLNode* pNode = m_pChild; 173 while (pNode) { 174 if (pNode->GetType() == FDE_XMLNODE_Element) { 175 if (bQualifiedName) { 176 ((CFDE_XMLElement*)pNode)->GetTagName(wsTag); 177 } else { 178 ((CFDE_XMLElement*)pNode)->GetLocalTagName(wsTag); 179 } 180 if (wsTag.Compare(csPath) == 0) { 181 if (iLength < 1) { 182 pFind = pNode; 183 } else { 184 pFind = pNode->GetPath(pStart, iLength, bQualifiedName); 185 } 186 if (pFind) 187 return pFind; 188 } 189 } 190 pNode = pNode->m_pNext; 191 } 192 } 193 if (!pFind || iLength < 1) 194 return pFind; 195 return pFind->GetPath(pStart, iLength, bQualifiedName); 196 } 197 198 int32_t CFDE_XMLNode::InsertChildNode(CFDE_XMLNode* pNode, int32_t index) { 199 pNode->m_pParent = this; 200 if (!m_pChild) { 201 m_pChild = pNode; 202 pNode->m_pPrior = nullptr; 203 pNode->m_pNext = nullptr; 204 return 0; 205 } 206 if (index == 0) { 207 pNode->m_pNext = m_pChild; 208 pNode->m_pPrior = nullptr; 209 m_pChild->m_pPrior = pNode; 210 m_pChild = pNode; 211 return 0; 212 } 213 int32_t iCount = 0; 214 CFDE_XMLNode* pFind = m_pChild; 215 while (++iCount != index && pFind->m_pNext) { 216 pFind = pFind->m_pNext; 217 } 218 pNode->m_pPrior = pFind; 219 pNode->m_pNext = pFind->m_pNext; 220 if (pFind->m_pNext) 221 pFind->m_pNext->m_pPrior = pNode; 222 pFind->m_pNext = pNode; 223 return iCount; 224 } 225 226 void CFDE_XMLNode::RemoveChildNode(CFDE_XMLNode* pNode) { 227 ASSERT(m_pChild && pNode); 228 if (m_pChild == pNode) { 229 m_pChild = pNode->m_pNext; 230 } else { 231 pNode->m_pPrior->m_pNext = pNode->m_pNext; 232 } 233 if (pNode->m_pNext) 234 pNode->m_pNext->m_pPrior = pNode->m_pPrior; 235 pNode->m_pParent = nullptr; 236 pNode->m_pNext = nullptr; 237 pNode->m_pPrior = nullptr; 238 } 239 240 CFDE_XMLNode* CFDE_XMLNode::GetNodeItem(CFDE_XMLNode::NodeItem eItem) const { 241 switch (eItem) { 242 case CFDE_XMLNode::Root: { 243 CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; 244 while (pParent->m_pParent) { 245 pParent = pParent->m_pParent; 246 } 247 return pParent; 248 } 249 case CFDE_XMLNode::Parent: 250 return m_pParent; 251 case CFDE_XMLNode::FirstSibling: { 252 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; 253 while (pItem->m_pPrior) { 254 pItem = pItem->m_pPrior; 255 } 256 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; 257 } 258 case CFDE_XMLNode::PriorSibling: 259 return m_pPrior; 260 case CFDE_XMLNode::NextSibling: 261 return m_pNext; 262 case CFDE_XMLNode::LastSibling: { 263 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; 264 while (pItem->m_pNext) 265 pItem = pItem->m_pNext; 266 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; 267 } 268 case CFDE_XMLNode::FirstNeighbor: { 269 CFDE_XMLNode* pParent = (CFDE_XMLNode*)this; 270 while (pParent->m_pParent) 271 pParent = pParent->m_pParent; 272 return pParent == (CFDE_XMLNode*)this ? nullptr : pParent; 273 } 274 case CFDE_XMLNode::PriorNeighbor: { 275 if (!m_pPrior) 276 return m_pParent; 277 278 CFDE_XMLNode* pItem = m_pPrior; 279 while (pItem->m_pChild) { 280 pItem = pItem->m_pChild; 281 while (pItem->m_pNext) 282 pItem = pItem->m_pNext; 283 } 284 return pItem; 285 } 286 case CFDE_XMLNode::NextNeighbor: { 287 if (m_pChild) 288 return m_pChild; 289 if (m_pNext) 290 return m_pNext; 291 CFDE_XMLNode* pItem = m_pParent; 292 while (pItem) { 293 if (pItem->m_pNext) 294 return pItem->m_pNext; 295 pItem = pItem->m_pParent; 296 } 297 return nullptr; 298 } 299 case CFDE_XMLNode::LastNeighbor: { 300 CFDE_XMLNode* pItem = (CFDE_XMLNode*)this; 301 while (pItem->m_pParent) { 302 pItem = pItem->m_pParent; 303 } 304 while (true) { 305 while (pItem->m_pNext) 306 pItem = pItem->m_pNext; 307 if (!pItem->m_pChild) 308 break; 309 pItem = pItem->m_pChild; 310 } 311 return pItem == (CFDE_XMLNode*)this ? nullptr : pItem; 312 } 313 case CFDE_XMLNode::FirstChild: 314 return m_pChild; 315 case CFDE_XMLNode::LastChild: { 316 if (!m_pChild) 317 return nullptr; 318 319 CFDE_XMLNode* pChild = m_pChild; 320 while (pChild->m_pNext) 321 pChild = pChild->m_pNext; 322 return pChild; 323 } 324 default: 325 break; 326 } 327 return nullptr; 328 } 329 330 int32_t CFDE_XMLNode::GetNodeLevel() const { 331 int32_t iLevel = 0; 332 const CFDE_XMLNode* pItem = m_pParent; 333 while (pItem) { 334 iLevel++; 335 pItem = pItem->m_pParent; 336 } 337 return iLevel; 338 } 339 340 bool CFDE_XMLNode::InsertNodeItem(CFDE_XMLNode::NodeItem eItem, 341 CFDE_XMLNode* pNode) { 342 switch (eItem) { 343 case CFDE_XMLNode::NextSibling: { 344 pNode->m_pParent = m_pParent; 345 pNode->m_pNext = m_pNext; 346 pNode->m_pPrior = this; 347 if (m_pNext) { 348 m_pNext->m_pPrior = pNode; 349 } 350 m_pNext = pNode; 351 return true; 352 } 353 case CFDE_XMLNode::PriorSibling: { 354 pNode->m_pParent = m_pParent; 355 pNode->m_pNext = this; 356 pNode->m_pPrior = m_pPrior; 357 if (m_pPrior) { 358 m_pPrior->m_pNext = pNode; 359 } else if (m_pParent) { 360 m_pParent->m_pChild = pNode; 361 } 362 m_pPrior = pNode; 363 return true; 364 } 365 default: 366 return false; 367 } 368 } 369 370 CFDE_XMLNode* CFDE_XMLNode::RemoveNodeItem(CFDE_XMLNode::NodeItem eItem) { 371 CFDE_XMLNode* pNode = nullptr; 372 switch (eItem) { 373 case CFDE_XMLNode::NextSibling: 374 if (m_pNext) { 375 pNode = m_pNext; 376 m_pNext = pNode->m_pNext; 377 if (m_pNext) { 378 m_pNext->m_pPrior = this; 379 } 380 pNode->m_pParent = nullptr; 381 pNode->m_pNext = nullptr; 382 pNode->m_pPrior = nullptr; 383 } 384 break; 385 default: 386 break; 387 } 388 return pNode; 389 } 390 391 CFDE_XMLNode* CFDE_XMLNode::Clone(bool bRecursive) { 392 return nullptr; 393 } 394 395 void CFDE_XMLNode::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream) { 396 CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; 397 switch (pNode->GetType()) { 398 case FDE_XMLNODE_Instruction: { 399 CFX_WideString ws; 400 CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; 401 if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { 402 ws = L"<?xml version=\"1.0\" encoding=\""; 403 uint16_t wCodePage = pXMLStream->GetCodePage(); 404 if (wCodePage == FX_CODEPAGE_UTF16LE) { 405 ws += L"UTF-16"; 406 } else if (wCodePage == FX_CODEPAGE_UTF16BE) { 407 ws += L"UTF-16be"; 408 } else { 409 ws += L"UTF-8"; 410 } 411 ws += L"\"?>"; 412 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 413 } else { 414 ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str()); 415 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 416 std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes; 417 int32_t i; 418 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); 419 CFX_WideString wsValue; 420 for (i = 0; i < iCount; i += 2) { 421 ws = L" "; 422 ws += attributes[i]; 423 ws += L"=\""; 424 wsValue = attributes[i + 1]; 425 wsValue.Replace(L"&", L"&"); 426 wsValue.Replace(L"<", L"<"); 427 wsValue.Replace(L">", L">"); 428 wsValue.Replace(L"\'", L"'"); 429 wsValue.Replace(L"\"", L"""); 430 ws += wsValue; 431 ws += L"\""; 432 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 433 } 434 std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData; 435 iCount = pdfium::CollectionSize<int32_t>(targetdata); 436 for (i = 0; i < iCount; i++) { 437 ws = L" \""; 438 ws += targetdata[i]; 439 ws += L"\""; 440 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 441 } 442 ws = L"?>"; 443 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 444 } 445 } break; 446 case FDE_XMLNODE_Element: { 447 CFX_WideString ws; 448 ws = L"<"; 449 ws += ((CFDE_XMLElement*)pNode)->m_wsTag; 450 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 451 std::vector<CFX_WideString>& attributes = 452 static_cast<CFDE_XMLElement*>(pNode)->m_Attributes; 453 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); 454 CFX_WideString wsValue; 455 for (int32_t i = 0; i < iCount; i += 2) { 456 ws = L" "; 457 ws += attributes[i]; 458 ws += L"=\""; 459 wsValue = attributes[i + 1]; 460 wsValue.Replace(L"&", L"&"); 461 wsValue.Replace(L"<", L"<"); 462 wsValue.Replace(L">", L">"); 463 wsValue.Replace(L"\'", L"'"); 464 wsValue.Replace(L"\"", L"""); 465 ws += wsValue; 466 ws += L"\""; 467 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 468 } 469 if (pNode->m_pChild) { 470 ws = L"\n>"; 471 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 472 CFDE_XMLNode* pChild = pNode->m_pChild; 473 while (pChild) { 474 pChild->SaveXMLNode(pXMLStream); 475 pChild = pChild->m_pNext; 476 } 477 ws = L"</"; 478 ws += ((CFDE_XMLElement*)pNode)->m_wsTag; 479 ws += L"\n>"; 480 } else { 481 ws = L"\n/>"; 482 } 483 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 484 } break; 485 case FDE_XMLNODE_Text: { 486 CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; 487 ws.Replace(L"&", L"&"); 488 ws.Replace(L"<", L"<"); 489 ws.Replace(L">", L">"); 490 ws.Replace(L"\'", L"'"); 491 ws.Replace(L"\"", L"""); 492 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 493 } break; 494 case FDE_XMLNODE_CharData: { 495 CFX_WideString ws = L"<![CDATA["; 496 ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData; 497 ws += L"]]>"; 498 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 499 } break; 500 case FDE_XMLNODE_Unknown: 501 break; 502 default: 503 break; 504 } 505 } 506 507 void CFDE_XMLNode::CloneChildren(CFDE_XMLNode* pClone) { 508 if (!m_pChild) { 509 return; 510 } 511 CFDE_XMLNode* pNext = m_pChild; 512 CFDE_XMLNode* pCloneNext = pNext->Clone(true); 513 pClone->InsertChildNode(pCloneNext); 514 pNext = pNext->m_pNext; 515 while (pNext) { 516 CFDE_XMLNode* pChild = pNext->Clone(true); 517 pCloneNext->InsertNodeItem(CFDE_XMLNode::NextSibling, pChild); 518 pCloneNext = pChild; 519 pNext = pNext->m_pNext; 520 } 521 } 522 523 CFDE_XMLInstruction::CFDE_XMLInstruction(const CFX_WideString& wsTarget) 524 : m_wsTarget(wsTarget) { 525 ASSERT(m_wsTarget.GetLength() > 0); 526 } 527 528 FDE_XMLNODETYPE CFDE_XMLInstruction::GetType() const { 529 return FDE_XMLNODE_Instruction; 530 } 531 532 CFDE_XMLNode* CFDE_XMLInstruction::Clone(bool bRecursive) { 533 CFDE_XMLInstruction* pClone = new CFDE_XMLInstruction(m_wsTarget); 534 if (!pClone) 535 return nullptr; 536 537 pClone->m_Attributes = m_Attributes; 538 pClone->m_TargetData = m_TargetData; 539 if (bRecursive) 540 CloneChildren(pClone); 541 542 return pClone; 543 } 544 545 int32_t CFDE_XMLInstruction::CountAttributes() const { 546 return pdfium::CollectionSize<int32_t>(m_Attributes) / 2; 547 } 548 549 bool CFDE_XMLInstruction::GetAttribute(int32_t index, 550 CFX_WideString& wsAttriName, 551 CFX_WideString& wsAttriValue) const { 552 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 553 ASSERT(index > -1 && index < iCount / 2); 554 for (int32_t i = 0; i < iCount; i += 2) { 555 if (index == 0) { 556 wsAttriName = m_Attributes[i]; 557 wsAttriValue = m_Attributes[i + 1]; 558 return true; 559 } 560 index--; 561 } 562 return false; 563 } 564 565 bool CFDE_XMLInstruction::HasAttribute(const FX_WCHAR* pwsAttriName) const { 566 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 567 for (int32_t i = 0; i < iCount; i += 2) { 568 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 569 return true; 570 } 571 } 572 return false; 573 } 574 575 void CFDE_XMLInstruction::GetString(const FX_WCHAR* pwsAttriName, 576 CFX_WideString& wsAttriValue, 577 const FX_WCHAR* pwsDefValue) const { 578 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 579 for (int32_t i = 0; i < iCount; i += 2) { 580 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 581 wsAttriValue = m_Attributes[i + 1]; 582 return; 583 } 584 } 585 wsAttriValue = pwsDefValue; 586 } 587 588 void CFDE_XMLInstruction::SetString(const CFX_WideString& wsAttriName, 589 const CFX_WideString& wsAttriValue) { 590 ASSERT(wsAttriName.GetLength() > 0); 591 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 592 for (int32_t i = 0; i < iCount; i += 2) { 593 if (m_Attributes[i].Compare(wsAttriName) == 0) { 594 m_Attributes[i] = wsAttriName; 595 m_Attributes[i + 1] = wsAttriValue; 596 return; 597 } 598 } 599 m_Attributes.push_back(wsAttriName); 600 m_Attributes.push_back(wsAttriValue); 601 } 602 603 int32_t CFDE_XMLInstruction::GetInteger(const FX_WCHAR* pwsAttriName, 604 int32_t iDefValue) const { 605 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 606 for (int32_t i = 0; i < iCount; i += 2) { 607 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 608 return FXSYS_wtoi(m_Attributes[i + 1].c_str()); 609 } 610 } 611 return iDefValue; 612 } 613 614 void CFDE_XMLInstruction::SetInteger(const FX_WCHAR* pwsAttriName, 615 int32_t iAttriValue) { 616 CFX_WideString wsValue; 617 wsValue.Format(L"%d", iAttriValue); 618 SetString(pwsAttriName, wsValue); 619 } 620 621 FX_FLOAT CFDE_XMLInstruction::GetFloat(const FX_WCHAR* pwsAttriName, 622 FX_FLOAT fDefValue) const { 623 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 624 for (int32_t i = 0; i < iCount; i += 2) { 625 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 626 return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); 627 } 628 } 629 return fDefValue; 630 } 631 632 void CFDE_XMLInstruction::SetFloat(const FX_WCHAR* pwsAttriName, 633 FX_FLOAT fAttriValue) { 634 CFX_WideString wsValue; 635 wsValue.Format(L"%f", fAttriValue); 636 SetString(pwsAttriName, wsValue); 637 } 638 639 void CFDE_XMLInstruction::RemoveAttribute(const FX_WCHAR* pwsAttriName) { 640 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 641 for (int32_t i = 0; i < iCount; i += 2) { 642 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 643 m_Attributes.erase(m_Attributes.begin() + i, 644 m_Attributes.begin() + i + 2); 645 return; 646 } 647 } 648 } 649 650 int32_t CFDE_XMLInstruction::CountData() const { 651 return pdfium::CollectionSize<int32_t>(m_TargetData); 652 } 653 654 bool CFDE_XMLInstruction::GetData(int32_t index, CFX_WideString& wsData) const { 655 if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData)) 656 return false; 657 658 wsData = m_TargetData[index]; 659 return true; 660 } 661 662 void CFDE_XMLInstruction::AppendData(const CFX_WideString& wsData) { 663 m_TargetData.push_back(wsData); 664 } 665 666 void CFDE_XMLInstruction::RemoveData(int32_t index) { 667 if (index < 0 || index >= pdfium::CollectionSize<int32_t>(m_TargetData)) 668 return; 669 670 m_TargetData.erase(m_TargetData.begin() + index); 671 } 672 673 CFDE_XMLInstruction::~CFDE_XMLInstruction() {} 674 675 CFDE_XMLElement::CFDE_XMLElement(const CFX_WideString& wsTag) 676 : CFDE_XMLNode(), m_wsTag(wsTag), m_Attributes() { 677 ASSERT(m_wsTag.GetLength() > 0); 678 } 679 680 CFDE_XMLElement::~CFDE_XMLElement() {} 681 682 FDE_XMLNODETYPE CFDE_XMLElement::GetType() const { 683 return FDE_XMLNODE_Element; 684 } 685 686 CFDE_XMLNode* CFDE_XMLElement::Clone(bool bRecursive) { 687 CFDE_XMLElement* pClone = new CFDE_XMLElement(m_wsTag); 688 if (!pClone) 689 return nullptr; 690 691 pClone->m_Attributes = m_Attributes; 692 if (bRecursive) { 693 CloneChildren(pClone); 694 } else { 695 CFX_WideString wsText; 696 CFDE_XMLNode* pChild = m_pChild; 697 while (pChild) { 698 switch (pChild->GetType()) { 699 case FDE_XMLNODE_Text: 700 wsText += ((CFDE_XMLText*)pChild)->m_wsText; 701 break; 702 default: 703 break; 704 } 705 pChild = pChild->m_pNext; 706 } 707 pClone->SetTextData(wsText); 708 } 709 return pClone; 710 } 711 712 void CFDE_XMLElement::GetTagName(CFX_WideString& wsTag) const { 713 wsTag = m_wsTag; 714 } 715 716 void CFDE_XMLElement::GetLocalTagName(CFX_WideString& wsTag) const { 717 FX_STRSIZE iFind = m_wsTag.Find(L':', 0); 718 if (iFind < 0) { 719 wsTag = m_wsTag; 720 } else { 721 wsTag = m_wsTag.Right(m_wsTag.GetLength() - iFind - 1); 722 } 723 } 724 725 void CFDE_XMLElement::GetNamespacePrefix(CFX_WideString& wsPrefix) const { 726 FX_STRSIZE iFind = m_wsTag.Find(L':', 0); 727 if (iFind < 0) { 728 wsPrefix.clear(); 729 } else { 730 wsPrefix = m_wsTag.Left(iFind); 731 } 732 } 733 734 void CFDE_XMLElement::GetNamespaceURI(CFX_WideString& wsNamespace) const { 735 CFX_WideString wsAttri(L"xmlns"), wsPrefix; 736 GetNamespacePrefix(wsPrefix); 737 if (wsPrefix.GetLength() > 0) { 738 wsAttri += L":"; 739 wsAttri += wsPrefix; 740 } 741 wsNamespace.clear(); 742 CFDE_XMLNode* pNode = (CFDE_XMLNode*)this; 743 while (pNode) { 744 if (pNode->GetType() != FDE_XMLNODE_Element) { 745 break; 746 } 747 CFDE_XMLElement* pElement = (CFDE_XMLElement*)pNode; 748 if (!pElement->HasAttribute(wsAttri.c_str())) { 749 pNode = pNode->GetNodeItem(CFDE_XMLNode::Parent); 750 continue; 751 } 752 pElement->GetString(wsAttri.c_str(), wsNamespace); 753 break; 754 } 755 } 756 757 int32_t CFDE_XMLElement::CountAttributes() const { 758 return pdfium::CollectionSize<int32_t>(m_Attributes) / 2; 759 } 760 761 bool CFDE_XMLElement::GetAttribute(int32_t index, 762 CFX_WideString& wsAttriName, 763 CFX_WideString& wsAttriValue) const { 764 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 765 ASSERT(index > -1 && index < iCount / 2); 766 for (int32_t i = 0; i < iCount; i += 2) { 767 if (index == 0) { 768 wsAttriName = m_Attributes[i]; 769 wsAttriValue = m_Attributes[i + 1]; 770 return true; 771 } 772 index--; 773 } 774 return false; 775 } 776 777 bool CFDE_XMLElement::HasAttribute(const FX_WCHAR* pwsAttriName) const { 778 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 779 for (int32_t i = 0; i < iCount; i += 2) { 780 if (m_Attributes[i].Compare(pwsAttriName) == 0) 781 return true; 782 } 783 return false; 784 } 785 786 void CFDE_XMLElement::GetString(const FX_WCHAR* pwsAttriName, 787 CFX_WideString& wsAttriValue, 788 const FX_WCHAR* pwsDefValue) const { 789 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 790 for (int32_t i = 0; i < iCount; i += 2) { 791 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 792 wsAttriValue = m_Attributes[i + 1]; 793 return; 794 } 795 } 796 wsAttriValue = pwsDefValue; 797 } 798 799 void CFDE_XMLElement::SetString(const CFX_WideString& wsAttriName, 800 const CFX_WideString& wsAttriValue) { 801 ASSERT(wsAttriName.GetLength() > 0); 802 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 803 for (int32_t i = 0; i < iCount; i += 2) { 804 if (m_Attributes[i].Compare(wsAttriName) == 0) { 805 m_Attributes[i] = wsAttriName; 806 m_Attributes[i + 1] = wsAttriValue; 807 return; 808 } 809 } 810 m_Attributes.push_back(wsAttriName); 811 m_Attributes.push_back(wsAttriValue); 812 } 813 814 int32_t CFDE_XMLElement::GetInteger(const FX_WCHAR* pwsAttriName, 815 int32_t iDefValue) const { 816 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 817 for (int32_t i = 0; i < iCount; i += 2) { 818 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 819 return FXSYS_wtoi(m_Attributes[i + 1].c_str()); 820 } 821 } 822 return iDefValue; 823 } 824 825 void CFDE_XMLElement::SetInteger(const FX_WCHAR* pwsAttriName, 826 int32_t iAttriValue) { 827 CFX_WideString wsValue; 828 wsValue.Format(L"%d", iAttriValue); 829 SetString(pwsAttriName, wsValue); 830 } 831 832 FX_FLOAT CFDE_XMLElement::GetFloat(const FX_WCHAR* pwsAttriName, 833 FX_FLOAT fDefValue) const { 834 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 835 for (int32_t i = 0; i < iCount; i += 2) { 836 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 837 return FXSYS_wcstof(m_Attributes[i + 1].c_str(), -1, nullptr); 838 } 839 } 840 return fDefValue; 841 } 842 843 void CFDE_XMLElement::SetFloat(const FX_WCHAR* pwsAttriName, 844 FX_FLOAT fAttriValue) { 845 CFX_WideString wsValue; 846 wsValue.Format(L"%f", fAttriValue); 847 SetString(pwsAttriName, wsValue); 848 } 849 850 void CFDE_XMLElement::RemoveAttribute(const FX_WCHAR* pwsAttriName) { 851 int32_t iCount = pdfium::CollectionSize<int32_t>(m_Attributes); 852 for (int32_t i = 0; i < iCount; i += 2) { 853 if (m_Attributes[i].Compare(pwsAttriName) == 0) { 854 m_Attributes.erase(m_Attributes.begin() + i, 855 m_Attributes.begin() + i + 2); 856 return; 857 } 858 } 859 } 860 861 void CFDE_XMLElement::GetTextData(CFX_WideString& wsText) const { 862 CFX_WideTextBuf buffer; 863 CFDE_XMLNode* pChild = m_pChild; 864 while (pChild) { 865 switch (pChild->GetType()) { 866 case FDE_XMLNODE_Text: 867 buffer << ((CFDE_XMLText*)pChild)->m_wsText; 868 break; 869 case FDE_XMLNODE_CharData: 870 buffer << ((CFDE_XMLCharData*)pChild)->m_wsCharData; 871 break; 872 default: 873 break; 874 } 875 pChild = pChild->m_pNext; 876 } 877 wsText = buffer.AsStringC(); 878 } 879 880 void CFDE_XMLElement::SetTextData(const CFX_WideString& wsText) { 881 if (wsText.GetLength() < 1) { 882 return; 883 } 884 InsertChildNode(new CFDE_XMLText(wsText)); 885 } 886 887 CFDE_XMLText::CFDE_XMLText(const CFX_WideString& wsText) 888 : CFDE_XMLNode(), m_wsText(wsText) {} 889 890 FDE_XMLNODETYPE CFDE_XMLText::GetType() const { 891 return FDE_XMLNODE_Text; 892 } 893 894 CFDE_XMLNode* CFDE_XMLText::Clone(bool bRecursive) { 895 CFDE_XMLText* pClone = new CFDE_XMLText(m_wsText); 896 return pClone; 897 } 898 899 CFDE_XMLText::~CFDE_XMLText() {} 900 901 CFDE_XMLCharData::CFDE_XMLCharData(const CFX_WideString& wsCData) 902 : CFDE_XMLDeclaration(), m_wsCharData(wsCData) {} 903 904 FDE_XMLNODETYPE CFDE_XMLCharData::GetType() const { 905 return FDE_XMLNODE_CharData; 906 } 907 908 CFDE_XMLNode* CFDE_XMLCharData::Clone(bool bRecursive) { 909 CFDE_XMLCharData* pClone = new CFDE_XMLCharData(m_wsCharData); 910 return pClone; 911 } 912 913 CFDE_XMLCharData::~CFDE_XMLCharData() {} 914 915 CFDE_XMLDoc::CFDE_XMLDoc() : m_pRoot(nullptr) { 916 Reset(true); 917 CFDE_XMLInstruction* pXML = new CFDE_XMLInstruction(L"xml"); 918 m_pRoot->InsertChildNode(pXML); 919 } 920 921 CFDE_XMLDoc::~CFDE_XMLDoc() { 922 Reset(false); 923 } 924 925 void CFDE_XMLDoc::Reset(bool bInitRoot) { 926 m_iStatus = 0; 927 m_pStream = nullptr; 928 if (bInitRoot) { 929 if (m_pRoot) 930 m_pRoot->DeleteChildren(); 931 else 932 m_pRoot = new CFDE_XMLNode; 933 } else { 934 delete m_pRoot; 935 m_pRoot = nullptr; 936 } 937 ReleaseParser(); 938 } 939 940 void CFDE_XMLDoc::ReleaseParser() { 941 m_pXMLParser.reset(); 942 } 943 944 bool CFDE_XMLDoc::LoadXML(std::unique_ptr<IFDE_XMLParser> pXMLParser) { 945 if (!pXMLParser) 946 return false; 947 948 Reset(true); 949 m_pXMLParser = std::move(pXMLParser); 950 return true; 951 } 952 953 int32_t CFDE_XMLDoc::DoLoad(IFX_Pause* pPause) { 954 if (m_iStatus < 100) 955 m_iStatus = m_pXMLParser->DoParser(pPause); 956 957 return m_iStatus; 958 } 959 960 void CFDE_XMLDoc::CloseXML() { 961 ReleaseParser(); 962 } 963 964 void CFDE_XMLDoc::SaveXMLNode(const CFX_RetainPtr<IFGAS_Stream>& pXMLStream, 965 CFDE_XMLNode* pINode) { 966 CFDE_XMLNode* pNode = (CFDE_XMLNode*)pINode; 967 switch (pNode->GetType()) { 968 case FDE_XMLNODE_Instruction: { 969 CFX_WideString ws; 970 CFDE_XMLInstruction* pInstruction = (CFDE_XMLInstruction*)pNode; 971 if (pInstruction->m_wsTarget.CompareNoCase(L"xml") == 0) { 972 ws = L"<?xml version=\"1.0\" encoding=\""; 973 uint16_t wCodePage = pXMLStream->GetCodePage(); 974 if (wCodePage == FX_CODEPAGE_UTF16LE) { 975 ws += L"UTF-16"; 976 } else if (wCodePage == FX_CODEPAGE_UTF16BE) { 977 ws += L"UTF-16be"; 978 } else { 979 ws += L"UTF-8"; 980 } 981 ws += L"\"?>"; 982 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 983 } else { 984 ws.Format(L"<?%s", pInstruction->m_wsTarget.c_str()); 985 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 986 std::vector<CFX_WideString>& attributes = pInstruction->m_Attributes; 987 int32_t i; 988 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); 989 CFX_WideString wsValue; 990 for (i = 0; i < iCount; i += 2) { 991 ws = L" "; 992 ws += attributes[i]; 993 ws += L"=\""; 994 wsValue = attributes[i + 1]; 995 wsValue.Replace(L"&", L"&"); 996 wsValue.Replace(L"<", L"<"); 997 wsValue.Replace(L">", L">"); 998 wsValue.Replace(L"\'", L"'"); 999 wsValue.Replace(L"\"", L"""); 1000 ws += wsValue; 1001 ws += L"\""; 1002 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1003 } 1004 std::vector<CFX_WideString>& targetdata = pInstruction->m_TargetData; 1005 iCount = pdfium::CollectionSize<int32_t>(targetdata); 1006 for (i = 0; i < iCount; i++) { 1007 ws = L" \""; 1008 ws += targetdata[i]; 1009 ws += L"\""; 1010 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1011 } 1012 ws = L"?>"; 1013 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1014 } 1015 } break; 1016 case FDE_XMLNODE_Element: { 1017 CFX_WideString ws; 1018 ws = L"<"; 1019 ws += ((CFDE_XMLElement*)pNode)->m_wsTag; 1020 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1021 std::vector<CFX_WideString>& attributes = 1022 static_cast<CFDE_XMLElement*>(pNode)->m_Attributes; 1023 int32_t iCount = pdfium::CollectionSize<int32_t>(attributes); 1024 CFX_WideString wsValue; 1025 for (int32_t i = 0; i < iCount; i += 2) { 1026 ws = L" "; 1027 ws += attributes[i]; 1028 ws += L"=\""; 1029 wsValue = attributes[i + 1]; 1030 wsValue.Replace(L"&", L"&"); 1031 wsValue.Replace(L"<", L"<"); 1032 wsValue.Replace(L">", L">"); 1033 wsValue.Replace(L"\'", L"'"); 1034 wsValue.Replace(L"\"", L"""); 1035 ws += wsValue; 1036 ws += L"\""; 1037 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1038 } 1039 if (pNode->m_pChild) { 1040 ws = L"\n>"; 1041 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1042 CFDE_XMLNode* pChild = pNode->m_pChild; 1043 while (pChild) { 1044 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pChild)); 1045 pChild = pChild->m_pNext; 1046 } 1047 ws = L"</"; 1048 ws += ((CFDE_XMLElement*)pNode)->m_wsTag; 1049 ws += L"\n>"; 1050 } else { 1051 ws = L"\n/>"; 1052 } 1053 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1054 } break; 1055 case FDE_XMLNODE_Text: { 1056 CFX_WideString ws = ((CFDE_XMLText*)pNode)->m_wsText; 1057 ws.Replace(L"&", L"&"); 1058 ws.Replace(L"<", L"<"); 1059 ws.Replace(L">", L">"); 1060 ws.Replace(L"\'", L"'"); 1061 ws.Replace(L"\"", L"""); 1062 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1063 } break; 1064 case FDE_XMLNODE_CharData: { 1065 CFX_WideString ws = L"<![CDATA["; 1066 ws += ((CFDE_XMLCharData*)pNode)->m_wsCharData; 1067 ws += L"]]>"; 1068 pXMLStream->WriteString(ws.c_str(), ws.GetLength()); 1069 } break; 1070 case FDE_XMLNODE_Unknown: 1071 break; 1072 default: 1073 break; 1074 } 1075 } 1076 1077 void CFDE_XMLDoc::SaveXML(CFX_RetainPtr<IFGAS_Stream>& pXMLStream, 1078 bool bSaveBOM) { 1079 if (!pXMLStream || pXMLStream == m_pStream) { 1080 m_pStream->Seek(FX_STREAMSEEK_Begin, 0); 1081 pXMLStream = m_pStream; 1082 } 1083 ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Text) != 0); 1084 ASSERT((pXMLStream->GetAccessModes() & FX_STREAMACCESS_Write) != 0); 1085 uint16_t wCodePage = pXMLStream->GetCodePage(); 1086 if (wCodePage != FX_CODEPAGE_UTF16LE && wCodePage != FX_CODEPAGE_UTF16BE && 1087 wCodePage != FX_CODEPAGE_UTF8) { 1088 wCodePage = FX_CODEPAGE_UTF8; 1089 pXMLStream->SetCodePage(wCodePage); 1090 } 1091 if (bSaveBOM) { 1092 pXMLStream->WriteString(L"\xFEFF", 1); 1093 } 1094 CFDE_XMLNode* pNode = m_pRoot->m_pChild; 1095 while (pNode) { 1096 SaveXMLNode(pXMLStream, static_cast<CFDE_XMLNode*>(pNode)); 1097 pNode = pNode->m_pNext; 1098 } 1099 if (pXMLStream == m_pStream) { 1100 int32_t iPos = pXMLStream->GetPosition(); 1101 pXMLStream->SetLength(iPos); 1102 } 1103 } 1104 1105 CFDE_BlockBuffer::CFDE_BlockBuffer(int32_t iAllocStep) 1106 : m_iDataLength(0), 1107 m_iBufferSize(0), 1108 m_iAllocStep(iAllocStep), 1109 m_iStartPosition(0) {} 1110 1111 CFDE_BlockBuffer::~CFDE_BlockBuffer() { 1112 ClearBuffer(); 1113 } 1114 1115 FX_WCHAR* CFDE_BlockBuffer::GetAvailableBlock(int32_t& iIndexInBlock) { 1116 iIndexInBlock = 0; 1117 if (!m_BlockArray.GetSize()) { 1118 return nullptr; 1119 } 1120 int32_t iRealIndex = m_iStartPosition + m_iDataLength; 1121 if (iRealIndex == m_iBufferSize) { 1122 FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep); 1123 m_BlockArray.Add(pBlock); 1124 m_iBufferSize += m_iAllocStep; 1125 return pBlock; 1126 } 1127 iIndexInBlock = iRealIndex % m_iAllocStep; 1128 return m_BlockArray[iRealIndex / m_iAllocStep]; 1129 } 1130 1131 bool CFDE_BlockBuffer::InitBuffer(int32_t iBufferSize) { 1132 ClearBuffer(); 1133 int32_t iNumOfBlock = (iBufferSize - 1) / m_iAllocStep + 1; 1134 for (int32_t i = 0; i < iNumOfBlock; i++) { 1135 m_BlockArray.Add(FX_Alloc(FX_WCHAR, m_iAllocStep)); 1136 } 1137 m_iBufferSize = iNumOfBlock * m_iAllocStep; 1138 return true; 1139 } 1140 1141 void CFDE_BlockBuffer::SetTextChar(int32_t iIndex, FX_WCHAR ch) { 1142 if (iIndex < 0) { 1143 return; 1144 } 1145 int32_t iRealIndex = m_iStartPosition + iIndex; 1146 int32_t iBlockIndex = iRealIndex / m_iAllocStep; 1147 int32_t iInnerIndex = iRealIndex % m_iAllocStep; 1148 int32_t iBlockSize = m_BlockArray.GetSize(); 1149 if (iBlockIndex >= iBlockSize) { 1150 int32_t iNewBlocks = iBlockIndex - iBlockSize + 1; 1151 do { 1152 FX_WCHAR* pBlock = FX_Alloc(FX_WCHAR, m_iAllocStep); 1153 m_BlockArray.Add(pBlock); 1154 m_iBufferSize += m_iAllocStep; 1155 } while (--iNewBlocks); 1156 } 1157 FX_WCHAR* pTextData = m_BlockArray[iBlockIndex]; 1158 *(pTextData + iInnerIndex) = ch; 1159 if (m_iDataLength <= iIndex) { 1160 m_iDataLength = iIndex + 1; 1161 } 1162 } 1163 1164 int32_t CFDE_BlockBuffer::DeleteTextChars(int32_t iCount, bool bDirection) { 1165 if (iCount <= 0) { 1166 return m_iDataLength; 1167 } 1168 if (iCount >= m_iDataLength) { 1169 Reset(false); 1170 return 0; 1171 } 1172 if (bDirection) { 1173 m_iStartPosition += iCount; 1174 m_iDataLength -= iCount; 1175 } else { 1176 m_iDataLength -= iCount; 1177 } 1178 return m_iDataLength; 1179 } 1180 1181 void CFDE_BlockBuffer::GetTextData(CFX_WideString& wsTextData, 1182 int32_t iStart, 1183 int32_t iLength) const { 1184 wsTextData.clear(); 1185 int32_t iMaybeDataLength = m_iBufferSize - 1 - m_iStartPosition; 1186 if (iStart < 0 || iStart > iMaybeDataLength) { 1187 return; 1188 } 1189 if (iLength == -1 || iLength > iMaybeDataLength) { 1190 iLength = iMaybeDataLength; 1191 } 1192 if (iLength <= 0) { 1193 return; 1194 } 1195 FX_WCHAR* pBuf = wsTextData.GetBuffer(iLength); 1196 if (!pBuf) { 1197 return; 1198 } 1199 int32_t iStartBlockIndex = 0; 1200 int32_t iStartInnerIndex = 0; 1201 TextDataIndex2BufIndex(iStart, iStartBlockIndex, iStartInnerIndex); 1202 int32_t iEndBlockIndex = 0; 1203 int32_t iEndInnerIndex = 0; 1204 TextDataIndex2BufIndex(iStart + iLength, iEndBlockIndex, iEndInnerIndex); 1205 int32_t iPointer = 0; 1206 for (int32_t i = iStartBlockIndex; i <= iEndBlockIndex; i++) { 1207 int32_t iBufferPointer = 0; 1208 int32_t iCopyLength = m_iAllocStep; 1209 if (i == iStartBlockIndex) { 1210 iCopyLength -= iStartInnerIndex; 1211 iBufferPointer = iStartInnerIndex; 1212 } 1213 if (i == iEndBlockIndex) { 1214 iCopyLength -= ((m_iAllocStep - 1) - iEndInnerIndex); 1215 } 1216 FX_WCHAR* pBlockBuf = m_BlockArray[i]; 1217 FXSYS_memcpy(pBuf + iPointer, pBlockBuf + iBufferPointer, 1218 iCopyLength * sizeof(FX_WCHAR)); 1219 iPointer += iCopyLength; 1220 } 1221 wsTextData.ReleaseBuffer(iLength); 1222 } 1223 1224 void CFDE_BlockBuffer::TextDataIndex2BufIndex(const int32_t iIndex, 1225 int32_t& iBlockIndex, 1226 int32_t& iInnerIndex) const { 1227 ASSERT(iIndex >= 0); 1228 int32_t iRealIndex = m_iStartPosition + iIndex; 1229 iBlockIndex = iRealIndex / m_iAllocStep; 1230 iInnerIndex = iRealIndex % m_iAllocStep; 1231 } 1232 1233 void CFDE_BlockBuffer::ClearBuffer() { 1234 m_iBufferSize = 0; 1235 int32_t iSize = m_BlockArray.GetSize(); 1236 for (int32_t i = 0; i < iSize; i++) { 1237 FX_Free(m_BlockArray[i]); 1238 } 1239 m_BlockArray.RemoveAll(); 1240 } 1241 1242 CFDE_XMLSyntaxParser::CFDE_XMLSyntaxParser() 1243 : m_pStream(nullptr), 1244 m_iXMLPlaneSize(-1), 1245 m_iCurrentPos(0), 1246 m_iCurrentNodeNum(-1), 1247 m_iLastNodeNum(-1), 1248 m_iParsedChars(0), 1249 m_iParsedBytes(0), 1250 m_pBuffer(nullptr), 1251 m_iBufferChars(0), 1252 m_bEOS(false), 1253 m_pStart(nullptr), 1254 m_pEnd(nullptr), 1255 m_XMLNodeStack(16), 1256 m_iAllocStep(m_BlockBuffer.GetAllocStep()), 1257 m_iDataLength(m_BlockBuffer.GetDataLengthRef()), 1258 m_pCurrentBlock(nullptr), 1259 m_iIndexInBlock(0), 1260 m_iTextDataLength(0), 1261 m_syntaxParserResult(FDE_XmlSyntaxResult::None), 1262 m_syntaxParserState(FDE_XmlSyntaxState::Text), 1263 m_wQuotationMark(0), 1264 m_iEntityStart(-1), 1265 m_SkipStack(16) { 1266 m_CurNode.iNodeNum = -1; 1267 m_CurNode.eNodeType = FDE_XMLNODE_Unknown; 1268 } 1269 1270 void CFDE_XMLSyntaxParser::Init(const CFX_RetainPtr<IFGAS_Stream>& pStream, 1271 int32_t iXMLPlaneSize, 1272 int32_t iTextDataSize) { 1273 ASSERT(!m_pStream && !m_pBuffer); 1274 ASSERT(pStream && iXMLPlaneSize > 0); 1275 int32_t iStreamLength = pStream->GetLength(); 1276 ASSERT(iStreamLength > 0); 1277 m_pStream = pStream; 1278 m_iXMLPlaneSize = std::min(iXMLPlaneSize, iStreamLength); 1279 uint8_t bom[4]; 1280 m_iCurrentPos = m_pStream->GetBOM(bom); 1281 ASSERT(!m_pBuffer); 1282 1283 FX_SAFE_INT32 alloc_size_safe = m_iXMLPlaneSize; 1284 alloc_size_safe += 1; // For NUL. 1285 if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { 1286 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1287 return; 1288 } 1289 1290 m_pBuffer = FX_Alloc( 1291 FX_WCHAR, pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe)); 1292 m_pStart = m_pEnd = m_pBuffer; 1293 ASSERT(!m_BlockBuffer.IsInitialized()); 1294 m_BlockBuffer.InitBuffer(); 1295 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1296 m_iParsedBytes = m_iParsedChars = 0; 1297 m_iBufferChars = 0; 1298 } 1299 1300 FDE_XmlSyntaxResult CFDE_XMLSyntaxParser::DoSyntaxParse() { 1301 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error || 1302 m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) { 1303 return m_syntaxParserResult; 1304 } 1305 ASSERT(m_pStream && m_pBuffer && m_BlockBuffer.IsInitialized()); 1306 int32_t iStreamLength = m_pStream->GetLength(); 1307 int32_t iPos; 1308 1309 FDE_XmlSyntaxResult syntaxParserResult = FDE_XmlSyntaxResult::None; 1310 while (true) { 1311 if (m_pStart >= m_pEnd) { 1312 if (m_bEOS || m_iCurrentPos >= iStreamLength) { 1313 m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; 1314 return m_syntaxParserResult; 1315 } 1316 m_iParsedChars += (m_pEnd - m_pBuffer); 1317 m_iParsedBytes = m_iCurrentPos; 1318 if (m_pStream->GetPosition() != m_iCurrentPos) { 1319 m_pStream->Seek(FX_STREAMSEEK_Begin, m_iCurrentPos); 1320 } 1321 m_iBufferChars = 1322 m_pStream->ReadString(m_pBuffer, m_iXMLPlaneSize, m_bEOS); 1323 iPos = m_pStream->GetPosition(); 1324 if (m_iBufferChars < 1) { 1325 m_iCurrentPos = iStreamLength; 1326 m_syntaxParserResult = FDE_XmlSyntaxResult::EndOfString; 1327 return m_syntaxParserResult; 1328 } 1329 m_iCurrentPos = iPos; 1330 m_pStart = m_pBuffer; 1331 m_pEnd = m_pBuffer + m_iBufferChars; 1332 } 1333 1334 while (m_pStart < m_pEnd) { 1335 FX_WCHAR ch = *m_pStart; 1336 switch (m_syntaxParserState) { 1337 case FDE_XmlSyntaxState::Text: 1338 if (ch == L'<') { 1339 if (m_iDataLength > 0) { 1340 m_iTextDataLength = m_iDataLength; 1341 m_BlockBuffer.Reset(); 1342 m_pCurrentBlock = 1343 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1344 m_iEntityStart = -1; 1345 syntaxParserResult = FDE_XmlSyntaxResult::Text; 1346 } else { 1347 m_pStart++; 1348 m_syntaxParserState = FDE_XmlSyntaxState::Node; 1349 } 1350 } else { 1351 ParseTextChar(ch); 1352 } 1353 break; 1354 case FDE_XmlSyntaxState::Node: 1355 if (ch == L'!') { 1356 m_pStart++; 1357 m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; 1358 } else if (ch == L'/') { 1359 m_pStart++; 1360 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; 1361 } else if (ch == L'?') { 1362 m_iLastNodeNum++; 1363 m_iCurrentNodeNum = m_iLastNodeNum; 1364 m_CurNode.iNodeNum = m_iLastNodeNum; 1365 m_CurNode.eNodeType = FDE_XMLNODE_Instruction; 1366 m_XMLNodeStack.Push(m_CurNode); 1367 m_pStart++; 1368 m_syntaxParserState = FDE_XmlSyntaxState::Target; 1369 syntaxParserResult = FDE_XmlSyntaxResult::InstructionOpen; 1370 } else { 1371 m_iLastNodeNum++; 1372 m_iCurrentNodeNum = m_iLastNodeNum; 1373 m_CurNode.iNodeNum = m_iLastNodeNum; 1374 m_CurNode.eNodeType = FDE_XMLNODE_Element; 1375 m_XMLNodeStack.Push(m_CurNode); 1376 m_syntaxParserState = FDE_XmlSyntaxState::Tag; 1377 syntaxParserResult = FDE_XmlSyntaxResult::ElementOpen; 1378 } 1379 break; 1380 case FDE_XmlSyntaxState::Target: 1381 case FDE_XmlSyntaxState::Tag: 1382 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { 1383 if (m_iDataLength < 1) { 1384 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1385 return m_syntaxParserResult; 1386 } else { 1387 m_iTextDataLength = m_iDataLength; 1388 m_BlockBuffer.Reset(); 1389 m_pCurrentBlock = 1390 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1391 if (m_syntaxParserState != FDE_XmlSyntaxState::Target) { 1392 syntaxParserResult = FDE_XmlSyntaxResult::TagName; 1393 } else { 1394 syntaxParserResult = FDE_XmlSyntaxResult::TargetName; 1395 } 1396 m_syntaxParserState = FDE_XmlSyntaxState::AttriName; 1397 } 1398 } else { 1399 if (m_iIndexInBlock == m_iAllocStep) { 1400 m_pCurrentBlock = 1401 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1402 if (!m_pCurrentBlock) { 1403 return FDE_XmlSyntaxResult::Error; 1404 } 1405 } 1406 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1407 m_iDataLength++; 1408 m_pStart++; 1409 } 1410 break; 1411 case FDE_XmlSyntaxState::AttriName: 1412 if (m_iDataLength < 1 && FDE_IsXMLWhiteSpace(ch)) { 1413 m_pStart++; 1414 break; 1415 } 1416 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { 1417 if (m_iDataLength < 1) { 1418 if (m_CurNode.eNodeType == FDE_XMLNODE_Element) { 1419 if (ch == L'>' || ch == L'/') { 1420 m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; 1421 break; 1422 } 1423 } else if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { 1424 if (ch == L'?') { 1425 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; 1426 m_pStart++; 1427 } else { 1428 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 1429 } 1430 break; 1431 } 1432 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1433 return m_syntaxParserResult; 1434 } else { 1435 if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { 1436 if (ch != '=' && !FDE_IsXMLWhiteSpace(ch)) { 1437 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 1438 break; 1439 } 1440 } 1441 m_iTextDataLength = m_iDataLength; 1442 m_BlockBuffer.Reset(); 1443 m_pCurrentBlock = 1444 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1445 m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; 1446 syntaxParserResult = FDE_XmlSyntaxResult::AttriName; 1447 } 1448 } else { 1449 if (m_iIndexInBlock == m_iAllocStep) { 1450 m_pCurrentBlock = 1451 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1452 if (!m_pCurrentBlock) { 1453 return FDE_XmlSyntaxResult::Error; 1454 } 1455 } 1456 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1457 m_iDataLength++; 1458 m_pStart++; 1459 } 1460 break; 1461 case FDE_XmlSyntaxState::AttriEqualSign: 1462 if (FDE_IsXMLWhiteSpace(ch)) { 1463 m_pStart++; 1464 break; 1465 } 1466 if (ch != L'=') { 1467 if (m_CurNode.eNodeType == FDE_XMLNODE_Instruction) { 1468 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 1469 break; 1470 } 1471 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1472 return m_syntaxParserResult; 1473 } else { 1474 m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; 1475 m_pStart++; 1476 } 1477 break; 1478 case FDE_XmlSyntaxState::AttriQuotation: 1479 if (FDE_IsXMLWhiteSpace(ch)) { 1480 m_pStart++; 1481 break; 1482 } 1483 if (ch != L'\"' && ch != L'\'') { 1484 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1485 return m_syntaxParserResult; 1486 } else { 1487 m_wQuotationMark = ch; 1488 m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; 1489 m_pStart++; 1490 } 1491 break; 1492 case FDE_XmlSyntaxState::AttriValue: 1493 if (ch == m_wQuotationMark) { 1494 if (m_iEntityStart > -1) { 1495 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1496 return m_syntaxParserResult; 1497 } 1498 m_iTextDataLength = m_iDataLength; 1499 m_wQuotationMark = 0; 1500 m_BlockBuffer.Reset(); 1501 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1502 m_pStart++; 1503 m_syntaxParserState = FDE_XmlSyntaxState::AttriName; 1504 syntaxParserResult = FDE_XmlSyntaxResult::AttriValue; 1505 } else { 1506 ParseTextChar(ch); 1507 } 1508 break; 1509 case FDE_XmlSyntaxState::CloseInstruction: 1510 if (ch != L'>') { 1511 if (m_iIndexInBlock == m_iAllocStep) { 1512 m_pCurrentBlock = 1513 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1514 if (!m_pCurrentBlock) { 1515 return FDE_XmlSyntaxResult::Error; 1516 } 1517 } 1518 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1519 m_iDataLength++; 1520 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 1521 } else if (m_iDataLength > 0) { 1522 m_iTextDataLength = m_iDataLength; 1523 m_BlockBuffer.Reset(); 1524 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1525 syntaxParserResult = FDE_XmlSyntaxResult::TargetData; 1526 } else { 1527 m_pStart++; 1528 FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement(); 1529 if (!pXMLNode) { 1530 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1531 return m_syntaxParserResult; 1532 } 1533 m_XMLNodeStack.Pop(); 1534 pXMLNode = m_XMLNodeStack.GetTopElement(); 1535 if (pXMLNode) { 1536 m_CurNode = *pXMLNode; 1537 } else { 1538 m_CurNode.iNodeNum = -1; 1539 m_CurNode.eNodeType = FDE_XMLNODE_Unknown; 1540 } 1541 m_iCurrentNodeNum = m_CurNode.iNodeNum; 1542 m_BlockBuffer.Reset(); 1543 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1544 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1545 syntaxParserResult = FDE_XmlSyntaxResult::InstructionClose; 1546 } 1547 break; 1548 case FDE_XmlSyntaxState::BreakElement: 1549 if (ch == L'>') { 1550 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1551 syntaxParserResult = FDE_XmlSyntaxResult::ElementBreak; 1552 } else if (ch == L'/') { 1553 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; 1554 } else { 1555 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1556 return m_syntaxParserResult; 1557 } 1558 m_pStart++; 1559 break; 1560 case FDE_XmlSyntaxState::CloseElement: 1561 if (!FDE_IsXMLNameChar(ch, m_iDataLength < 1)) { 1562 if (ch == L'>') { 1563 FDE_XMLNODE* pXMLNode = m_XMLNodeStack.GetTopElement(); 1564 if (!pXMLNode) { 1565 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1566 return m_syntaxParserResult; 1567 } 1568 m_XMLNodeStack.Pop(); 1569 pXMLNode = m_XMLNodeStack.GetTopElement(); 1570 if (pXMLNode) { 1571 m_CurNode = *pXMLNode; 1572 } else { 1573 m_CurNode.iNodeNum = -1; 1574 m_CurNode.eNodeType = FDE_XMLNODE_Unknown; 1575 } 1576 m_iCurrentNodeNum = m_CurNode.iNodeNum; 1577 m_iTextDataLength = m_iDataLength; 1578 m_BlockBuffer.Reset(); 1579 m_pCurrentBlock = 1580 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1581 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1582 syntaxParserResult = FDE_XmlSyntaxResult::ElementClose; 1583 } else if (!FDE_IsXMLWhiteSpace(ch)) { 1584 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1585 return m_syntaxParserResult; 1586 } 1587 } else { 1588 if (m_iIndexInBlock == m_iAllocStep) { 1589 m_pCurrentBlock = 1590 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1591 if (!m_pCurrentBlock) { 1592 return FDE_XmlSyntaxResult::Error; 1593 } 1594 } 1595 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1596 m_iDataLength++; 1597 } 1598 m_pStart++; 1599 break; 1600 case FDE_XmlSyntaxState::SkipCommentOrDecl: 1601 if (FXSYS_wcsnicmp(m_pStart, L"--", 2) == 0) { 1602 m_pStart += 2; 1603 m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; 1604 } else if (FXSYS_wcsnicmp(m_pStart, L"[CDATA[", 7) == 0) { 1605 m_pStart += 7; 1606 m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; 1607 } else { 1608 m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; 1609 m_SkipChar = L'>'; 1610 m_SkipStack.Push(L'>'); 1611 } 1612 break; 1613 case FDE_XmlSyntaxState::SkipCData: { 1614 if (FXSYS_wcsnicmp(m_pStart, L"]]>", 3) == 0) { 1615 m_pStart += 3; 1616 syntaxParserResult = FDE_XmlSyntaxResult::CData; 1617 m_iTextDataLength = m_iDataLength; 1618 m_BlockBuffer.Reset(); 1619 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1620 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1621 } else { 1622 if (m_iIndexInBlock == m_iAllocStep) { 1623 m_pCurrentBlock = 1624 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1625 if (!m_pCurrentBlock) 1626 return FDE_XmlSyntaxResult::Error; 1627 } 1628 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1629 m_iDataLength++; 1630 m_pStart++; 1631 } 1632 break; 1633 } 1634 case FDE_XmlSyntaxState::SkipDeclNode: 1635 if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { 1636 m_pStart++; 1637 if (ch != m_SkipChar) 1638 break; 1639 1640 m_SkipStack.Pop(); 1641 uint32_t* pDWord = m_SkipStack.GetTopElement(); 1642 if (!pDWord) 1643 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1644 else 1645 m_SkipChar = (FX_WCHAR)*pDWord; 1646 } else { 1647 switch (ch) { 1648 case L'<': 1649 m_SkipChar = L'>'; 1650 m_SkipStack.Push(L'>'); 1651 break; 1652 case L'[': 1653 m_SkipChar = L']'; 1654 m_SkipStack.Push(L']'); 1655 break; 1656 case L'(': 1657 m_SkipChar = L')'; 1658 m_SkipStack.Push(L')'); 1659 break; 1660 case L'\'': 1661 m_SkipChar = L'\''; 1662 m_SkipStack.Push(L'\''); 1663 break; 1664 case L'\"': 1665 m_SkipChar = L'\"'; 1666 m_SkipStack.Push(L'\"'); 1667 break; 1668 default: 1669 if (ch == m_SkipChar) { 1670 m_SkipStack.Pop(); 1671 uint32_t* pDWord = m_SkipStack.GetTopElement(); 1672 if (!pDWord) { 1673 if (m_iDataLength >= 9) { 1674 CFX_WideString wsHeader; 1675 m_BlockBuffer.GetTextData(wsHeader, 0, 7); 1676 } 1677 m_iTextDataLength = m_iDataLength; 1678 m_BlockBuffer.Reset(); 1679 m_pCurrentBlock = 1680 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1681 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1682 } else { 1683 m_SkipChar = static_cast<FX_WCHAR>(*pDWord); 1684 } 1685 } 1686 break; 1687 } 1688 if (m_SkipStack.GetSize() > 0) { 1689 if (m_iIndexInBlock == m_iAllocStep) { 1690 m_pCurrentBlock = 1691 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1692 if (!m_pCurrentBlock) { 1693 return FDE_XmlSyntaxResult::Error; 1694 } 1695 } 1696 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1697 m_iDataLength++; 1698 } 1699 m_pStart++; 1700 } 1701 break; 1702 case FDE_XmlSyntaxState::SkipComment: 1703 if (FXSYS_wcsnicmp(m_pStart, L"-->", 3) == 0) { 1704 m_pStart += 2; 1705 m_syntaxParserState = FDE_XmlSyntaxState::Text; 1706 } 1707 1708 m_pStart++; 1709 break; 1710 case FDE_XmlSyntaxState::TargetData: 1711 if (FDE_IsXMLWhiteSpace(ch)) { 1712 if (m_iDataLength < 1) { 1713 m_pStart++; 1714 break; 1715 } else if (m_wQuotationMark == 0) { 1716 m_iTextDataLength = m_iDataLength; 1717 m_wQuotationMark = 0; 1718 m_BlockBuffer.Reset(); 1719 m_pCurrentBlock = 1720 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1721 m_pStart++; 1722 syntaxParserResult = FDE_XmlSyntaxResult::TargetData; 1723 break; 1724 } 1725 } 1726 if (ch == '?') { 1727 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; 1728 m_pStart++; 1729 } else if (ch == '\"') { 1730 if (m_wQuotationMark == 0) { 1731 m_wQuotationMark = ch; 1732 m_pStart++; 1733 } else if (ch == m_wQuotationMark) { 1734 m_iTextDataLength = m_iDataLength; 1735 m_wQuotationMark = 0; 1736 m_BlockBuffer.Reset(); 1737 m_pCurrentBlock = 1738 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1739 m_pStart++; 1740 syntaxParserResult = FDE_XmlSyntaxResult::TargetData; 1741 } else { 1742 m_syntaxParserResult = FDE_XmlSyntaxResult::Error; 1743 return m_syntaxParserResult; 1744 } 1745 } else { 1746 if (m_iIndexInBlock == m_iAllocStep) { 1747 m_pCurrentBlock = 1748 m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1749 if (!m_pCurrentBlock) { 1750 return FDE_XmlSyntaxResult::Error; 1751 } 1752 } 1753 m_pCurrentBlock[m_iIndexInBlock++] = ch; 1754 m_iDataLength++; 1755 m_pStart++; 1756 } 1757 break; 1758 default: 1759 break; 1760 } 1761 if (syntaxParserResult != FDE_XmlSyntaxResult::None) 1762 return syntaxParserResult; 1763 } 1764 } 1765 return FDE_XmlSyntaxResult::Text; 1766 } 1767 1768 CFDE_XMLSyntaxParser::~CFDE_XMLSyntaxParser() { 1769 m_pCurrentBlock = nullptr; 1770 FX_Free(m_pBuffer); 1771 } 1772 1773 int32_t CFDE_XMLSyntaxParser::GetStatus() const { 1774 if (!m_pStream) 1775 return -1; 1776 1777 int32_t iStreamLength = m_pStream->GetLength(); 1778 if (iStreamLength < 1) 1779 return 100; 1780 1781 if (m_syntaxParserResult == FDE_XmlSyntaxResult::Error) 1782 return -1; 1783 1784 if (m_syntaxParserResult == FDE_XmlSyntaxResult::EndOfString) 1785 return 100; 1786 return m_iParsedBytes * 100 / iStreamLength; 1787 } 1788 1789 static int32_t FX_GetUTF8EncodeLength(const FX_WCHAR* pSrc, int32_t iSrcLen) { 1790 uint32_t unicode = 0; 1791 int32_t iDstNum = 0; 1792 while (iSrcLen-- > 0) { 1793 unicode = *pSrc++; 1794 int nbytes = 0; 1795 if ((uint32_t)unicode < 0x80) { 1796 nbytes = 1; 1797 } else if ((uint32_t)unicode < 0x800) { 1798 nbytes = 2; 1799 } else if ((uint32_t)unicode < 0x10000) { 1800 nbytes = 3; 1801 } else if ((uint32_t)unicode < 0x200000) { 1802 nbytes = 4; 1803 } else if ((uint32_t)unicode < 0x4000000) { 1804 nbytes = 5; 1805 } else { 1806 nbytes = 6; 1807 } 1808 iDstNum += nbytes; 1809 } 1810 return iDstNum; 1811 } 1812 1813 FX_FILESIZE CFDE_XMLSyntaxParser::GetCurrentBinaryPos() const { 1814 if (!m_pStream) 1815 return 0; 1816 1817 int32_t nSrcLen = m_pStart - m_pBuffer; 1818 int32_t nDstLen = FX_GetUTF8EncodeLength(m_pBuffer, nSrcLen); 1819 return m_iParsedBytes + nDstLen; 1820 } 1821 1822 void CFDE_XMLSyntaxParser::ParseTextChar(FX_WCHAR character) { 1823 if (m_iIndexInBlock == m_iAllocStep) { 1824 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1825 if (!m_pCurrentBlock) { 1826 return; 1827 } 1828 } 1829 m_pCurrentBlock[m_iIndexInBlock++] = character; 1830 m_iDataLength++; 1831 if (m_iEntityStart > -1 && character == L';') { 1832 CFX_WideString csEntity; 1833 m_BlockBuffer.GetTextData(csEntity, m_iEntityStart + 1, 1834 (m_iDataLength - 1) - m_iEntityStart - 1); 1835 int32_t iLen = csEntity.GetLength(); 1836 if (iLen > 0) { 1837 if (csEntity[0] == L'#') { 1838 uint32_t ch = 0; 1839 FX_WCHAR w; 1840 if (iLen > 1 && csEntity[1] == L'x') { 1841 for (int32_t i = 2; i < iLen; i++) { 1842 w = csEntity[i]; 1843 if (w >= L'0' && w <= L'9') { 1844 ch = (ch << 4) + w - L'0'; 1845 } else if (w >= L'A' && w <= L'F') { 1846 ch = (ch << 4) + w - 55; 1847 } else if (w >= L'a' && w <= L'f') { 1848 ch = (ch << 4) + w - 87; 1849 } else { 1850 break; 1851 } 1852 } 1853 } else { 1854 for (int32_t i = 1; i < iLen; i++) { 1855 w = csEntity[i]; 1856 if (w < L'0' || w > L'9') 1857 break; 1858 ch = ch * 10 + w - L'0'; 1859 } 1860 } 1861 if (ch > kMaxCharRange) 1862 ch = ' '; 1863 1864 character = static_cast<FX_WCHAR>(ch); 1865 if (character != 0) { 1866 m_BlockBuffer.SetTextChar(m_iEntityStart, character); 1867 m_iEntityStart++; 1868 } 1869 } else { 1870 if (csEntity.Compare(L"amp") == 0) { 1871 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); 1872 m_iEntityStart++; 1873 } else if (csEntity.Compare(L"lt") == 0) { 1874 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); 1875 m_iEntityStart++; 1876 } else if (csEntity.Compare(L"gt") == 0) { 1877 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); 1878 m_iEntityStart++; 1879 } else if (csEntity.Compare(L"apos") == 0) { 1880 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); 1881 m_iEntityStart++; 1882 } else if (csEntity.Compare(L"quot") == 0) { 1883 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); 1884 m_iEntityStart++; 1885 } 1886 } 1887 } 1888 m_BlockBuffer.DeleteTextChars(m_iDataLength - m_iEntityStart, false); 1889 m_pCurrentBlock = m_BlockBuffer.GetAvailableBlock(m_iIndexInBlock); 1890 m_iEntityStart = -1; 1891 } else { 1892 if (m_iEntityStart < 0 && character == L'&') { 1893 m_iEntityStart = m_iDataLength - 1; 1894 } 1895 } 1896 m_pStart++; 1897 } 1898