1 // Copyright 2017 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fxcrt/xml/cfx_xmlsyntaxparser.h" 8 9 #include <algorithm> 10 #include <cwctype> 11 #include <iterator> 12 13 #include "core/fxcrt/fx_extension.h" 14 #include "core/fxcrt/fx_safe_types.h" 15 16 namespace { 17 18 const uint32_t kMaxCharRange = 0x10ffff; 19 20 bool IsXMLWhiteSpace(wchar_t ch) { 21 return ch == L' ' || ch == 0x0A || ch == 0x0D || ch == 0x09; 22 } 23 24 struct FX_XMLNAMECHAR { 25 uint16_t wStart; 26 uint16_t wEnd; 27 bool bStartChar; 28 }; 29 30 const FX_XMLNAMECHAR g_XMLNameChars[] = { 31 {L'-', L'.', false}, {L'0', L'9', false}, {L':', L':', false}, 32 {L'A', L'Z', true}, {L'_', L'_', true}, {L'a', L'z', true}, 33 {0xB7, 0xB7, false}, {0xC0, 0xD6, true}, {0xD8, 0xF6, true}, 34 {0xF8, 0x02FF, true}, {0x0300, 0x036F, false}, {0x0370, 0x037D, true}, 35 {0x037F, 0x1FFF, true}, {0x200C, 0x200D, true}, {0x203F, 0x2040, false}, 36 {0x2070, 0x218F, true}, {0x2C00, 0x2FEF, true}, {0x3001, 0xD7FF, true}, 37 {0xF900, 0xFDCF, true}, {0xFDF0, 0xFFFD, true}, 38 }; 39 40 41 int32_t GetUTF8EncodeLength(const std::vector<wchar_t>& src, 42 FX_FILESIZE iSrcLen) { 43 uint32_t unicode = 0; 44 int32_t iDstNum = 0; 45 const wchar_t* pSrc = src.data(); 46 while (iSrcLen-- > 0) { 47 unicode = *pSrc++; 48 int nbytes = 0; 49 if ((uint32_t)unicode < 0x80) { 50 nbytes = 1; 51 } else if ((uint32_t)unicode < 0x800) { 52 nbytes = 2; 53 } else if ((uint32_t)unicode < 0x10000) { 54 nbytes = 3; 55 } else if ((uint32_t)unicode < 0x200000) { 56 nbytes = 4; 57 } else if ((uint32_t)unicode < 0x4000000) { 58 nbytes = 5; 59 } else { 60 nbytes = 6; 61 } 62 iDstNum += nbytes; 63 } 64 return iDstNum; 65 } 66 67 } // namespace 68 69 // static 70 bool CFX_XMLSyntaxParser::IsXMLNameChar(wchar_t ch, bool bFirstChar) { 71 auto* it = std::lower_bound( 72 std::begin(g_XMLNameChars), std::end(g_XMLNameChars), ch, 73 [](const FX_XMLNAMECHAR& arg, wchar_t ch) { return arg.wEnd < ch; }); 74 return it != std::end(g_XMLNameChars) && ch >= it->wStart && 75 (!bFirstChar || it->bStartChar); 76 } 77 78 CFX_XMLSyntaxParser::CFX_XMLSyntaxParser( 79 const RetainPtr<CFX_SeekableStreamProxy>& pStream) 80 : m_pStream(pStream), 81 m_iXMLPlaneSize(32 * 1024), 82 m_iCurrentPos(0), 83 m_iCurrentNodeNum(-1), 84 m_iLastNodeNum(-1), 85 m_iParsedBytes(0), 86 m_ParsedChars(0), 87 m_iBufferChars(0), 88 m_bEOS(false), 89 m_Start(0), 90 m_End(0), 91 m_iAllocStep(m_BlockBuffer.GetAllocStep()), 92 m_pCurrentBlock(nullptr), 93 m_iIndexInBlock(0), 94 m_iTextDataLength(0), 95 m_syntaxParserResult(FX_XmlSyntaxResult::None), 96 m_syntaxParserState(FDE_XmlSyntaxState::Text), 97 m_wQuotationMark(0), 98 m_iEntityStart(-1) { 99 ASSERT(pStream); 100 101 m_CurNode.iNodeNum = -1; 102 m_CurNode.eNodeType = FX_XMLNODE_Unknown; 103 104 m_iXMLPlaneSize = 105 std::min(m_iXMLPlaneSize, 106 pdfium::base::checked_cast<size_t>(m_pStream->GetLength())); 107 m_iCurrentPos = m_pStream->GetBOMLength(); 108 109 FX_SAFE_SIZE_T alloc_size_safe = m_iXMLPlaneSize; 110 alloc_size_safe += 1; // For NUL. 111 if (!alloc_size_safe.IsValid() || alloc_size_safe.ValueOrDie() <= 0) { 112 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 113 return; 114 } 115 116 m_Buffer.resize(pdfium::base::ValueOrDieForType<size_t>(alloc_size_safe)); 117 118 m_BlockBuffer.InitBuffer(); 119 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 120 m_BlockBuffer.GetAvailableBlock(); 121 } 122 123 CFX_XMLSyntaxParser::~CFX_XMLSyntaxParser() {} 124 125 FX_XmlSyntaxResult CFX_XMLSyntaxParser::DoSyntaxParse() { 126 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error || 127 m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) { 128 return m_syntaxParserResult; 129 } 130 131 FX_FILESIZE iStreamLength = m_pStream->GetLength(); 132 FX_FILESIZE iPos; 133 134 FX_XmlSyntaxResult syntaxParserResult = FX_XmlSyntaxResult::None; 135 while (true) { 136 if (m_Start >= m_End) { 137 if (m_bEOS || m_iCurrentPos >= iStreamLength) { 138 m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; 139 return m_syntaxParserResult; 140 } 141 m_ParsedChars += m_End; 142 m_iParsedBytes = m_iCurrentPos; 143 if (m_pStream->GetPosition() != m_iCurrentPos) 144 m_pStream->Seek(CFX_SeekableStreamProxy::From::Begin, m_iCurrentPos); 145 146 m_iBufferChars = 147 m_pStream->ReadString(m_Buffer.data(), m_iXMLPlaneSize, &m_bEOS); 148 iPos = m_pStream->GetPosition(); 149 if (m_iBufferChars < 1) { 150 m_iCurrentPos = iStreamLength; 151 m_syntaxParserResult = FX_XmlSyntaxResult::EndOfString; 152 return m_syntaxParserResult; 153 } 154 m_iCurrentPos = iPos; 155 m_Start = 0; 156 m_End = m_iBufferChars; 157 } 158 159 while (m_Start < m_End) { 160 wchar_t ch = m_Buffer[m_Start]; 161 switch (m_syntaxParserState) { 162 case FDE_XmlSyntaxState::Text: 163 if (ch == L'<') { 164 if (!m_BlockBuffer.IsEmpty()) { 165 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 166 m_BlockBuffer.Reset(true); 167 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 168 m_BlockBuffer.GetAvailableBlock(); 169 m_iEntityStart = -1; 170 syntaxParserResult = FX_XmlSyntaxResult::Text; 171 } else { 172 m_Start++; 173 m_syntaxParserState = FDE_XmlSyntaxState::Node; 174 } 175 } else { 176 ParseTextChar(ch); 177 } 178 break; 179 case FDE_XmlSyntaxState::Node: 180 if (ch == L'!') { 181 m_Start++; 182 m_syntaxParserState = FDE_XmlSyntaxState::SkipCommentOrDecl; 183 } else if (ch == L'/') { 184 m_Start++; 185 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; 186 } else if (ch == L'?') { 187 m_iLastNodeNum++; 188 m_iCurrentNodeNum = m_iLastNodeNum; 189 m_CurNode.iNodeNum = m_iLastNodeNum; 190 m_CurNode.eNodeType = FX_XMLNODE_Instruction; 191 m_XMLNodeStack.push(m_CurNode); 192 m_Start++; 193 m_syntaxParserState = FDE_XmlSyntaxState::Target; 194 syntaxParserResult = FX_XmlSyntaxResult::InstructionOpen; 195 } else { 196 m_iLastNodeNum++; 197 m_iCurrentNodeNum = m_iLastNodeNum; 198 m_CurNode.iNodeNum = m_iLastNodeNum; 199 m_CurNode.eNodeType = FX_XMLNODE_Element; 200 m_XMLNodeStack.push(m_CurNode); 201 m_syntaxParserState = FDE_XmlSyntaxState::Tag; 202 syntaxParserResult = FX_XmlSyntaxResult::ElementOpen; 203 } 204 break; 205 case FDE_XmlSyntaxState::Target: 206 case FDE_XmlSyntaxState::Tag: 207 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { 208 if (m_BlockBuffer.IsEmpty()) { 209 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 210 return m_syntaxParserResult; 211 } 212 213 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 214 m_BlockBuffer.Reset(true); 215 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 216 m_BlockBuffer.GetAvailableBlock(); 217 if (m_syntaxParserState != FDE_XmlSyntaxState::Target) 218 syntaxParserResult = FX_XmlSyntaxResult::TagName; 219 else 220 syntaxParserResult = FX_XmlSyntaxResult::TargetName; 221 222 m_syntaxParserState = FDE_XmlSyntaxState::AttriName; 223 } else { 224 if (m_iIndexInBlock == m_iAllocStep) { 225 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 226 m_BlockBuffer.GetAvailableBlock(); 227 if (!m_pCurrentBlock) { 228 return FX_XmlSyntaxResult::Error; 229 } 230 } 231 m_pCurrentBlock[m_iIndexInBlock++] = ch; 232 m_BlockBuffer.IncrementDataLength(); 233 m_Start++; 234 } 235 break; 236 case FDE_XmlSyntaxState::AttriName: 237 if (m_BlockBuffer.IsEmpty() && IsXMLWhiteSpace(ch)) { 238 m_Start++; 239 break; 240 } 241 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { 242 if (m_BlockBuffer.IsEmpty()) { 243 if (m_CurNode.eNodeType == FX_XMLNODE_Element) { 244 if (ch == L'>' || ch == L'/') { 245 m_syntaxParserState = FDE_XmlSyntaxState::BreakElement; 246 break; 247 } 248 } else if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { 249 if (ch == L'?') { 250 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; 251 m_Start++; 252 } else { 253 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 254 } 255 break; 256 } 257 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 258 return m_syntaxParserResult; 259 } else { 260 if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { 261 if (ch != '=' && !IsXMLWhiteSpace(ch)) { 262 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 263 break; 264 } 265 } 266 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 267 m_BlockBuffer.Reset(true); 268 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 269 m_BlockBuffer.GetAvailableBlock(); 270 m_syntaxParserState = FDE_XmlSyntaxState::AttriEqualSign; 271 syntaxParserResult = FX_XmlSyntaxResult::AttriName; 272 } 273 } else { 274 if (m_iIndexInBlock == m_iAllocStep) { 275 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 276 m_BlockBuffer.GetAvailableBlock(); 277 if (!m_pCurrentBlock) { 278 return FX_XmlSyntaxResult::Error; 279 } 280 } 281 m_pCurrentBlock[m_iIndexInBlock++] = ch; 282 m_BlockBuffer.IncrementDataLength(); 283 m_Start++; 284 } 285 break; 286 case FDE_XmlSyntaxState::AttriEqualSign: 287 if (IsXMLWhiteSpace(ch)) { 288 m_Start++; 289 break; 290 } 291 if (ch != L'=') { 292 if (m_CurNode.eNodeType == FX_XMLNODE_Instruction) { 293 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 294 break; 295 } 296 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 297 return m_syntaxParserResult; 298 } else { 299 m_syntaxParserState = FDE_XmlSyntaxState::AttriQuotation; 300 m_Start++; 301 } 302 break; 303 case FDE_XmlSyntaxState::AttriQuotation: 304 if (IsXMLWhiteSpace(ch)) { 305 m_Start++; 306 break; 307 } 308 if (ch != L'\"' && ch != L'\'') { 309 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 310 return m_syntaxParserResult; 311 } else { 312 m_wQuotationMark = ch; 313 m_syntaxParserState = FDE_XmlSyntaxState::AttriValue; 314 m_Start++; 315 } 316 break; 317 case FDE_XmlSyntaxState::AttriValue: 318 if (ch == m_wQuotationMark) { 319 if (m_iEntityStart > -1) { 320 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 321 return m_syntaxParserResult; 322 } 323 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 324 m_wQuotationMark = 0; 325 m_BlockBuffer.Reset(true); 326 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 327 m_BlockBuffer.GetAvailableBlock(); 328 m_Start++; 329 m_syntaxParserState = FDE_XmlSyntaxState::AttriName; 330 syntaxParserResult = FX_XmlSyntaxResult::AttriValue; 331 } else { 332 ParseTextChar(ch); 333 } 334 break; 335 case FDE_XmlSyntaxState::CloseInstruction: 336 if (ch != L'>') { 337 if (m_iIndexInBlock == m_iAllocStep) { 338 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 339 m_BlockBuffer.GetAvailableBlock(); 340 if (!m_pCurrentBlock) { 341 return FX_XmlSyntaxResult::Error; 342 } 343 } 344 m_pCurrentBlock[m_iIndexInBlock++] = ch; 345 m_BlockBuffer.IncrementDataLength(); 346 m_syntaxParserState = FDE_XmlSyntaxState::TargetData; 347 } else if (!m_BlockBuffer.IsEmpty()) { 348 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 349 m_BlockBuffer.Reset(true); 350 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 351 m_BlockBuffer.GetAvailableBlock(); 352 syntaxParserResult = FX_XmlSyntaxResult::TargetData; 353 } else { 354 m_Start++; 355 if (m_XMLNodeStack.empty()) { 356 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 357 return m_syntaxParserResult; 358 } 359 m_XMLNodeStack.pop(); 360 if (!m_XMLNodeStack.empty()) { 361 m_CurNode = m_XMLNodeStack.top(); 362 } else { 363 m_CurNode.iNodeNum = -1; 364 m_CurNode.eNodeType = FX_XMLNODE_Unknown; 365 } 366 m_iCurrentNodeNum = m_CurNode.iNodeNum; 367 m_BlockBuffer.Reset(true); 368 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 369 m_BlockBuffer.GetAvailableBlock(); 370 m_syntaxParserState = FDE_XmlSyntaxState::Text; 371 syntaxParserResult = FX_XmlSyntaxResult::InstructionClose; 372 } 373 break; 374 case FDE_XmlSyntaxState::BreakElement: 375 if (ch == L'>') { 376 m_syntaxParserState = FDE_XmlSyntaxState::Text; 377 syntaxParserResult = FX_XmlSyntaxResult::ElementBreak; 378 } else if (ch == L'/') { 379 m_syntaxParserState = FDE_XmlSyntaxState::CloseElement; 380 } else { 381 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 382 return m_syntaxParserResult; 383 } 384 m_Start++; 385 break; 386 case FDE_XmlSyntaxState::CloseElement: 387 if (!IsXMLNameChar(ch, m_BlockBuffer.IsEmpty())) { 388 if (ch == L'>') { 389 if (m_XMLNodeStack.empty()) { 390 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 391 return m_syntaxParserResult; 392 } 393 m_XMLNodeStack.pop(); 394 if (!m_XMLNodeStack.empty()) { 395 m_CurNode = m_XMLNodeStack.top(); 396 } else { 397 m_CurNode.iNodeNum = -1; 398 m_CurNode.eNodeType = FX_XMLNODE_Unknown; 399 } 400 m_iCurrentNodeNum = m_CurNode.iNodeNum; 401 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 402 m_BlockBuffer.Reset(true); 403 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 404 m_BlockBuffer.GetAvailableBlock(); 405 m_syntaxParserState = FDE_XmlSyntaxState::Text; 406 syntaxParserResult = FX_XmlSyntaxResult::ElementClose; 407 } else if (!IsXMLWhiteSpace(ch)) { 408 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 409 return m_syntaxParserResult; 410 } 411 } else { 412 if (m_iIndexInBlock == m_iAllocStep) { 413 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 414 m_BlockBuffer.GetAvailableBlock(); 415 if (!m_pCurrentBlock) { 416 return FX_XmlSyntaxResult::Error; 417 } 418 } 419 m_pCurrentBlock[m_iIndexInBlock++] = ch; 420 m_BlockBuffer.IncrementDataLength(); 421 } 422 m_Start++; 423 break; 424 case FDE_XmlSyntaxState::SkipCommentOrDecl: 425 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"--", 2) == 0) { 426 m_Start += 2; 427 m_syntaxParserState = FDE_XmlSyntaxState::SkipComment; 428 } else if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"[CDATA[", 7) == 429 0) { 430 m_Start += 7; 431 m_syntaxParserState = FDE_XmlSyntaxState::SkipCData; 432 } else { 433 m_syntaxParserState = FDE_XmlSyntaxState::SkipDeclNode; 434 m_SkipChar = L'>'; 435 m_SkipStack.push(L'>'); 436 } 437 break; 438 case FDE_XmlSyntaxState::SkipCData: { 439 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"]]>", 3) == 0) { 440 m_Start += 3; 441 syntaxParserResult = FX_XmlSyntaxResult::CData; 442 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 443 m_BlockBuffer.Reset(true); 444 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 445 m_BlockBuffer.GetAvailableBlock(); 446 m_syntaxParserState = FDE_XmlSyntaxState::Text; 447 } else { 448 if (m_iIndexInBlock == m_iAllocStep) { 449 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 450 m_BlockBuffer.GetAvailableBlock(); 451 if (!m_pCurrentBlock) 452 return FX_XmlSyntaxResult::Error; 453 } 454 m_pCurrentBlock[m_iIndexInBlock++] = ch; 455 m_BlockBuffer.IncrementDataLength(); 456 m_Start++; 457 } 458 break; 459 } 460 case FDE_XmlSyntaxState::SkipDeclNode: 461 if (m_SkipChar == L'\'' || m_SkipChar == L'\"') { 462 m_Start++; 463 if (ch != m_SkipChar) 464 break; 465 466 m_SkipStack.pop(); 467 if (m_SkipStack.empty()) 468 m_syntaxParserState = FDE_XmlSyntaxState::Text; 469 else 470 m_SkipChar = m_SkipStack.top(); 471 } else { 472 switch (ch) { 473 case L'<': 474 m_SkipChar = L'>'; 475 m_SkipStack.push(L'>'); 476 break; 477 case L'[': 478 m_SkipChar = L']'; 479 m_SkipStack.push(L']'); 480 break; 481 case L'(': 482 m_SkipChar = L')'; 483 m_SkipStack.push(L')'); 484 break; 485 case L'\'': 486 m_SkipChar = L'\''; 487 m_SkipStack.push(L'\''); 488 break; 489 case L'\"': 490 m_SkipChar = L'\"'; 491 m_SkipStack.push(L'\"'); 492 break; 493 default: 494 if (ch == m_SkipChar) { 495 m_SkipStack.pop(); 496 if (m_SkipStack.empty()) { 497 if (m_BlockBuffer.GetDataLength() >= 9) 498 (void)m_BlockBuffer.GetTextData(0, 7); 499 500 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 501 m_BlockBuffer.Reset(true); 502 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 503 m_BlockBuffer.GetAvailableBlock(); 504 m_syntaxParserState = FDE_XmlSyntaxState::Text; 505 } else { 506 m_SkipChar = m_SkipStack.top(); 507 } 508 } 509 break; 510 } 511 if (!m_SkipStack.empty()) { 512 if (m_iIndexInBlock == m_iAllocStep) { 513 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 514 m_BlockBuffer.GetAvailableBlock(); 515 if (!m_pCurrentBlock) { 516 return FX_XmlSyntaxResult::Error; 517 } 518 } 519 m_pCurrentBlock[m_iIndexInBlock++] = ch; 520 m_BlockBuffer.IncrementDataLength(); 521 } 522 m_Start++; 523 } 524 break; 525 case FDE_XmlSyntaxState::SkipComment: 526 if (FXSYS_wcsnicmp(m_Buffer.data() + m_Start, L"-->", 3) == 0) { 527 m_Start += 2; 528 m_syntaxParserState = FDE_XmlSyntaxState::Text; 529 } 530 531 m_Start++; 532 break; 533 case FDE_XmlSyntaxState::TargetData: 534 if (IsXMLWhiteSpace(ch)) { 535 if (m_BlockBuffer.IsEmpty()) { 536 m_Start++; 537 break; 538 } 539 if (m_wQuotationMark == 0) { 540 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 541 m_wQuotationMark = 0; 542 m_BlockBuffer.Reset(true); 543 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 544 m_BlockBuffer.GetAvailableBlock(); 545 m_Start++; 546 syntaxParserResult = FX_XmlSyntaxResult::TargetData; 547 break; 548 } 549 } 550 if (ch == '?') { 551 m_syntaxParserState = FDE_XmlSyntaxState::CloseInstruction; 552 m_Start++; 553 } else if (ch == '\"') { 554 if (m_wQuotationMark == 0) { 555 m_wQuotationMark = ch; 556 m_Start++; 557 } else if (ch == m_wQuotationMark) { 558 m_iTextDataLength = m_BlockBuffer.GetDataLength(); 559 m_wQuotationMark = 0; 560 m_BlockBuffer.Reset(true); 561 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 562 m_BlockBuffer.GetAvailableBlock(); 563 m_Start++; 564 syntaxParserResult = FX_XmlSyntaxResult::TargetData; 565 } else { 566 m_syntaxParserResult = FX_XmlSyntaxResult::Error; 567 return m_syntaxParserResult; 568 } 569 } else { 570 if (m_iIndexInBlock == m_iAllocStep) { 571 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 572 m_BlockBuffer.GetAvailableBlock(); 573 if (!m_pCurrentBlock) { 574 return FX_XmlSyntaxResult::Error; 575 } 576 } 577 m_pCurrentBlock[m_iIndexInBlock++] = ch; 578 m_BlockBuffer.IncrementDataLength(); 579 m_Start++; 580 } 581 break; 582 default: 583 break; 584 } 585 if (syntaxParserResult != FX_XmlSyntaxResult::None) 586 return syntaxParserResult; 587 } 588 } 589 return FX_XmlSyntaxResult::Text; 590 } 591 592 int32_t CFX_XMLSyntaxParser::GetStatus() const { 593 if (!m_pStream) 594 return -1; 595 596 int32_t iStreamLength = m_pStream->GetLength(); 597 if (iStreamLength < 1) 598 return 100; 599 600 if (m_syntaxParserResult == FX_XmlSyntaxResult::Error) 601 return -1; 602 603 if (m_syntaxParserResult == FX_XmlSyntaxResult::EndOfString) 604 return 100; 605 return m_iParsedBytes * 100 / iStreamLength; 606 } 607 608 FX_FILESIZE CFX_XMLSyntaxParser::GetCurrentBinaryPos() const { 609 if (!m_pStream) 610 return 0; 611 612 int32_t nDstLen = GetUTF8EncodeLength(m_Buffer, m_Start); 613 return m_iParsedBytes + nDstLen; 614 } 615 616 void CFX_XMLSyntaxParser::ParseTextChar(wchar_t character) { 617 if (m_iIndexInBlock == m_iAllocStep) { 618 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 619 m_BlockBuffer.GetAvailableBlock(); 620 if (!m_pCurrentBlock) 621 return; 622 } 623 624 m_pCurrentBlock[m_iIndexInBlock++] = character; 625 m_BlockBuffer.IncrementDataLength(); 626 if (m_iEntityStart > -1 && character == L';') { 627 WideString csEntity = m_BlockBuffer.GetTextData( 628 m_iEntityStart + 1, 629 m_BlockBuffer.GetDataLength() - 1 - m_iEntityStart - 1); 630 int32_t iLen = csEntity.GetLength(); 631 if (iLen > 0) { 632 if (csEntity[0] == L'#') { 633 uint32_t ch = 0; 634 wchar_t w; 635 if (iLen > 1 && csEntity[1] == L'x') { 636 for (int32_t i = 2; i < iLen; i++) { 637 w = csEntity[i]; 638 if (std::iswdigit(w)) 639 ch = (ch << 4) + w - L'0'; 640 else if (w >= L'A' && w <= L'F') 641 ch = (ch << 4) + w - 55; 642 else if (w >= L'a' && w <= L'f') 643 ch = (ch << 4) + w - 87; 644 else 645 break; 646 } 647 } else { 648 for (int32_t i = 1; i < iLen; i++) { 649 w = csEntity[i]; 650 if (!std::iswdigit(w)) 651 break; 652 ch = ch * 10 + w - L'0'; 653 } 654 } 655 if (ch > kMaxCharRange) 656 ch = ' '; 657 658 character = static_cast<wchar_t>(ch); 659 if (character != 0) { 660 m_BlockBuffer.SetTextChar(m_iEntityStart, character); 661 m_iEntityStart++; 662 } 663 } else { 664 if (csEntity.Compare(L"amp") == 0) { 665 m_BlockBuffer.SetTextChar(m_iEntityStart, L'&'); 666 m_iEntityStart++; 667 } else if (csEntity.Compare(L"lt") == 0) { 668 m_BlockBuffer.SetTextChar(m_iEntityStart, L'<'); 669 m_iEntityStart++; 670 } else if (csEntity.Compare(L"gt") == 0) { 671 m_BlockBuffer.SetTextChar(m_iEntityStart, L'>'); 672 m_iEntityStart++; 673 } else if (csEntity.Compare(L"apos") == 0) { 674 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\''); 675 m_iEntityStart++; 676 } else if (csEntity.Compare(L"quot") == 0) { 677 m_BlockBuffer.SetTextChar(m_iEntityStart, L'\"'); 678 m_iEntityStart++; 679 } 680 } 681 } 682 if (m_iEntityStart >= 0 && 683 m_BlockBuffer.GetDataLength() > static_cast<size_t>(m_iEntityStart)) { 684 m_BlockBuffer.DeleteTextChars(m_BlockBuffer.GetDataLength() - 685 m_iEntityStart); 686 } 687 std::tie(m_pCurrentBlock, m_iIndexInBlock) = 688 m_BlockBuffer.GetAvailableBlock(); 689 m_iEntityStart = -1; 690 } else if (m_iEntityStart < 0 && character == L'&') { 691 m_iEntityStart = m_BlockBuffer.GetDataLength() - 1; 692 } 693 m_Start++; 694 } 695