1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include <algorithm> 8 #include <memory> 9 #include <vector> 10 11 #include "core/fxcrt/fx_ext.h" 12 #include "core/fxcrt/fx_xml.h" 13 #include "core/fxcrt/xml_int.h" 14 #include "third_party/base/ptr_util.h" 15 #include "third_party/base/stl_util.h" 16 17 namespace { 18 19 #define FXCRTM_XML_CHARTYPE_Normal 0x00 20 #define FXCRTM_XML_CHARTYPE_SpaceChar 0x01 21 #define FXCRTM_XML_CHARTYPE_Letter 0x02 22 #define FXCRTM_XML_CHARTYPE_Digital 0x04 23 #define FXCRTM_XML_CHARTYPE_NameIntro 0x08 24 #define FXCRTM_XML_CHARTYPE_NameChar 0x10 25 #define FXCRTM_XML_CHARTYPE_HexDigital 0x20 26 #define FXCRTM_XML_CHARTYPE_HexLowerLetter 0x40 27 #define FXCRTM_XML_CHARTYPE_HexUpperLetter 0x60 28 #define FXCRTM_XML_CHARTYPE_HexChar 0x60 29 30 const uint8_t g_FXCRT_XML_ByteTypes[256] = { 31 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 32 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 33 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, 0x00, 34 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10, 0x10, 0x00, 35 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x34, 0x08, 0x00, 36 0x00, 0x00, 0x00, 0x00, 0x00, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x7A, 0x1A, 37 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 38 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x18, 39 0x00, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x5A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 40 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 41 0x1A, 0x1A, 0x1A, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1A, 0x1A, 0x1A, 0x1A, 42 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 43 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 44 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 45 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 46 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 47 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 48 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 49 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 50 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 51 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 0x1A, 52 0x1A, 0x1A, 0x01, 0x01, 53 }; 54 55 bool g_FXCRT_XML_IsWhiteSpace(uint8_t ch) { 56 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_SpaceChar); 57 } 58 59 bool g_FXCRT_XML_IsDigital(uint8_t ch) { 60 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_Digital); 61 } 62 63 bool g_FXCRT_XML_IsNameIntro(uint8_t ch) { 64 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameIntro); 65 } 66 67 bool g_FXCRT_XML_IsNameChar(uint8_t ch) { 68 return !!(g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_NameChar); 69 } 70 71 class CXML_DataBufAcc : public IFX_BufferedReadStream { 72 public: 73 template <typename T, typename... Args> 74 friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args); 75 76 // IFX_BufferedReadStream 77 bool IsEOF() override; 78 FX_FILESIZE GetPosition() override; 79 size_t ReadBlock(void* buffer, size_t size) override; 80 bool ReadNextBlock(bool bRestart) override; 81 const uint8_t* GetBlockBuffer() override; 82 size_t GetBlockSize() override; 83 FX_FILESIZE GetBlockOffset() override; 84 85 private: 86 CXML_DataBufAcc(const uint8_t* pBuffer, size_t size); 87 ~CXML_DataBufAcc() override; 88 89 const uint8_t* m_pBuffer; 90 size_t m_dwSize; 91 size_t m_dwCurPos; 92 }; 93 94 CXML_DataBufAcc::CXML_DataBufAcc(const uint8_t* pBuffer, size_t size) 95 : m_pBuffer(pBuffer), m_dwSize(size), m_dwCurPos(0) {} 96 97 CXML_DataBufAcc::~CXML_DataBufAcc() {} 98 99 bool CXML_DataBufAcc::IsEOF() { 100 return m_dwCurPos >= m_dwSize; 101 } 102 103 FX_FILESIZE CXML_DataBufAcc::GetPosition() { 104 return static_cast<FX_FILESIZE>(m_dwCurPos); 105 } 106 107 size_t CXML_DataBufAcc::ReadBlock(void* buffer, size_t size) { 108 return 0; 109 } 110 111 bool CXML_DataBufAcc::ReadNextBlock(bool bRestart) { 112 if (bRestart) 113 m_dwCurPos = 0; 114 115 if (m_dwCurPos < m_dwSize) { 116 m_dwCurPos = m_dwSize; 117 return true; 118 } 119 return false; 120 } 121 122 const uint8_t* CXML_DataBufAcc::GetBlockBuffer() { 123 return m_pBuffer; 124 } 125 126 size_t CXML_DataBufAcc::GetBlockSize() { 127 return m_dwSize; 128 } 129 130 FX_FILESIZE CXML_DataBufAcc::GetBlockOffset() { 131 return 0; 132 } 133 134 class CXML_DataStmAcc : public IFX_BufferedReadStream { 135 public: 136 template <typename T, typename... Args> 137 friend CFX_RetainPtr<T> pdfium::MakeRetain(Args&&... args); 138 139 // IFX_BufferedReadStream 140 bool IsEOF() override; 141 FX_FILESIZE GetPosition() override; 142 size_t ReadBlock(void* buffer, size_t size) override; 143 bool ReadNextBlock(bool bRestart) override; 144 const uint8_t* GetBlockBuffer() override; 145 size_t GetBlockSize() override; 146 FX_FILESIZE GetBlockOffset() override; 147 148 private: 149 explicit CXML_DataStmAcc( 150 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead); 151 ~CXML_DataStmAcc() override; 152 153 CFX_RetainPtr<IFX_SeekableReadStream> m_pFileRead; 154 uint8_t* m_pBuffer; 155 FX_FILESIZE m_nStart; 156 size_t m_dwSize; 157 }; 158 159 CXML_DataStmAcc::CXML_DataStmAcc( 160 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead) 161 : m_pFileRead(pFileRead), m_pBuffer(nullptr), m_nStart(0), m_dwSize(0) { 162 ASSERT(m_pFileRead); 163 } 164 165 CXML_DataStmAcc::~CXML_DataStmAcc() { 166 FX_Free(m_pBuffer); 167 } 168 169 bool CXML_DataStmAcc::IsEOF() { 170 return m_nStart + static_cast<FX_FILESIZE>(m_dwSize) >= 171 m_pFileRead->GetSize(); 172 } 173 174 FX_FILESIZE CXML_DataStmAcc::GetPosition() { 175 return m_nStart + static_cast<FX_FILESIZE>(m_dwSize); 176 } 177 178 size_t CXML_DataStmAcc::ReadBlock(void* buffer, size_t size) { 179 return 0; 180 } 181 182 bool CXML_DataStmAcc::ReadNextBlock(bool bRestart) { 183 if (bRestart) 184 m_nStart = 0; 185 186 FX_FILESIZE nLength = m_pFileRead->GetSize(); 187 m_nStart += static_cast<FX_FILESIZE>(m_dwSize); 188 if (m_nStart >= nLength) 189 return false; 190 191 static const FX_FILESIZE FX_XMLDATASTREAM_BufferSize = 32 * 1024; 192 m_dwSize = static_cast<size_t>( 193 std::min(FX_XMLDATASTREAM_BufferSize, nLength - m_nStart)); 194 if (!m_pBuffer) 195 m_pBuffer = FX_Alloc(uint8_t, m_dwSize); 196 197 return m_pFileRead->ReadBlock(m_pBuffer, m_nStart, m_dwSize); 198 } 199 200 const uint8_t* CXML_DataStmAcc::GetBlockBuffer() { 201 return (const uint8_t*)m_pBuffer; 202 } 203 204 size_t CXML_DataStmAcc::GetBlockSize() { 205 return m_dwSize; 206 } 207 208 FX_FILESIZE CXML_DataStmAcc::GetBlockOffset() { 209 return m_nStart; 210 } 211 212 } // namespace 213 214 CXML_Parser::CXML_Parser() 215 : m_nOffset(0), 216 m_pBuffer(nullptr), 217 m_dwBufferSize(0), 218 m_nBufferOffset(0), 219 m_dwIndex(0) {} 220 221 CXML_Parser::~CXML_Parser() {} 222 223 bool CXML_Parser::Init(const uint8_t* pBuffer, size_t size) { 224 m_pDataAcc = pdfium::MakeRetain<CXML_DataBufAcc>(pBuffer, size); 225 m_nOffset = 0; 226 return ReadNextBlock(); 227 } 228 229 bool CXML_Parser::ReadNextBlock() { 230 if (!m_pDataAcc->ReadNextBlock()) 231 return false; 232 233 m_pBuffer = m_pDataAcc->GetBlockBuffer(); 234 m_dwBufferSize = m_pDataAcc->GetBlockSize(); 235 m_nBufferOffset = m_pDataAcc->GetBlockOffset(); 236 m_dwIndex = 0; 237 return m_dwBufferSize > 0; 238 } 239 240 bool CXML_Parser::IsEOF() { 241 return m_pDataAcc->IsEOF() && m_dwIndex >= m_dwBufferSize; 242 } 243 244 void CXML_Parser::SkipWhiteSpaces() { 245 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 246 if (IsEOF()) 247 return; 248 249 do { 250 while (m_dwIndex < m_dwBufferSize && 251 g_FXCRT_XML_IsWhiteSpace(m_pBuffer[m_dwIndex])) { 252 m_dwIndex++; 253 } 254 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 255 if (m_dwIndex < m_dwBufferSize || IsEOF()) 256 break; 257 } while (ReadNextBlock()); 258 } 259 260 void CXML_Parser::GetName(CFX_ByteString* space, CFX_ByteString* name) { 261 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 262 if (IsEOF()) 263 return; 264 265 CFX_ByteTextBuf buf; 266 uint8_t ch; 267 do { 268 while (m_dwIndex < m_dwBufferSize) { 269 ch = m_pBuffer[m_dwIndex]; 270 if (ch == ':') { 271 *space = buf.AsStringC(); 272 buf.Clear(); 273 } else if (g_FXCRT_XML_IsNameChar(ch)) { 274 buf.AppendChar(ch); 275 } else { 276 break; 277 } 278 m_dwIndex++; 279 } 280 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 281 if (m_dwIndex < m_dwBufferSize || IsEOF()) 282 break; 283 } while (ReadNextBlock()); 284 *name = buf.AsStringC(); 285 } 286 287 void CXML_Parser::SkipLiterals(const CFX_ByteStringC& str) { 288 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 289 if (IsEOF()) { 290 return; 291 } 292 int32_t i = 0, iLen = str.GetLength(); 293 do { 294 while (m_dwIndex < m_dwBufferSize) { 295 if (str.GetAt(i) != m_pBuffer[m_dwIndex++]) { 296 i = 0; 297 continue; 298 } 299 i++; 300 if (i == iLen) 301 break; 302 } 303 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 304 if (i == iLen) 305 return; 306 307 if (m_dwIndex < m_dwBufferSize || IsEOF()) 308 break; 309 } while (ReadNextBlock()); 310 while (!m_pDataAcc->IsEOF()) { 311 ReadNextBlock(); 312 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwBufferSize); 313 } 314 m_dwIndex = m_dwBufferSize; 315 } 316 317 uint32_t CXML_Parser::GetCharRef() { 318 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 319 if (IsEOF()) 320 return 0; 321 322 uint8_t ch; 323 int32_t iState = 0; 324 CFX_ByteTextBuf buf; 325 uint32_t code = 0; 326 do { 327 while (m_dwIndex < m_dwBufferSize) { 328 ch = m_pBuffer[m_dwIndex]; 329 switch (iState) { 330 case 0: 331 if (ch == '#') { 332 m_dwIndex++; 333 iState = 2; 334 break; 335 } 336 iState = 1; 337 case 1: 338 m_dwIndex++; 339 if (ch == ';') { 340 CFX_ByteStringC ref = buf.AsStringC(); 341 if (ref == "gt") 342 code = '>'; 343 else if (ref == "lt") 344 code = '<'; 345 else if (ref == "amp") 346 code = '&'; 347 else if (ref == "apos") 348 code = '\''; 349 else if (ref == "quot") 350 code = '"'; 351 iState = 10; 352 break; 353 } 354 buf.AppendByte(ch); 355 break; 356 case 2: 357 if (ch == 'x') { 358 m_dwIndex++; 359 iState = 4; 360 break; 361 } 362 iState = 3; 363 case 3: 364 m_dwIndex++; 365 if (ch == ';') { 366 iState = 10; 367 break; 368 } 369 if (g_FXCRT_XML_IsDigital(ch)) 370 code = code * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 371 break; 372 case 4: 373 m_dwIndex++; 374 if (ch == ';') { 375 iState = 10; 376 break; 377 } 378 uint8_t nHex = 379 g_FXCRT_XML_ByteTypes[ch] & FXCRTM_XML_CHARTYPE_HexChar; 380 if (nHex) { 381 if (nHex == FXCRTM_XML_CHARTYPE_HexDigital) { 382 code = 383 (code << 4) + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 384 } else if (nHex == FXCRTM_XML_CHARTYPE_HexLowerLetter) { 385 code = (code << 4) + ch - 87; 386 } else { 387 code = (code << 4) + ch - 55; 388 } 389 } 390 break; 391 } 392 if (iState == 10) 393 break; 394 } 395 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 396 if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) { 397 break; 398 } 399 } while (ReadNextBlock()); 400 return code; 401 } 402 403 void CXML_Parser::GetAttrValue(CFX_WideString& value) { 404 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 405 if (IsEOF()) 406 return; 407 408 CFX_UTF8Decoder decoder; 409 uint8_t mark = 0, ch = 0; 410 do { 411 while (m_dwIndex < m_dwBufferSize) { 412 ch = m_pBuffer[m_dwIndex]; 413 if (mark == 0) { 414 if (ch != '\'' && ch != '"') 415 return; 416 417 mark = ch; 418 m_dwIndex++; 419 ch = 0; 420 continue; 421 } 422 m_dwIndex++; 423 if (ch == mark) 424 break; 425 426 if (ch == '&') { 427 decoder.AppendChar(GetCharRef()); 428 if (IsEOF()) { 429 value = decoder.GetResult(); 430 return; 431 } 432 } else { 433 decoder.Input(ch); 434 } 435 } 436 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 437 if (ch == mark || m_dwIndex < m_dwBufferSize || IsEOF()) 438 break; 439 } while (ReadNextBlock()); 440 value = decoder.GetResult(); 441 } 442 443 void CXML_Parser::GetTagName(bool bStartTag, 444 bool* bEndTag, 445 CFX_ByteString* space, 446 CFX_ByteString* name) { 447 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 448 if (IsEOF()) 449 return; 450 451 *bEndTag = false; 452 uint8_t ch; 453 int32_t iState = bStartTag ? 1 : 0; 454 do { 455 while (m_dwIndex < m_dwBufferSize) { 456 ch = m_pBuffer[m_dwIndex]; 457 switch (iState) { 458 case 0: 459 m_dwIndex++; 460 if (ch != '<') 461 break; 462 463 iState = 1; 464 break; 465 case 1: 466 if (ch == '?') { 467 m_dwIndex++; 468 SkipLiterals("?>"); 469 iState = 0; 470 break; 471 } 472 if (ch == '!') { 473 m_dwIndex++; 474 SkipLiterals("-->"); 475 iState = 0; 476 break; 477 } 478 if (ch == '/') { 479 m_dwIndex++; 480 GetName(space, name); 481 *bEndTag = true; 482 } else { 483 GetName(space, name); 484 *bEndTag = false; 485 } 486 return; 487 } 488 } 489 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 490 if (m_dwIndex < m_dwBufferSize || IsEOF()) 491 break; 492 } while (ReadNextBlock()); 493 } 494 495 std::unique_ptr<CXML_Element> CXML_Parser::ParseElement(CXML_Element* pParent, 496 bool bStartTag) { 497 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 498 if (IsEOF()) 499 return nullptr; 500 501 CFX_ByteString tag_name; 502 CFX_ByteString tag_space; 503 bool bEndTag; 504 GetTagName(bStartTag, &bEndTag, &tag_space, &tag_name); 505 if (tag_name.IsEmpty() || bEndTag) 506 return nullptr; 507 508 auto pElement = pdfium::MakeUnique<CXML_Element>( 509 pParent, tag_space.AsStringC(), tag_name.AsStringC()); 510 do { 511 CFX_ByteString attr_space; 512 CFX_ByteString attr_name; 513 while (m_dwIndex < m_dwBufferSize) { 514 SkipWhiteSpaces(); 515 if (IsEOF()) 516 break; 517 518 if (!g_FXCRT_XML_IsNameIntro(m_pBuffer[m_dwIndex])) 519 break; 520 521 GetName(&attr_space, &attr_name); 522 SkipWhiteSpaces(); 523 if (IsEOF()) 524 break; 525 526 if (m_pBuffer[m_dwIndex] != '=') 527 break; 528 529 m_dwIndex++; 530 SkipWhiteSpaces(); 531 if (IsEOF()) 532 break; 533 534 CFX_WideString attr_value; 535 GetAttrValue(attr_value); 536 pElement->m_AttrMap.SetAt(attr_space, attr_name, attr_value); 537 } 538 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 539 if (m_dwIndex < m_dwBufferSize || IsEOF()) 540 break; 541 } while (ReadNextBlock()); 542 SkipWhiteSpaces(); 543 if (IsEOF()) 544 return pElement; 545 546 uint8_t ch = m_pBuffer[m_dwIndex++]; 547 if (ch == '/') { 548 m_dwIndex++; 549 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 550 return pElement; 551 } 552 if (ch != '>') { 553 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 554 return nullptr; 555 } 556 SkipWhiteSpaces(); 557 if (IsEOF()) 558 return pElement; 559 560 CFX_UTF8Decoder decoder; 561 CFX_WideTextBuf content; 562 bool bCDATA = false; 563 int32_t iState = 0; 564 do { 565 while (m_dwIndex < m_dwBufferSize) { 566 ch = m_pBuffer[m_dwIndex++]; 567 switch (iState) { 568 case 0: 569 if (ch == '<') { 570 iState = 1; 571 } else if (ch == '&') { 572 decoder.ClearStatus(); 573 decoder.AppendChar(GetCharRef()); 574 } else { 575 decoder.Input(ch); 576 } 577 break; 578 case 1: 579 if (ch == '!') { 580 iState = 2; 581 } else if (ch == '?') { 582 SkipLiterals("?>"); 583 SkipWhiteSpaces(); 584 iState = 0; 585 } else if (ch == '/') { 586 CFX_ByteString space; 587 CFX_ByteString name; 588 GetName(&space, &name); 589 SkipWhiteSpaces(); 590 m_dwIndex++; 591 iState = 10; 592 } else { 593 content << decoder.GetResult(); 594 CFX_WideString dataStr = content.MakeString(); 595 if (!bCDATA) 596 dataStr.TrimRight(L" \t\r\n"); 597 598 InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); 599 content.Clear(); 600 decoder.Clear(); 601 bCDATA = false; 602 iState = 0; 603 m_dwIndex--; 604 std::unique_ptr<CXML_Element> pSubElement( 605 ParseElement(pElement.get(), true)); 606 if (!pSubElement) 607 break; 608 609 pElement->m_Children.push_back( 610 {CXML_Element::Element, pSubElement.release()}); 611 SkipWhiteSpaces(); 612 } 613 break; 614 case 2: 615 if (ch == '[') { 616 SkipLiterals("]]>"); 617 } else if (ch == '-') { 618 m_dwIndex++; 619 SkipLiterals("-->"); 620 } else { 621 SkipLiterals(">"); 622 } 623 decoder.Clear(); 624 SkipWhiteSpaces(); 625 iState = 0; 626 break; 627 } 628 if (iState == 10) { 629 break; 630 } 631 } 632 m_nOffset = m_nBufferOffset + static_cast<FX_FILESIZE>(m_dwIndex); 633 if (iState == 10 || m_dwIndex < m_dwBufferSize || IsEOF()) 634 break; 635 } while (ReadNextBlock()); 636 content << decoder.GetResult(); 637 CFX_WideString dataStr = content.MakeString(); 638 dataStr.TrimRight(L" \t\r\n"); 639 640 InsertContentSegment(bCDATA, dataStr.AsStringC(), pElement.get()); 641 content.Clear(); 642 decoder.Clear(); 643 bCDATA = false; 644 return pElement; 645 } 646 647 void CXML_Parser::InsertContentSegment(bool bCDATA, 648 const CFX_WideStringC& content, 649 CXML_Element* pElement) { 650 if (content.IsEmpty()) 651 return; 652 653 CXML_Content* pContent = new CXML_Content; 654 pContent->Set(bCDATA, content); 655 pElement->m_Children.push_back({CXML_Element::Content, pContent}); 656 } 657 658 std::unique_ptr<CXML_Element> CXML_Element::Parse(const void* pBuffer, 659 size_t size) { 660 CXML_Parser parser; 661 if (!parser.Init(static_cast<const uint8_t*>(pBuffer), size)) 662 return nullptr; 663 return parser.ParseElement(nullptr, false); 664 } 665 666 CXML_Element::CXML_Element(const CXML_Element* pParent, 667 const CFX_ByteStringC& qSpace, 668 const CFX_ByteStringC& tagname) 669 : m_pParent(pParent), m_QSpaceName(qSpace), m_TagName(tagname) {} 670 671 CXML_Element::~CXML_Element() { 672 Empty(); 673 } 674 675 void CXML_Element::Empty() { 676 RemoveChildren(); 677 } 678 void CXML_Element::RemoveChildren() { 679 for (const ChildRecord& record : m_Children) { 680 if (record.type == Content) { 681 delete static_cast<CXML_Content*>(record.child); 682 } else if (record.type == Element) { 683 CXML_Element* child = static_cast<CXML_Element*>(record.child); 684 child->RemoveChildren(); 685 delete child; 686 } 687 } 688 m_Children.clear(); 689 } 690 CFX_ByteString CXML_Element::GetTagName(bool bQualified) const { 691 if (!bQualified || m_QSpaceName.IsEmpty()) { 692 return m_TagName; 693 } 694 CFX_ByteString bsTag = m_QSpaceName; 695 bsTag += ":"; 696 bsTag += m_TagName; 697 return bsTag; 698 } 699 700 CFX_ByteString CXML_Element::GetNamespace(bool bQualified) const { 701 return bQualified ? m_QSpaceName : GetNamespaceURI(m_QSpaceName); 702 } 703 704 CFX_ByteString CXML_Element::GetNamespaceURI( 705 const CFX_ByteString& qName) const { 706 const CFX_WideString* pwsSpace; 707 const CXML_Element* pElement = this; 708 do { 709 if (qName.IsEmpty()) 710 pwsSpace = pElement->m_AttrMap.Lookup("", "xmlns"); 711 else 712 pwsSpace = pElement->m_AttrMap.Lookup("xmlns", qName); 713 if (pwsSpace) 714 break; 715 716 pElement = pElement->GetParent(); 717 } while (pElement); 718 return pwsSpace ? pwsSpace->UTF8Encode() : CFX_ByteString(); 719 } 720 721 void CXML_Element::GetAttrByIndex(int index, 722 CFX_ByteString& space, 723 CFX_ByteString& name, 724 CFX_WideString& value) const { 725 if (index < 0 || index >= m_AttrMap.GetSize()) 726 return; 727 728 CXML_AttrItem& item = m_AttrMap.GetAt(index); 729 space = item.m_QSpaceName; 730 name = item.m_AttrName; 731 value = item.m_Value; 732 } 733 734 bool CXML_Element::HasAttr(const CFX_ByteStringC& name) const { 735 CFX_ByteStringC bsSpace; 736 CFX_ByteStringC bsName; 737 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 738 return !!m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); 739 } 740 741 bool CXML_Element::GetAttrValue(const CFX_ByteStringC& name, 742 CFX_WideString& attribute) const { 743 CFX_ByteStringC bsSpace; 744 CFX_ByteStringC bsName; 745 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 746 return GetAttrValue(bsSpace, bsName, attribute); 747 } 748 749 bool CXML_Element::GetAttrValue(const CFX_ByteStringC& space, 750 const CFX_ByteStringC& name, 751 CFX_WideString& attribute) const { 752 const CFX_WideString* pValue = 753 m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); 754 if (!pValue) 755 return false; 756 757 attribute = *pValue; 758 return true; 759 } 760 761 bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& name, 762 int& attribute) const { 763 CFX_ByteStringC bsSpace; 764 CFX_ByteStringC bsName; 765 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 766 const CFX_WideString* pwsValue = 767 m_AttrMap.Lookup(CFX_ByteString(bsSpace), CFX_ByteString(bsName)); 768 if (!pwsValue) 769 return false; 770 771 attribute = pwsValue->GetInteger(); 772 return true; 773 } 774 775 bool CXML_Element::GetAttrInteger(const CFX_ByteStringC& space, 776 const CFX_ByteStringC& name, 777 int& attribute) const { 778 const CFX_WideString* pwsValue = 779 m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); 780 if (!pwsValue) 781 return false; 782 783 attribute = pwsValue->GetInteger(); 784 return true; 785 } 786 787 bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& name, 788 FX_FLOAT& attribute) const { 789 CFX_ByteStringC bsSpace; 790 CFX_ByteStringC bsName; 791 FX_XML_SplitQualifiedName(name, bsSpace, bsName); 792 return GetAttrFloat(bsSpace, bsName, attribute); 793 } 794 795 bool CXML_Element::GetAttrFloat(const CFX_ByteStringC& space, 796 const CFX_ByteStringC& name, 797 FX_FLOAT& attribute) const { 798 const CFX_WideString* pValue = 799 m_AttrMap.Lookup(CFX_ByteString(space), CFX_ByteString(name)); 800 if (!pValue) 801 return false; 802 803 attribute = pValue->GetFloat(); 804 return true; 805 } 806 807 CXML_Element::ChildType CXML_Element::GetChildType(uint32_t index) const { 808 return index < m_Children.size() ? m_Children[index].type : Invalid; 809 } 810 811 CFX_WideString CXML_Element::GetContent(uint32_t index) const { 812 if (index < m_Children.size() && m_Children[index].type == Content) { 813 CXML_Content* pContent = 814 static_cast<CXML_Content*>(m_Children[index].child); 815 if (pContent) 816 return pContent->m_Content; 817 } 818 return CFX_WideString(); 819 } 820 821 CXML_Element* CXML_Element::GetElement(uint32_t index) const { 822 if (index < m_Children.size() && m_Children[index].type == Element) 823 return static_cast<CXML_Element*>(m_Children[index].child); 824 return nullptr; 825 } 826 827 uint32_t CXML_Element::CountElements(const CFX_ByteStringC& space, 828 const CFX_ByteStringC& tag) const { 829 int count = 0; 830 for (const ChildRecord& record : m_Children) { 831 if (record.type != Element) 832 continue; 833 834 CXML_Element* pKid = static_cast<CXML_Element*>(record.child); 835 if ((space.IsEmpty() || pKid->m_QSpaceName == space) && 836 pKid->m_TagName == tag) { 837 count++; 838 } 839 } 840 return count; 841 } 842 843 CXML_Element* CXML_Element::GetElement(const CFX_ByteStringC& space, 844 const CFX_ByteStringC& tag, 845 int index) const { 846 if (index < 0) 847 return nullptr; 848 849 for (const ChildRecord& record : m_Children) { 850 if (record.type != Element) 851 continue; 852 853 CXML_Element* pKid = static_cast<CXML_Element*>(record.child); 854 if ((space.IsEmpty() || pKid->m_QSpaceName == space) && 855 pKid->m_TagName == tag) { 856 if (index-- == 0) 857 return pKid; 858 } 859 } 860 return nullptr; 861 } 862 863 uint32_t CXML_Element::FindElement(CXML_Element* pChild) const { 864 int index = 0; 865 for (const ChildRecord& record : m_Children) { 866 if (record.type == Element && 867 static_cast<CXML_Element*>(record.child) == pChild) { 868 return index; 869 } 870 ++index; 871 } 872 return (uint32_t)-1; 873 } 874 875 bool CXML_AttrItem::Matches(const CFX_ByteString& space, 876 const CFX_ByteString& name) const { 877 return (space.IsEmpty() || m_QSpaceName == space) && m_AttrName == name; 878 } 879 880 CXML_AttrMap::CXML_AttrMap() {} 881 882 CXML_AttrMap::~CXML_AttrMap() {} 883 884 const CFX_WideString* CXML_AttrMap::Lookup(const CFX_ByteString& space, 885 const CFX_ByteString& name) const { 886 if (!m_pMap) 887 return nullptr; 888 889 for (const auto& item : *m_pMap) { 890 if (item.Matches(space, name)) 891 return &item.m_Value; 892 } 893 return nullptr; 894 } 895 896 void CXML_AttrMap::SetAt(const CFX_ByteString& space, 897 const CFX_ByteString& name, 898 const CFX_WideString& value) { 899 if (!m_pMap) 900 m_pMap = pdfium::MakeUnique<std::vector<CXML_AttrItem>>(); 901 902 for (CXML_AttrItem& item : *m_pMap) { 903 if (item.Matches(space, name)) { 904 item.m_Value = value; 905 return; 906 } 907 } 908 909 m_pMap->push_back({space, name, CFX_WideString(value)}); 910 } 911 912 int CXML_AttrMap::GetSize() const { 913 return m_pMap ? pdfium::CollectionSize<int>(*m_pMap) : 0; 914 } 915 916 CXML_AttrItem& CXML_AttrMap::GetAt(int index) const { 917 return (*m_pMap)[index]; 918 } 919