1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_parser.h" 8 9 #include <algorithm> 10 #include <utility> 11 #include <vector> 12 13 #include "core/fpdfapi/parser/cpdf_array.h" 14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h" 15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 16 #include "core/fpdfapi/parser/cpdf_document.h" 17 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 18 #include "core/fpdfapi/parser/cpdf_number.h" 19 #include "core/fpdfapi/parser/cpdf_reference.h" 20 #include "core/fpdfapi/parser/cpdf_security_handler.h" 21 #include "core/fpdfapi/parser/cpdf_stream.h" 22 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 24 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 25 #include "core/fxcrt/autorestorer.h" 26 #include "core/fxcrt/cfx_memorystream.h" 27 #include "core/fxcrt/fx_extension.h" 28 #include "core/fxcrt/fx_safe_types.h" 29 #include "third_party/base/ptr_util.h" 30 #include "third_party/base/stl_util.h" 31 32 namespace { 33 34 // A limit on the size of the xref table. Theoretical limits are higher, but 35 // this may be large enough in practice. 36 const int32_t kMaxXRefSize = 1048576; 37 38 constexpr FX_FILESIZE kPDFHeaderSize = 9; 39 40 uint32_t GetVarInt(const uint8_t* p, int32_t n) { 41 uint32_t result = 0; 42 for (int32_t i = 0; i < n; ++i) 43 result = result * 256 + p[i]; 44 return result; 45 } 46 47 int32_t GetStreamNCount(const RetainPtr<CPDF_StreamAcc>& pObjStream) { 48 return pObjStream->GetDict()->GetIntegerFor("N"); 49 } 50 51 int32_t GetStreamFirst(const RetainPtr<CPDF_StreamAcc>& pObjStream) { 52 return pObjStream->GetDict()->GetIntegerFor("First"); 53 } 54 55 } // namespace 56 57 class CPDF_Parser::TrailerData { 58 public: 59 TrailerData() {} 60 ~TrailerData() {} 61 62 CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); } 63 64 std::unique_ptr<CPDF_Dictionary> GetCombinedTrailer() const { 65 std::unique_ptr<CPDF_Dictionary> result = 66 ToDictionary(main_trailer_->Clone()); 67 68 // Info is optional. 69 uint32_t info_obj_num = GetInfoObjNum(); 70 if (info_obj_num > 0) 71 result->SetNewFor<CPDF_Reference>("Info", nullptr, GetInfoObjNum()); 72 73 // Root is required. 74 result->SetNewFor<CPDF_Reference>("Root", nullptr, GetRootObjNum()); 75 return result; 76 } 77 78 void SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer) { 79 ASSERT(trailer); 80 main_trailer_ = std::move(trailer); 81 ApplyTrailer(main_trailer_.get()); 82 } 83 84 void AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer) { 85 ASSERT(trailer); 86 ApplyTrailer(trailer.get()); 87 } 88 89 void Clear() { 90 main_trailer_.reset(); 91 last_info_obj_num_ = 0; 92 last_root_obj_num_ = 0; 93 } 94 95 uint32_t GetInfoObjNum() const { 96 const CPDF_Reference* pRef = ToReference( 97 GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Info") : nullptr); 98 return pRef ? pRef->GetRefObjNum() : last_info_obj_num_; 99 } 100 101 uint32_t GetRootObjNum() const { 102 const CPDF_Reference* pRef = ToReference( 103 GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Root") : nullptr); 104 return pRef ? pRef->GetRefObjNum() : last_root_obj_num_; 105 } 106 107 private: 108 void ApplyTrailer(const CPDF_Dictionary* dict) { 109 // The most recent Info object number contained in last added trailer. 110 // See PDF 1.7 spec, section 3.4.5 - Incremental Updates. 111 const auto* pRef = ToReference(dict->GetObjectFor("Info")); 112 if (pRef) 113 last_info_obj_num_ = pRef->GetRefObjNum(); 114 115 const auto* pRoot = ToReference(dict->GetObjectFor("Root")); 116 if (pRoot) 117 last_root_obj_num_ = pRoot->GetRefObjNum(); 118 } 119 120 std::unique_ptr<CPDF_Dictionary> main_trailer_; 121 uint32_t last_info_obj_num_ = 0; 122 uint32_t last_root_obj_num_ = 0; 123 }; 124 125 CPDF_Parser::CPDF_Parser() 126 : m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()), 127 m_bHasParsed(false), 128 m_bXRefStream(false), 129 m_FileVersion(0), 130 m_TrailerData(pdfium::MakeUnique<TrailerData>()) {} 131 132 CPDF_Parser::~CPDF_Parser() { 133 ReleaseEncryptHandler(); 134 } 135 136 uint32_t CPDF_Parser::GetLastObjNum() const { 137 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; 138 } 139 140 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { 141 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; 142 } 143 144 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { 145 auto it = m_ObjectInfo.find(objnum); 146 return it != m_ObjectInfo.end() ? it->second.pos : 0; 147 } 148 149 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const { 150 ASSERT(IsValidObjectNumber(objnum)); 151 auto it = m_ObjectInfo.find(objnum); 152 return it != m_ObjectInfo.end() ? it->second.type : ObjectType::kFree; 153 } 154 155 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { 156 ASSERT(IsValidObjectNumber(objnum)); 157 auto it = m_ObjectInfo.find(objnum); 158 return it != m_ObjectInfo.end() ? it->second.gennum : 0; 159 } 160 161 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { 162 switch (GetObjectType(objnum)) { 163 case ObjectType::kFree: 164 case ObjectType::kNull: 165 return true; 166 case ObjectType::kNotCompressed: 167 case ObjectType::kCompressed: 168 return false; 169 } 170 ASSERT(false); // NOTREACHED(); 171 return false; 172 } 173 174 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const { 175 return GetObjectType(objnum) == ObjectType::kFree; 176 } 177 178 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { 179 m_pEncryptDict = pDict; 180 } 181 182 RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const { 183 return m_pSyntax->GetFileAccess(); 184 } 185 186 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { 187 if (objnum == 0) { 188 m_ObjectInfo.clear(); 189 return; 190 } 191 192 auto it = m_ObjectInfo.lower_bound(objnum); 193 while (it != m_ObjectInfo.end()) { 194 auto saved_it = it++; 195 m_ObjectInfo.erase(saved_it); 196 } 197 198 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) 199 m_ObjectInfo[objnum - 1].pos = 0; 200 } 201 202 bool CPDF_Parser::InitSyntaxParser( 203 const RetainPtr<IFX_SeekableReadStream>& file_access) { 204 const int32_t header_offset = GetHeaderOffset(file_access); 205 if (header_offset == kInvalidHeaderOffset) 206 return false; 207 if (file_access->GetSize() < header_offset + kPDFHeaderSize) 208 return false; 209 210 m_pSyntax->InitParser(file_access, header_offset); 211 return ParseFileVersion(); 212 } 213 214 bool CPDF_Parser::ParseFileVersion() { 215 m_FileVersion = 0; 216 uint8_t ch; 217 if (!m_pSyntax->GetCharAt(5, ch)) 218 return false; 219 220 if (std::isdigit(ch)) 221 m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10; 222 223 if (!m_pSyntax->GetCharAt(7, ch)) 224 return false; 225 226 if (std::isdigit(ch)) 227 m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 228 return true; 229 } 230 231 CPDF_Parser::Error CPDF_Parser::StartParse( 232 const RetainPtr<IFX_SeekableReadStream>& pFileAccess, 233 CPDF_Document* pDocument) { 234 if (!InitSyntaxParser(pFileAccess)) 235 return FORMAT_ERROR; 236 return StartParseInternal(pDocument); 237 } 238 239 CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) { 240 ASSERT(!m_bHasParsed); 241 m_bHasParsed = true; 242 m_bXRefStream = false; 243 244 m_pDocument = pDocument; 245 246 bool bXRefRebuilt = false; 247 248 m_LastXRefOffset = ParseStartXRef(); 249 250 if (m_LastXRefOffset > 0) { 251 if (!LoadAllCrossRefV4(m_LastXRefOffset) && 252 !LoadAllCrossRefV5(m_LastXRefOffset)) { 253 if (!RebuildCrossRef()) 254 return FORMAT_ERROR; 255 256 bXRefRebuilt = true; 257 m_LastXRefOffset = 0; 258 } 259 } else { 260 if (!RebuildCrossRef()) 261 return FORMAT_ERROR; 262 263 bXRefRebuilt = true; 264 } 265 Error eRet = SetEncryptHandler(); 266 if (eRet != SUCCESS) 267 return eRet; 268 269 m_pDocument->LoadDoc(); 270 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 271 if (bXRefRebuilt) 272 return FORMAT_ERROR; 273 274 ReleaseEncryptHandler(); 275 if (!RebuildCrossRef()) 276 return FORMAT_ERROR; 277 278 eRet = SetEncryptHandler(); 279 if (eRet != SUCCESS) 280 return eRet; 281 282 m_pDocument->LoadDoc(); 283 if (!m_pDocument->GetRoot()) 284 return FORMAT_ERROR; 285 } 286 if (GetRootObjNum() == 0) { 287 ReleaseEncryptHandler(); 288 if (!RebuildCrossRef() || GetRootObjNum() == 0) 289 return FORMAT_ERROR; 290 291 eRet = SetEncryptHandler(); 292 if (eRet != SUCCESS) 293 return eRet; 294 } 295 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { 296 CPDF_Reference* pMetadata = 297 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")); 298 if (pMetadata) 299 m_MetadataObjnum = pMetadata->GetRefObjNum(); 300 } 301 return SUCCESS; 302 } 303 304 FX_FILESIZE CPDF_Parser::ParseStartXRef() { 305 static constexpr char kStartXRefKeyword[] = "startxref"; 306 m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 307 strlen(kStartXRefKeyword)); 308 if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096)) 309 return 0; 310 311 // Skip "startxref" keyword. 312 m_pSyntax->GetKeyword(); 313 314 // Read XRef offset. 315 bool bNumber; 316 const ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); 317 if (!bNumber || xrefpos_str.IsEmpty()) 318 return 0; 319 320 const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str()); 321 if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize()) 322 return 0; 323 324 return result.ValueOrDie(); 325 } 326 327 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { 328 ReleaseEncryptHandler(); 329 if (!GetTrailer()) 330 return FORMAT_ERROR; 331 332 CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt"); 333 if (pEncryptObj) { 334 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { 335 SetEncryptDictionary(pEncryptDict); 336 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { 337 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); 338 if (pEncryptObj) 339 SetEncryptDictionary(pEncryptObj->GetDict()); 340 } 341 } 342 343 if (m_pEncryptDict) { 344 ByteString filter = m_pEncryptDict->GetStringFor("Filter"); 345 if (filter != "Standard") 346 return HANDLER_ERROR; 347 348 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler = 349 pdfium::MakeUnique<CPDF_SecurityHandler>(); 350 if (!pSecurityHandler->OnInit(m_pEncryptDict.Get(), GetIDArray(), 351 m_Password)) 352 return PASSWORD_ERROR; 353 354 m_pSecurityHandler = std::move(pSecurityHandler); 355 } 356 return SUCCESS; 357 } 358 359 void CPDF_Parser::ReleaseEncryptHandler() { 360 m_pSecurityHandler.reset(); 361 SetEncryptDictionary(nullptr); 362 } 363 364 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const { 365 if (!IsValidObjectNumber(objnum)) 366 return 0; 367 368 if (GetObjectType(objnum) == ObjectType::kNotCompressed) 369 return GetObjectPositionOrZero(objnum); 370 371 if (GetObjectType(objnum) == ObjectType::kCompressed) { 372 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); 373 return GetObjectPositionOrZero(pos); 374 } 375 return 0; 376 } 377 378 // Ideally, all the cross reference entries should be verified. 379 // In reality, we rarely see well-formed cross references don't match 380 // with the objects. crbug/602650 showed a case where object numbers 381 // in the cross reference table are all off by one. 382 bool CPDF_Parser::VerifyCrossRefV4() { 383 for (const auto& it : m_ObjectInfo) { 384 if (it.second.pos == 0) 385 continue; 386 // Find the first non-zero position. 387 FX_FILESIZE SavedPos = m_pSyntax->GetPos(); 388 m_pSyntax->SetPos(it.second.pos); 389 bool is_num = false; 390 ByteString num_str = m_pSyntax->GetNextWord(&is_num); 391 m_pSyntax->SetPos(SavedPos); 392 if (!is_num || num_str.IsEmpty() || 393 FXSYS_atoui(num_str.c_str()) != it.first) { 394 // If the object number read doesn't match the one stored, 395 // something is wrong with the cross reference table. 396 return false; 397 } 398 return true; 399 } 400 return true; 401 } 402 403 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { 404 if (!LoadCrossRefV4(xrefpos, true)) 405 return false; 406 407 std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4(); 408 if (!trailer) 409 return false; 410 411 m_TrailerData->SetMainTrailer(std::move(trailer)); 412 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); 413 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) 414 ShrinkObjectMap(xrefsize); 415 416 std::vector<FX_FILESIZE> CrossRefList; 417 std::vector<FX_FILESIZE> XRefStreamList; 418 std::set<FX_FILESIZE> seen_xrefpos; 419 420 CrossRefList.push_back(xrefpos); 421 XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm")); 422 seen_xrefpos.insert(xrefpos); 423 424 // When the trailer doesn't have Prev entry or Prev entry value is not 425 // numerical, GetDirectInteger() returns 0. Loading will end. 426 xrefpos = GetDirectInteger(GetTrailer(), "Prev"); 427 while (xrefpos) { 428 // Check for circular references. 429 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 430 return false; 431 432 seen_xrefpos.insert(xrefpos); 433 434 // SLOW ... 435 CrossRefList.insert(CrossRefList.begin(), xrefpos); 436 LoadCrossRefV4(xrefpos, true); 437 438 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4()); 439 if (!pDict) 440 return false; 441 442 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 443 444 // SLOW ... 445 XRefStreamList.insert(XRefStreamList.begin(), 446 pDict->GetIntegerFor("XRefStm")); 447 m_TrailerData->AppendTrailer(std::move(pDict)); 448 } 449 450 for (size_t i = 0; i < CrossRefList.size(); ++i) { 451 if (!LoadCrossRefV4(CrossRefList[i], false)) 452 return false; 453 454 if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false)) 455 return false; 456 457 if (i == 0 && !VerifyCrossRefV4()) 458 return false; 459 } 460 return true; 461 } 462 463 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) { 464 if (!LoadCrossRefV4(xrefpos, false)) 465 return false; 466 467 std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4(); 468 if (!trailer) 469 return false; 470 471 m_TrailerData->SetMainTrailer(std::move(trailer)); 472 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); 473 if (xrefsize == 0) 474 return false; 475 476 std::vector<FX_FILESIZE> CrossRefList; 477 std::vector<FX_FILESIZE> XRefStreamList; 478 std::set<FX_FILESIZE> seen_xrefpos; 479 480 CrossRefList.push_back(xrefpos); 481 XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm")); 482 seen_xrefpos.insert(xrefpos); 483 484 xrefpos = GetDirectInteger(GetTrailer(), "Prev"); 485 while (xrefpos) { 486 // Check for circular references. 487 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 488 return false; 489 490 seen_xrefpos.insert(xrefpos); 491 492 // SLOW ... 493 CrossRefList.insert(CrossRefList.begin(), xrefpos); 494 LoadCrossRefV4(xrefpos, true); 495 496 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4()); 497 if (!pDict) 498 return false; 499 500 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 501 502 // SLOW ... 503 XRefStreamList.insert(XRefStreamList.begin(), 504 pDict->GetIntegerFor("XRefStm")); 505 m_TrailerData->AppendTrailer(std::move(pDict)); 506 } 507 508 for (size_t i = 1; i < CrossRefList.size(); ++i) { 509 if (!LoadCrossRefV4(CrossRefList[i], false)) 510 return false; 511 512 if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false)) 513 return false; 514 } 515 return true; 516 } 517 518 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData( 519 uint32_t start_objnum, 520 uint32_t count, 521 std::vector<CrossRefObjData>* out_objects) { 522 // Each entry shall be exactly 20 byte. 523 // A sample entry looks like: 524 // "0000000000 00007 f\r\n" 525 static constexpr int32_t kEntryConstSize = 20; 526 527 if (!out_objects) { 528 FX_SAFE_FILESIZE pos = count; 529 pos *= kEntryConstSize; 530 pos += m_pSyntax->GetPos(); 531 if (!pos.IsValid()) 532 return false; 533 m_pSyntax->SetPos(pos.ValueOrDie()); 534 return true; 535 } 536 const size_t start_obj_index = out_objects->size(); 537 FX_SAFE_SIZE_T new_size = start_obj_index; 538 new_size += count; 539 if (!new_size.IsValid()) 540 return false; 541 542 if (new_size.ValueOrDie() > kMaxXRefSize) 543 return false; 544 545 const size_t max_entries_in_file = 546 m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize; 547 if (new_size.ValueOrDie() > max_entries_in_file) 548 return false; 549 550 out_objects->resize(new_size.ValueOrDie()); 551 552 std::vector<char> buf(1024 * kEntryConstSize + 1); 553 buf.back() = '\0'; 554 555 int32_t nBlocks = count / 1024 + 1; 556 for (int32_t block = 0; block < nBlocks; block++) { 557 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 558 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 559 block_size * kEntryConstSize)) { 560 return false; 561 } 562 563 for (int32_t i = 0; i < block_size; i++) { 564 CrossRefObjData& obj_data = 565 (*out_objects)[start_obj_index + block * 1024 + i]; 566 567 const uint32_t objnum = start_objnum + block * 1024 + i; 568 569 obj_data.obj_num = objnum; 570 571 ObjectInfo& info = obj_data.info; 572 573 char* pEntry = &buf[i * kEntryConstSize]; 574 if (pEntry[17] == 'f') { 575 info.pos = 0; 576 info.type = ObjectType::kFree; 577 } else { 578 const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry); 579 if (!offset.IsValid()) 580 return false; 581 582 if (offset.ValueOrDie() == 0) { 583 for (int32_t c = 0; c < 10; c++) { 584 if (!std::isdigit(pEntry[c])) 585 return false; 586 } 587 } 588 589 info.pos = offset.ValueOrDie(); 590 591 // TODO(art-snake): The info.gennum is uint16_t, but version may be 592 // greated than max<uint16_t>. Needs solve this issue. 593 const int32_t version = FXSYS_atoi(pEntry + 11); 594 info.gennum = version; 595 info.type = ObjectType::kNotCompressed; 596 } 597 } 598 } 599 return true; 600 } 601 602 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) { 603 if (out_objects) 604 out_objects->clear(); 605 606 if (m_pSyntax->GetKeyword() != "xref") 607 return false; 608 std::vector<CrossRefObjData> result_objects; 609 while (1) { 610 FX_FILESIZE SavedPos = m_pSyntax->GetPos(); 611 bool bIsNumber; 612 ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 613 if (word.IsEmpty()) { 614 return false; 615 } 616 617 if (!bIsNumber) { 618 m_pSyntax->SetPos(SavedPos); 619 break; 620 } 621 622 uint32_t start_objnum = FXSYS_atoui(word.c_str()); 623 if (start_objnum >= kMaxObjectNumber) 624 return false; 625 626 uint32_t count = m_pSyntax->GetDirectNum(); 627 m_pSyntax->ToNextWord(); 628 SavedPos = m_pSyntax->GetPos(); 629 630 if (!ParseAndAppendCrossRefSubsectionData( 631 start_objnum, count, out_objects ? &result_objects : nullptr)) { 632 return false; 633 } 634 } 635 if (out_objects) 636 *out_objects = std::move(result_objects); 637 return true; 638 } 639 640 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, 641 bool bSkip) { 642 m_pSyntax->SetPos(pos); 643 std::vector<CrossRefObjData> objects; 644 if (!ParseCrossRefV4(bSkip ? nullptr : &objects)) 645 return false; 646 647 MergeCrossRefObjectsData(objects); 648 649 return true; 650 } 651 652 void CPDF_Parser::MergeCrossRefObjectsData( 653 const std::vector<CrossRefObjData>& objects) { 654 for (const auto& obj : objects) { 655 m_ObjectInfo[obj.obj_num] = obj.info; 656 } 657 } 658 659 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { 660 if (!LoadCrossRefV5(&xrefpos, true)) 661 return false; 662 663 std::set<FX_FILESIZE> seen_xrefpos; 664 while (xrefpos) { 665 seen_xrefpos.insert(xrefpos); 666 if (!LoadCrossRefV5(&xrefpos, false)) 667 return false; 668 669 // Check for circular references. 670 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 671 return false; 672 } 673 m_ObjectStreamMap.clear(); 674 m_bXRefStream = true; 675 return true; 676 } 677 678 bool CPDF_Parser::RebuildCrossRef() { 679 m_ObjectInfo.clear(); 680 m_TrailerData->Clear(); 681 682 ParserState state = ParserState::kDefault; 683 int32_t inside_index = 0; 684 uint32_t objnum = 0; 685 uint32_t gennum = 0; 686 int32_t depth = 0; 687 const uint32_t kBufferSize = 4096; 688 std::vector<uint8_t> buffer(kBufferSize); 689 690 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; 691 FX_FILESIZE start_pos = 0; 692 FX_FILESIZE start_pos1 = 0; 693 FX_FILESIZE last_obj = -1; 694 FX_FILESIZE last_xref = -1; 695 FX_FILESIZE last_trailer = -1; 696 697 while (pos < m_pSyntax->m_FileLen) { 698 const FX_FILESIZE saved_pos = pos; 699 bool bOverFlow = false; 700 uint32_t size = 701 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize); 702 if (!m_pSyntax->GetFileAccess()->ReadBlock(buffer.data(), pos, size)) 703 break; 704 705 for (uint32_t i = 0; i < size; i++) { 706 uint8_t byte = buffer[i]; 707 switch (state) { 708 case ParserState::kDefault: 709 if (PDFCharIsWhitespace(byte)) { 710 state = ParserState::kWhitespace; 711 } else if (std::isdigit(byte)) { 712 --i; 713 state = ParserState::kWhitespace; 714 } else if (byte == '%') { 715 inside_index = 0; 716 state = ParserState::kComment; 717 } else if (byte == '(') { 718 state = ParserState::kString; 719 depth = 1; 720 } else if (byte == '<') { 721 inside_index = 1; 722 state = ParserState::kHexString; 723 } else if (byte == '\\') { 724 state = ParserState::kEscapedString; 725 } else if (byte == 't') { 726 state = ParserState::kTrailer; 727 inside_index = 1; 728 } 729 break; 730 731 case ParserState::kWhitespace: 732 if (std::isdigit(byte)) { 733 start_pos = pos + i; 734 state = ParserState::kObjNum; 735 objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte)); 736 } else if (byte == 't') { 737 state = ParserState::kTrailer; 738 inside_index = 1; 739 } else if (byte == 'x') { 740 state = ParserState::kXref; 741 inside_index = 1; 742 } else if (!PDFCharIsWhitespace(byte)) { 743 --i; 744 state = ParserState::kDefault; 745 } 746 break; 747 748 case ParserState::kObjNum: 749 if (std::isdigit(byte)) { 750 objnum = objnum * 10 + 751 FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte)); 752 } else if (PDFCharIsWhitespace(byte)) { 753 state = ParserState::kPostObjNum; 754 } else { 755 --i; 756 state = ParserState::kEndObj; 757 inside_index = 0; 758 } 759 break; 760 761 case ParserState::kPostObjNum: 762 if (std::isdigit(byte)) { 763 start_pos1 = pos + i; 764 state = ParserState::kGenNum; 765 gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte)); 766 } else if (byte == 't') { 767 state = ParserState::kTrailer; 768 inside_index = 1; 769 } else if (!PDFCharIsWhitespace(byte)) { 770 --i; 771 state = ParserState::kDefault; 772 } 773 break; 774 775 case ParserState::kGenNum: 776 if (std::isdigit(byte)) { 777 gennum = gennum * 10 + 778 FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte)); 779 } else if (PDFCharIsWhitespace(byte)) { 780 state = ParserState::kPostGenNum; 781 } else { 782 --i; 783 state = ParserState::kDefault; 784 } 785 break; 786 787 case ParserState::kPostGenNum: 788 if (byte == 'o') { 789 state = ParserState::kBeginObj; 790 inside_index = 1; 791 } else if (std::isdigit(byte)) { 792 objnum = gennum; 793 gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte)); 794 start_pos = start_pos1; 795 start_pos1 = pos + i; 796 state = ParserState::kGenNum; 797 } else if (byte == 't') { 798 state = ParserState::kTrailer; 799 inside_index = 1; 800 } else if (!PDFCharIsWhitespace(byte)) { 801 --i; 802 state = ParserState::kDefault; 803 } 804 break; 805 806 case ParserState::kBeginObj: 807 switch (inside_index) { 808 case 1: 809 if (byte != 'b') { 810 --i; 811 state = ParserState::kDefault; 812 } else { 813 inside_index++; 814 } 815 break; 816 case 2: 817 if (byte != 'j') { 818 --i; 819 state = ParserState::kDefault; 820 } else { 821 inside_index++; 822 } 823 break; 824 case 3: 825 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 826 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; 827 last_obj = start_pos; 828 FX_FILESIZE obj_end = 0; 829 std::unique_ptr<CPDF_Object> pObject = 830 ParseIndirectObjectAtByStrict(m_pDocument.Get(), obj_pos, 831 objnum, &obj_end); 832 if (CPDF_Stream* pStream = ToStream(pObject.get())) { 833 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 834 if ((pDict->KeyExist("Type")) && 835 (pDict->GetStringFor("Type") == "XRef" && 836 pDict->KeyExist("Size"))) { 837 CPDF_Object* pRoot = pDict->GetObjectFor("Root"); 838 if (pRoot && pRoot->GetDict() && 839 pRoot->GetDict()->GetObjectFor("Pages")) { 840 m_TrailerData->SetMainTrailer( 841 ToDictionary(pDict->Clone())); 842 } 843 } 844 } 845 } 846 847 FX_FILESIZE offset = 0; 848 m_pSyntax->SetPos(obj_pos); 849 offset = m_pSyntax->FindTag("obj", 0); 850 if (offset == -1) 851 offset = 0; 852 else 853 offset += 3; 854 855 FX_FILESIZE nLen = obj_end - obj_pos - offset; 856 if ((uint32_t)nLen > size - i) { 857 pos = obj_end + m_pSyntax->m_HeaderOffset; 858 bOverFlow = true; 859 } else { 860 i += (uint32_t)nLen; 861 } 862 863 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && 864 m_ObjectInfo[objnum].pos) { 865 if (pObject) { 866 m_ObjectInfo[objnum].pos = obj_pos; 867 m_ObjectInfo[objnum].gennum = gennum; 868 } 869 } else { 870 m_ObjectInfo[objnum].pos = obj_pos; 871 m_ObjectInfo[objnum].type = ObjectType::kNotCompressed; 872 m_ObjectInfo[objnum].gennum = gennum; 873 } 874 } 875 --i; 876 state = ParserState::kDefault; 877 break; 878 } 879 break; 880 881 case ParserState::kTrailer: 882 if (inside_index == 7) { 883 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 884 last_trailer = pos + i - 7; 885 m_pSyntax->SetPos(pos + i - m_pSyntax->m_HeaderOffset); 886 887 std::unique_ptr<CPDF_Object> pObj = 888 m_pSyntax->GetObjectBody(m_pDocument.Get()); 889 if (pObj) { 890 if (pObj->IsDictionary() || pObj->AsStream()) { 891 CPDF_Stream* pStream = pObj->AsStream(); 892 if (CPDF_Dictionary* pTrailer = 893 pStream ? pStream->GetDict() : pObj->AsDictionary()) { 894 if (GetTrailer()) { 895 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root"); 896 CPDF_Reference* pRef = ToReference(pRoot); 897 if (!pRoot || 898 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && 899 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { 900 auto it = pTrailer->begin(); 901 while (it != pTrailer->end()) { 902 const ByteString& key = it->first; 903 CPDF_Object* pElement = it->second.get(); 904 ++it; 905 uint32_t dwObjNum = 906 pElement ? pElement->GetObjNum() : 0; 907 if (dwObjNum) { 908 GetTrailer()->SetNewFor<CPDF_Reference>( 909 key, m_pDocument.Get(), dwObjNum); 910 } else { 911 GetTrailer()->SetFor(key, pElement->Clone()); 912 } 913 } 914 } 915 } else { 916 m_TrailerData->SetMainTrailer( 917 ToDictionary(pObj->IsStream() ? pTrailer->Clone() 918 : std::move(pObj))); 919 920 FX_FILESIZE dwSavePos = m_pSyntax->GetPos(); 921 ByteString strWord = m_pSyntax->GetKeyword(); 922 if (!strWord.Compare("startxref")) { 923 bool bNumber; 924 ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber); 925 if (bNumber) 926 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str()); 927 } 928 m_pSyntax->SetPos(dwSavePos); 929 } 930 } 931 } 932 } 933 } 934 --i; 935 state = ParserState::kDefault; 936 } else if (byte == "trailer"[inside_index]) { 937 inside_index++; 938 } else { 939 --i; 940 state = ParserState::kDefault; 941 } 942 break; 943 944 case ParserState::kXref: 945 if (inside_index == 4) { 946 last_xref = pos + i - 4; 947 state = ParserState::kWhitespace; 948 } else if (byte == "xref"[inside_index]) { 949 inside_index++; 950 } else { 951 --i; 952 state = ParserState::kDefault; 953 } 954 break; 955 956 case ParserState::kComment: 957 if (PDFCharIsLineEnding(byte)) 958 state = ParserState::kDefault; 959 break; 960 961 case ParserState::kString: 962 if (byte == ')') { 963 if (depth > 0) 964 depth--; 965 } else if (byte == '(') { 966 depth++; 967 } 968 969 if (!depth) 970 state = ParserState::kDefault; 971 break; 972 973 case ParserState::kHexString: 974 if (byte == '>' || (byte == '<' && inside_index == 1)) 975 state = ParserState::kDefault; 976 inside_index = 0; 977 break; 978 979 case ParserState::kEscapedString: 980 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { 981 --i; 982 state = ParserState::kDefault; 983 } 984 break; 985 986 case ParserState::kEndObj: 987 if (PDFCharIsWhitespace(byte)) { 988 state = ParserState::kDefault; 989 } else if (byte == '%' || byte == '(' || byte == '<' || 990 byte == '\\') { 991 state = ParserState::kDefault; 992 --i; 993 } else if (inside_index == 6) { 994 state = ParserState::kDefault; 995 --i; 996 } else if (byte == "endobj"[inside_index]) { 997 inside_index++; 998 } 999 break; 1000 } 1001 1002 if (bOverFlow) { 1003 size = 0; 1004 break; 1005 } 1006 } 1007 pos += size; 1008 1009 // If the position has not changed at all or went backwards in a loop 1010 // iteration, then break out to prevent infinite looping. 1011 if (pos <= saved_pos) 1012 break; 1013 } 1014 1015 if (last_xref != -1 && last_xref > last_obj) 1016 last_trailer = last_xref; 1017 else if (last_trailer == -1 || last_xref < last_obj) 1018 last_trailer = m_pSyntax->m_FileLen; 1019 1020 return GetTrailer() && !m_ObjectInfo.empty(); 1021 } 1022 1023 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { 1024 std::unique_ptr<CPDF_Object> pObject( 1025 ParseIndirectObjectAt(m_pDocument.Get(), *pos, 0)); 1026 if (!pObject) 1027 return false; 1028 1029 uint32_t objnum = pObject->GetObjNum(); 1030 if (!objnum) 1031 return false; 1032 1033 CPDF_Object* pUnownedObject = pObject.get(); 1034 if (m_pDocument) { 1035 const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot(); 1036 if (pRootDict && pRootDict->GetObjNum() == objnum) 1037 return false; 1038 if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 1039 objnum, std::move(pObject))) { 1040 return false; 1041 } 1042 } 1043 1044 CPDF_Stream* pStream = pUnownedObject->AsStream(); 1045 if (!pStream) 1046 return false; 1047 1048 CPDF_Dictionary* pDict = pStream->GetDict(); 1049 *pos = pDict->GetIntegerFor("Prev"); 1050 int32_t size = pDict->GetIntegerFor("Size"); 1051 if (size < 0) 1052 return false; 1053 1054 std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone()); 1055 if (bMainXRef) { 1056 m_TrailerData->SetMainTrailer(std::move(pNewTrailer)); 1057 ShrinkObjectMap(size); 1058 for (auto& it : m_ObjectInfo) 1059 it.second.type = ObjectType::kFree; 1060 } else { 1061 m_TrailerData->AppendTrailer(std::move(pNewTrailer)); 1062 } 1063 1064 std::vector<std::pair<int32_t, int32_t>> arrIndex; 1065 CPDF_Array* pArray = pDict->GetArrayFor("Index"); 1066 if (pArray) { 1067 for (size_t i = 0; i < pArray->GetCount() / 2; i++) { 1068 CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2); 1069 CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1); 1070 1071 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { 1072 int nStartNum = pStartNumObj->GetInteger(); 1073 int nCount = pCountObj->GetInteger(); 1074 if (nStartNum >= 0 && nCount > 0) 1075 arrIndex.push_back(std::make_pair(nStartNum, nCount)); 1076 } 1077 } 1078 } 1079 1080 if (arrIndex.size() == 0) 1081 arrIndex.push_back(std::make_pair(0, size)); 1082 1083 pArray = pDict->GetArrayFor("W"); 1084 if (!pArray) 1085 return false; 1086 1087 std::vector<uint32_t> WidthArray; 1088 FX_SAFE_UINT32 dwAccWidth = 0; 1089 for (size_t i = 0; i < pArray->GetCount(); ++i) { 1090 WidthArray.push_back(pArray->GetIntegerAt(i)); 1091 dwAccWidth += WidthArray[i]; 1092 } 1093 1094 if (!dwAccWidth.IsValid() || WidthArray.size() < 3) 1095 return false; 1096 1097 uint32_t totalWidth = dwAccWidth.ValueOrDie(); 1098 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream); 1099 pAcc->LoadAllDataFiltered(); 1100 1101 const uint8_t* pData = pAcc->GetData(); 1102 uint32_t dwTotalSize = pAcc->GetSize(); 1103 uint32_t segindex = 0; 1104 for (uint32_t i = 0; i < arrIndex.size(); i++) { 1105 int32_t startnum = arrIndex[i].first; 1106 if (startnum < 0) 1107 continue; 1108 1109 uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second); 1110 FX_SAFE_UINT32 dwCaculatedSize = segindex; 1111 dwCaculatedSize += count; 1112 dwCaculatedSize *= totalWidth; 1113 if (!dwCaculatedSize.IsValid() || 1114 dwCaculatedSize.ValueOrDie() > dwTotalSize) { 1115 continue; 1116 } 1117 1118 const uint8_t* segstart = pData + segindex * totalWidth; 1119 FX_SAFE_UINT32 dwMaxObjNum = startnum; 1120 dwMaxObjNum += count; 1121 uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; 1122 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) 1123 continue; 1124 1125 for (uint32_t j = 0; j < count; j++) { 1126 ObjectType type = ObjectType::kNotCompressed; 1127 const uint8_t* entrystart = segstart + j * totalWidth; 1128 if (WidthArray[0]) { 1129 const int cross_ref_stream_obj_type = 1130 GetVarInt(entrystart, WidthArray[0]); 1131 type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type); 1132 } 1133 1134 if (GetObjectType(startnum + j) == ObjectType::kNull) { 1135 FX_FILESIZE offset = 1136 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1137 m_ObjectInfo[startnum + j].pos = offset; 1138 continue; 1139 } 1140 1141 if (GetObjectType(startnum + j) != ObjectType::kFree) 1142 continue; 1143 1144 ObjectInfo& info = m_ObjectInfo[startnum + j]; 1145 1146 info.type = type; 1147 if (type == ObjectType::kFree) { 1148 info.pos = 0; 1149 } else { 1150 const FX_FILESIZE entry_value = 1151 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1152 if (type == ObjectType::kNotCompressed) { 1153 const auto object_offset = entry_value; 1154 info.pos = object_offset; 1155 } else { 1156 const auto archive_obj_num = entry_value; 1157 info.archive_obj_num = archive_obj_num; 1158 if (archive_obj_num < 0 || !IsValidObjectNumber(archive_obj_num)) 1159 return false; 1160 m_ObjectInfo[archive_obj_num].type = ObjectType::kNull; 1161 } 1162 } 1163 } 1164 segindex += count; 1165 } 1166 return true; 1167 } 1168 1169 const CPDF_Array* CPDF_Parser::GetIDArray() const { 1170 return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr; 1171 } 1172 1173 CPDF_Dictionary* CPDF_Parser::GetTrailer() const { 1174 return m_TrailerData->GetMainTrailer(); 1175 } 1176 1177 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const { 1178 return m_TrailerData->GetCombinedTrailer(); 1179 } 1180 1181 uint32_t CPDF_Parser::GetInfoObjNum() { 1182 return m_TrailerData->GetInfoObjNum(); 1183 } 1184 1185 uint32_t CPDF_Parser::GetRootObjNum() { 1186 return m_TrailerData->GetRootObjNum(); 1187 } 1188 1189 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject( 1190 CPDF_IndirectObjectHolder* pObjList, 1191 uint32_t objnum) { 1192 if (!IsValidObjectNumber(objnum)) 1193 return nullptr; 1194 1195 // Prevent circular parsing the same object. 1196 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) 1197 return nullptr; 1198 1199 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum); 1200 if (GetObjectType(objnum) == ObjectType::kNotCompressed || 1201 GetObjectType(objnum) == ObjectType::kNull) { 1202 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1203 if (pos <= 0) 1204 return nullptr; 1205 return ParseIndirectObjectAt(pObjList, pos, objnum); 1206 } 1207 if (GetObjectType(objnum) != ObjectType::kCompressed) 1208 return nullptr; 1209 1210 RetainPtr<CPDF_StreamAcc> pObjStream = 1211 GetObjectStream(m_ObjectInfo[objnum].pos); 1212 if (!pObjStream) 1213 return nullptr; 1214 1215 auto file = pdfium::MakeRetain<CFX_MemoryStream>( 1216 const_cast<uint8_t*>(pObjStream->GetData()), 1217 static_cast<size_t>(pObjStream->GetSize()), false); 1218 CPDF_SyntaxParser syntax; 1219 syntax.InitParser(file, 0); 1220 const int32_t offset = GetStreamFirst(pObjStream); 1221 1222 // Read object numbers from |pObjStream| into a cache. 1223 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { 1224 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { 1225 uint32_t thisnum = syntax.GetDirectNum(); 1226 uint32_t thisoff = syntax.GetDirectNum(); 1227 m_ObjCache[pObjStream][thisnum] = thisoff; 1228 } 1229 } 1230 1231 const auto it = m_ObjCache[pObjStream].find(objnum); 1232 if (it == m_ObjCache[pObjStream].end()) 1233 return nullptr; 1234 1235 syntax.SetPos(offset + it->second); 1236 return syntax.GetObjectBody(pObjList); 1237 } 1238 1239 RetainPtr<CPDF_StreamAcc> CPDF_Parser::GetObjectStream(uint32_t objnum) { 1240 auto it = m_ObjectStreamMap.find(objnum); 1241 if (it != m_ObjectStreamMap.end()) 1242 return it->second; 1243 1244 if (!m_pDocument) 1245 return nullptr; 1246 1247 const CPDF_Stream* pStream = 1248 ToStream(m_pDocument->GetOrParseIndirectObject(objnum)); 1249 if (!pStream) 1250 return nullptr; 1251 1252 auto pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream); 1253 pStreamAcc->LoadAllDataFiltered(); 1254 m_ObjectStreamMap[objnum] = pStreamAcc; 1255 return pStreamAcc; 1256 } 1257 1258 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt( 1259 CPDF_IndirectObjectHolder* pObjList, 1260 FX_FILESIZE pos, 1261 uint32_t objnum) { 1262 return ParseIndirectObjectAtInternal( 1263 pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr); 1264 } 1265 1266 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal( 1267 CPDF_IndirectObjectHolder* pObjList, 1268 FX_FILESIZE pos, 1269 uint32_t objnum, 1270 CPDF_SyntaxParser::ParseType parse_type, 1271 FX_FILESIZE* pResultPos) { 1272 const FX_FILESIZE saved_pos = m_pSyntax->GetPos(); 1273 m_pSyntax->SetPos(pos); 1274 auto result = m_pSyntax->GetIndirectObject(pObjList, parse_type); 1275 1276 if (pResultPos) 1277 *pResultPos = m_pSyntax->GetPos(); 1278 m_pSyntax->SetPos(saved_pos); 1279 1280 if (result && objnum && result->GetObjNum() != objnum) 1281 return nullptr; 1282 1283 const bool should_decrypt = m_pSecurityHandler && 1284 m_pSecurityHandler->GetCryptoHandler() && 1285 objnum != m_MetadataObjnum; 1286 if (should_decrypt) 1287 result = m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree( 1288 std::move(result)); 1289 1290 return result; 1291 } 1292 1293 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict( 1294 CPDF_IndirectObjectHolder* pObjList, 1295 FX_FILESIZE pos, 1296 uint32_t objnum, 1297 FX_FILESIZE* pResultPos) { 1298 return ParseIndirectObjectAtInternal( 1299 pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos); 1300 } 1301 1302 uint32_t CPDF_Parser::GetFirstPageNo() const { 1303 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0; 1304 } 1305 1306 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() { 1307 if (m_pSyntax->GetKeyword() != "trailer") 1308 return nullptr; 1309 1310 return ToDictionary(m_pSyntax->GetObjectBody(m_pDocument.Get())); 1311 } 1312 1313 uint32_t CPDF_Parser::GetPermissions() const { 1314 if (!m_pSecurityHandler) 1315 return 0xFFFFFFFF; 1316 1317 uint32_t dwPermission = m_pSecurityHandler->GetPermissions(); 1318 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") { 1319 // See PDF Reference 1.7, page 123, table 3.20. 1320 dwPermission &= 0xFFFFFFFC; 1321 dwPermission |= 0xFFFFF0C0; 1322 } 1323 return dwPermission; 1324 } 1325 1326 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() { 1327 return CPDF_LinearizedHeader::Parse(m_pSyntax.get()); 1328 } 1329 1330 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( 1331 const RetainPtr<IFX_SeekableReadStream>& pFileAccess, 1332 CPDF_Document* pDocument) { 1333 ASSERT(!m_bHasParsed); 1334 m_bXRefStream = false; 1335 m_LastXRefOffset = 0; 1336 1337 if (!InitSyntaxParser(pFileAccess)) 1338 return FORMAT_ERROR; 1339 1340 m_pLinearized = ParseLinearizedHeader(); 1341 if (!m_pLinearized) 1342 return StartParseInternal(std::move(pDocument)); 1343 1344 m_bHasParsed = true; 1345 m_pDocument = pDocument; 1346 1347 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); 1348 FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset; 1349 bool bXRefRebuilt = false; 1350 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false); 1351 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) { 1352 if (!RebuildCrossRef()) 1353 return FORMAT_ERROR; 1354 1355 bXRefRebuilt = true; 1356 m_LastXRefOffset = 0; 1357 } 1358 if (bLoadV4) { 1359 std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4(); 1360 if (!trailer) 1361 return SUCCESS; 1362 1363 m_TrailerData->SetMainTrailer(std::move(trailer)); 1364 int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size"); 1365 if (xrefsize > 0) 1366 ShrinkObjectMap(xrefsize); 1367 } 1368 1369 Error eRet = SetEncryptHandler(); 1370 if (eRet != SUCCESS) 1371 return eRet; 1372 1373 m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); 1374 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 1375 if (bXRefRebuilt) 1376 return FORMAT_ERROR; 1377 1378 ReleaseEncryptHandler(); 1379 if (!RebuildCrossRef()) 1380 return FORMAT_ERROR; 1381 1382 eRet = SetEncryptHandler(); 1383 if (eRet != SUCCESS) 1384 return eRet; 1385 1386 m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); 1387 if (!m_pDocument->GetRoot()) 1388 return FORMAT_ERROR; 1389 } 1390 1391 if (GetRootObjNum() == 0) { 1392 ReleaseEncryptHandler(); 1393 if (!RebuildCrossRef() || GetRootObjNum() == 0) 1394 return FORMAT_ERROR; 1395 1396 eRet = SetEncryptHandler(); 1397 if (eRet != SUCCESS) 1398 return eRet; 1399 } 1400 1401 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { 1402 if (CPDF_Reference* pMetadata = 1403 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"))) 1404 m_MetadataObjnum = pMetadata->GetRefObjNum(); 1405 } 1406 return SUCCESS; 1407 } 1408 1409 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { 1410 if (!LoadCrossRefV5(&xrefpos, false)) 1411 return false; 1412 1413 std::set<FX_FILESIZE> seen_xrefpos; 1414 while (xrefpos) { 1415 seen_xrefpos.insert(xrefpos); 1416 if (!LoadCrossRefV5(&xrefpos, false)) 1417 return false; 1418 1419 // Check for circular references. 1420 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 1421 return false; 1422 } 1423 m_ObjectStreamMap.clear(); 1424 m_bXRefStream = true; 1425 return true; 1426 } 1427 1428 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { 1429 const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev"); 1430 if (!main_xref_offset.IsValid()) 1431 return FORMAT_ERROR; 1432 1433 if (main_xref_offset.ValueOrDie() == 0) 1434 return SUCCESS; 1435 1436 const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum); 1437 m_MetadataObjnum = 0; 1438 m_ObjectStreamMap.clear(); 1439 m_ObjCache.clear(); 1440 1441 if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) && 1442 !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) { 1443 m_LastXRefOffset = 0; 1444 return FORMAT_ERROR; 1445 } 1446 1447 return SUCCESS; 1448 } 1449 1450 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType( 1451 int cross_ref_stream_type) const { 1452 switch (cross_ref_stream_type) { 1453 case 0: 1454 return CPDF_Parser::ObjectType::kFree; 1455 case 1: 1456 return CPDF_Parser::ObjectType::kNotCompressed; 1457 case 2: 1458 return CPDF_Parser::ObjectType::kCompressed; 1459 default: 1460 return CPDF_Parser::ObjectType::kNull; 1461 } 1462 } 1463