1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_data_avail.h" 8 9 #include <algorithm> 10 #include <memory> 11 #include <utility> 12 13 #include "core/fpdfapi/cpdf_modulemgr.h" 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 16 #include "core/fpdfapi/parser/cpdf_document.h" 17 #include "core/fpdfapi/parser/cpdf_hint_tables.h" 18 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 19 #include "core/fpdfapi/parser/cpdf_name.h" 20 #include "core/fpdfapi/parser/cpdf_number.h" 21 #include "core/fpdfapi/parser/cpdf_reference.h" 22 #include "core/fpdfapi/parser/cpdf_stream.h" 23 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 24 #include "core/fxcrt/fx_ext.h" 25 #include "core/fxcrt/fx_safe_types.h" 26 #include "third_party/base/numerics/safe_conversions.h" 27 #include "third_party/base/ptr_util.h" 28 #include "third_party/base/stl_util.h" 29 30 CPDF_DataAvail::FileAvail::~FileAvail() {} 31 32 CPDF_DataAvail::DownloadHints::~DownloadHints() {} 33 34 // static 35 int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; 36 37 CPDF_DataAvail::CPDF_DataAvail( 38 FileAvail* pFileAvail, 39 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead, 40 bool bSupportHintTable) 41 : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) { 42 m_Pos = 0; 43 m_dwFileLen = 0; 44 if (m_pFileRead) { 45 m_dwFileLen = (uint32_t)m_pFileRead->GetSize(); 46 } 47 m_dwCurrentOffset = 0; 48 m_dwXRefOffset = 0; 49 m_dwTrailerOffset = 0; 50 m_bufferOffset = 0; 51 m_bufferSize = 0; 52 m_PagesObjNum = 0; 53 m_dwCurrentXRefSteam = 0; 54 m_dwAcroFormObjNum = 0; 55 m_dwInfoObjNum = 0; 56 m_pDocument = 0; 57 m_dwEncryptObjNum = 0; 58 m_dwPrevXRefOffset = 0; 59 m_dwLastXRefOffset = 0; 60 m_bDocAvail = false; 61 m_bMainXRefLoadTried = false; 62 m_bDocAvail = false; 63 m_bPagesLoad = false; 64 m_bPagesTreeLoad = false; 65 m_bMainXRefLoadedOK = false; 66 m_bAnnotsLoad = false; 67 m_bHaveAcroForm = false; 68 m_bAcroFormLoad = false; 69 m_bPageLoadedOK = false; 70 m_bNeedDownLoadResource = false; 71 m_bLinearizedFormParamLoad = false; 72 m_pTrailer = nullptr; 73 m_pCurrentParser = nullptr; 74 m_pAcroForm = nullptr; 75 m_pPageDict = nullptr; 76 m_pPageResource = nullptr; 77 m_docStatus = PDF_DATAAVAIL_HEADER; 78 m_bTotalLoadPageTree = false; 79 m_bCurPageDictLoadOK = false; 80 m_bLinearedDataOK = false; 81 m_bSupportHintTable = bSupportHintTable; 82 } 83 84 CPDF_DataAvail::~CPDF_DataAvail() { 85 m_pHintTables.reset(); 86 for (CPDF_Object* pObject : m_arrayAcroforms) 87 delete pObject; 88 } 89 90 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) { 91 m_pDocument = pDoc; 92 } 93 94 uint32_t CPDF_DataAvail::GetObjectSize(uint32_t objnum, FX_FILESIZE& offset) { 95 CPDF_Parser* pParser = m_pDocument->GetParser(); 96 if (!pParser || !pParser->IsValidObjectNumber(objnum)) 97 return 0; 98 99 if (pParser->GetObjectType(objnum) == 2) 100 objnum = pParser->GetObjectPositionOrZero(objnum); 101 102 if (pParser->GetObjectType(objnum) != 1 && 103 pParser->GetObjectType(objnum) != 255) { 104 return 0; 105 } 106 107 offset = pParser->GetObjectPositionOrZero(objnum); 108 if (offset == 0) 109 return 0; 110 111 auto it = pParser->m_SortedOffset.find(offset); 112 if (it == pParser->m_SortedOffset.end() || 113 ++it == pParser->m_SortedOffset.end()) { 114 return 0; 115 } 116 return *it - offset; 117 } 118 119 bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array, 120 bool bParsePage, 121 DownloadHints* pHints, 122 std::vector<CPDF_Object*>& ret_array) { 123 if (obj_array.empty()) 124 return true; 125 126 uint32_t count = 0; 127 std::vector<CPDF_Object*> new_obj_array; 128 for (CPDF_Object* pObj : obj_array) { 129 if (!pObj) 130 continue; 131 132 int32_t type = pObj->GetType(); 133 switch (type) { 134 case CPDF_Object::ARRAY: { 135 CPDF_Array* pArray = pObj->AsArray(); 136 for (size_t k = 0; k < pArray->GetCount(); ++k) 137 new_obj_array.push_back(pArray->GetObjectAt(k)); 138 } break; 139 case CPDF_Object::STREAM: 140 pObj = pObj->GetDict(); 141 case CPDF_Object::DICTIONARY: { 142 CPDF_Dictionary* pDict = pObj->GetDict(); 143 if (pDict && pDict->GetStringFor("Type") == "Page" && !bParsePage) 144 continue; 145 146 for (const auto& it : *pDict) { 147 if (it.first != "Parent") 148 new_obj_array.push_back(it.second.get()); 149 } 150 } break; 151 case CPDF_Object::REFERENCE: { 152 CPDF_Reference* pRef = pObj->AsReference(); 153 uint32_t dwNum = pRef->GetRefObjNum(); 154 155 FX_FILESIZE offset; 156 uint32_t size = GetObjectSize(dwNum, offset); 157 if (size == 0 || offset < 0 || offset >= m_dwFileLen) 158 break; 159 160 if (!IsDataAvail(offset, size, pHints)) { 161 ret_array.push_back(pObj); 162 count++; 163 } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) { 164 m_ObjectSet.insert(dwNum); 165 CPDF_Object* pReferred = 166 m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); 167 if (pReferred) 168 new_obj_array.push_back(pReferred); 169 } 170 } break; 171 } 172 } 173 174 if (count > 0) { 175 for (CPDF_Object* pObj : new_obj_array) { 176 CPDF_Reference* pRef = pObj->AsReference(); 177 if (pRef && pdfium::ContainsKey(m_ObjectSet, pRef->GetRefObjNum())) 178 continue; 179 ret_array.push_back(pObj); 180 } 181 return false; 182 } 183 184 obj_array = new_obj_array; 185 return AreObjectsAvailable(obj_array, false, pHints, ret_array); 186 } 187 188 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( 189 DownloadHints* pHints) { 190 if (!m_dwFileLen && m_pFileRead) { 191 m_dwFileLen = (uint32_t)m_pFileRead->GetSize(); 192 if (!m_dwFileLen) 193 return DataError; 194 } 195 196 while (!m_bDocAvail) { 197 if (!CheckDocStatus(pHints)) 198 return DataNotAvailable; 199 } 200 201 return DataAvailable; 202 } 203 204 bool CPDF_DataAvail::CheckAcroFormSubObject(DownloadHints* pHints) { 205 if (m_objs_array.empty()) { 206 m_ObjectSet.clear(); 207 std::vector<CPDF_Object*> obj_array = m_arrayAcroforms; 208 if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array)) 209 return false; 210 211 m_objs_array.clear(); 212 return true; 213 } 214 215 std::vector<CPDF_Object*> new_objs_array; 216 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 217 m_objs_array = new_objs_array; 218 return false; 219 } 220 221 for (CPDF_Object* pObject : m_arrayAcroforms) 222 delete pObject; 223 224 m_arrayAcroforms.clear(); 225 return true; 226 } 227 228 bool CPDF_DataAvail::CheckAcroForm(DownloadHints* pHints) { 229 bool bExist = false; 230 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist).release(); 231 if (!bExist) { 232 m_docStatus = PDF_DATAAVAIL_PAGETREE; 233 return true; 234 } 235 236 if (!m_pAcroForm) { 237 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 238 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 239 return true; 240 } 241 return false; 242 } 243 244 m_arrayAcroforms.push_back(m_pAcroForm); 245 m_docStatus = PDF_DATAAVAIL_PAGETREE; 246 return true; 247 } 248 249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) { 250 switch (m_docStatus) { 251 case PDF_DATAAVAIL_HEADER: 252 return CheckHeader(pHints); 253 case PDF_DATAAVAIL_FIRSTPAGE: 254 return CheckFirstPage(pHints); 255 case PDF_DATAAVAIL_HINTTABLE: 256 return CheckHintTables(pHints); 257 case PDF_DATAAVAIL_END: 258 return CheckEnd(pHints); 259 case PDF_DATAAVAIL_CROSSREF: 260 return CheckCrossRef(pHints); 261 case PDF_DATAAVAIL_CROSSREF_ITEM: 262 return CheckCrossRefItem(pHints); 263 case PDF_DATAAVAIL_CROSSREF_STREAM: 264 return CheckAllCrossRefStream(pHints); 265 case PDF_DATAAVAIL_TRAILER: 266 return CheckTrailer(pHints); 267 case PDF_DATAAVAIL_TRAILER_APPEND: 268 return CheckTrailerAppend(pHints); 269 case PDF_DATAAVAIL_LOADALLCROSSREF: 270 return LoadAllXref(pHints); 271 case PDF_DATAAVAIL_LOADALLFILE: 272 return LoadAllFile(pHints); 273 case PDF_DATAAVAIL_ROOT: 274 return CheckRoot(pHints); 275 case PDF_DATAAVAIL_INFO: 276 return CheckInfo(pHints); 277 case PDF_DATAAVAIL_ACROFORM: 278 return CheckAcroForm(pHints); 279 case PDF_DATAAVAIL_PAGETREE: 280 if (m_bTotalLoadPageTree) 281 return CheckPages(pHints); 282 return LoadDocPages(pHints); 283 case PDF_DATAAVAIL_PAGE: 284 if (m_bTotalLoadPageTree) 285 return CheckPage(pHints); 286 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; 287 return true; 288 case PDF_DATAAVAIL_ERROR: 289 return LoadAllFile(pHints); 290 case PDF_DATAAVAIL_PAGE_LATERLOAD: 291 m_docStatus = PDF_DATAAVAIL_PAGE; 292 default: 293 m_bDocAvail = true; 294 return true; 295 } 296 } 297 298 bool CPDF_DataAvail::CheckPageStatus(DownloadHints* pHints) { 299 switch (m_docStatus) { 300 case PDF_DATAAVAIL_PAGETREE: 301 return CheckPages(pHints); 302 case PDF_DATAAVAIL_PAGE: 303 return CheckPage(pHints); 304 case PDF_DATAAVAIL_ERROR: 305 return LoadAllFile(pHints); 306 default: 307 m_bPagesTreeLoad = true; 308 m_bPagesLoad = true; 309 return true; 310 } 311 } 312 313 bool CPDF_DataAvail::LoadAllFile(DownloadHints* pHints) { 314 if (m_pFileAvail->IsDataAvail(0, (uint32_t)m_dwFileLen)) { 315 m_docStatus = PDF_DATAAVAIL_DONE; 316 return true; 317 } 318 319 pHints->AddSegment(0, (uint32_t)m_dwFileLen); 320 return false; 321 } 322 323 bool CPDF_DataAvail::LoadAllXref(DownloadHints* pHints) { 324 m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset); 325 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && 326 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { 327 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 328 return false; 329 } 330 331 m_dwRootObjNum = m_parser.GetRootObjNum(); 332 m_dwInfoObjNum = m_parser.GetInfoObjNum(); 333 m_pCurrentParser = &m_parser; 334 m_docStatus = PDF_DATAAVAIL_ROOT; 335 return true; 336 } 337 338 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum, 339 DownloadHints* pHints, 340 bool* pExistInFile) { 341 uint32_t size = 0; 342 FX_FILESIZE offset = 0; 343 CPDF_Parser* pParser = nullptr; 344 345 if (pExistInFile) 346 *pExistInFile = true; 347 348 if (m_pDocument) { 349 size = GetObjectSize(objnum, offset); 350 pParser = m_pDocument->GetParser(); 351 } else { 352 size = (uint32_t)m_parser.GetObjectSize(objnum); 353 offset = m_parser.GetObjectOffset(objnum); 354 pParser = &m_parser; 355 } 356 357 if (!IsDataAvail(offset, size, pHints)) 358 return nullptr; 359 360 std::unique_ptr<CPDF_Object> pRet; 361 if (pParser) 362 pRet = pParser->ParseIndirectObject(nullptr, objnum); 363 364 if (!pRet && pExistInFile) 365 *pExistInFile = false; 366 367 return pRet; 368 } 369 370 bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) { 371 bool bExist = false; 372 std::unique_ptr<CPDF_Object> pInfo = 373 GetObject(m_dwInfoObjNum, pHints, &bExist); 374 if (bExist && !pInfo) { 375 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 376 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 377 return true; 378 } 379 if (m_Pos == m_dwFileLen) 380 m_docStatus = PDF_DATAAVAIL_ERROR; 381 return false; 382 } 383 m_docStatus = 384 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; 385 return true; 386 } 387 388 bool CPDF_DataAvail::CheckRoot(DownloadHints* pHints) { 389 bool bExist = false; 390 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); 391 if (!bExist) { 392 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 393 return true; 394 } 395 396 if (!m_pRoot) { 397 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 398 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 399 return true; 400 } 401 return false; 402 } 403 404 CPDF_Dictionary* pDict = m_pRoot->GetDict(); 405 if (!pDict) { 406 m_docStatus = PDF_DATAAVAIL_ERROR; 407 return false; 408 } 409 410 CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages")); 411 if (!pRef) { 412 m_docStatus = PDF_DATAAVAIL_ERROR; 413 return false; 414 } 415 416 m_PagesObjNum = pRef->GetRefObjNum(); 417 CPDF_Reference* pAcroFormRef = 418 ToReference(m_pRoot->GetDict()->GetObjectFor("AcroForm")); 419 if (pAcroFormRef) { 420 m_bHaveAcroForm = true; 421 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum(); 422 } 423 424 if (m_dwInfoObjNum) { 425 m_docStatus = PDF_DATAAVAIL_INFO; 426 } else { 427 m_docStatus = 428 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; 429 } 430 return true; 431 } 432 433 bool CPDF_DataAvail::PreparePageItem() { 434 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 435 CPDF_Reference* pRef = 436 ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr); 437 if (!pRef) { 438 m_docStatus = PDF_DATAAVAIL_ERROR; 439 return false; 440 } 441 442 m_PagesObjNum = pRef->GetRefObjNum(); 443 m_pCurrentParser = m_pDocument->GetParser(); 444 m_docStatus = PDF_DATAAVAIL_PAGETREE; 445 return true; 446 } 447 448 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) { 449 return m_pageMapCheckState.insert(dwPage).second; 450 } 451 452 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) { 453 m_pageMapCheckState.erase(dwPage); 454 } 455 456 bool CPDF_DataAvail::CheckPage(DownloadHints* pHints) { 457 std::vector<uint32_t> UnavailObjList; 458 for (uint32_t dwPageObjNum : m_PageObjList) { 459 bool bExists = false; 460 std::unique_ptr<CPDF_Object> pObj = 461 GetObject(dwPageObjNum, pHints, &bExists); 462 if (!pObj) { 463 if (bExists) 464 UnavailObjList.push_back(dwPageObjNum); 465 continue; 466 } 467 CPDF_Array* pArray = ToArray(pObj.get()); 468 if (pArray) { 469 for (const auto& pArrayObj : *pArray) { 470 if (CPDF_Reference* pRef = ToReference(pArrayObj.get())) 471 UnavailObjList.push_back(pRef->GetRefObjNum()); 472 } 473 } 474 if (!pObj->IsDictionary()) 475 continue; 476 477 CFX_ByteString type = pObj->GetDict()->GetStringFor("Type"); 478 if (type == "Pages") { 479 m_PagesArray.push_back(std::move(pObj)); 480 continue; 481 } 482 } 483 m_PageObjList.clear(); 484 if (!UnavailObjList.empty()) { 485 m_PageObjList = std::move(UnavailObjList); 486 return false; 487 } 488 size_t iPages = m_PagesArray.size(); 489 for (size_t i = 0; i < iPages; ++i) { 490 std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]); 491 if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) { 492 m_PagesArray.clear(); 493 m_docStatus = PDF_DATAAVAIL_ERROR; 494 return false; 495 } 496 } 497 m_PagesArray.clear(); 498 if (m_PageObjList.empty()) 499 m_docStatus = PDF_DATAAVAIL_DONE; 500 501 return true; 502 } 503 504 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { 505 if (!pParser) { 506 m_docStatus = PDF_DATAAVAIL_ERROR; 507 return false; 508 } 509 510 CPDF_Dictionary* pDict = pPages->GetDict(); 511 CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr; 512 if (!pKids) 513 return true; 514 515 switch (pKids->GetType()) { 516 case CPDF_Object::REFERENCE: 517 m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum()); 518 break; 519 case CPDF_Object::ARRAY: { 520 CPDF_Array* pKidsArray = pKids->AsArray(); 521 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) { 522 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i))) 523 m_PageObjList.push_back(pRef->GetRefObjNum()); 524 } 525 } break; 526 default: 527 m_docStatus = PDF_DATAAVAIL_ERROR; 528 return false; 529 } 530 return true; 531 } 532 533 bool CPDF_DataAvail::CheckPages(DownloadHints* pHints) { 534 bool bExists = false; 535 std::unique_ptr<CPDF_Object> pPages = 536 GetObject(m_PagesObjNum, pHints, &bExists); 537 if (!bExists) { 538 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 539 return true; 540 } 541 542 if (!pPages) { 543 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 544 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 545 return true; 546 } 547 return false; 548 } 549 550 if (!GetPageKids(m_pCurrentParser, pPages.get())) { 551 m_docStatus = PDF_DATAAVAIL_ERROR; 552 return false; 553 } 554 555 m_docStatus = PDF_DATAAVAIL_PAGE; 556 return true; 557 } 558 559 bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) { 560 ASSERT(m_dwFileLen >= 0); 561 const uint32_t kReqSize = std::min(static_cast<uint32_t>(m_dwFileLen), 1024U); 562 563 if (m_pFileAvail->IsDataAvail(0, kReqSize)) { 564 uint8_t buffer[1024]; 565 m_pFileRead->ReadBlock(buffer, 0, kReqSize); 566 567 if (IsLinearizedFile(buffer, kReqSize)) { 568 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE; 569 } else { 570 if (m_docStatus == PDF_DATAAVAIL_ERROR) 571 return false; 572 m_docStatus = PDF_DATAAVAIL_END; 573 } 574 return true; 575 } 576 577 pHints->AddSegment(0, kReqSize); 578 return false; 579 } 580 581 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) { 582 if (!m_pLinearized->GetFirstPageEndOffset() || 583 !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) { 584 m_docStatus = PDF_DATAAVAIL_ERROR; 585 return false; 586 } 587 588 uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset(); 589 dwEnd += 512; 590 if ((FX_FILESIZE)dwEnd > m_dwFileLen) 591 dwEnd = (uint32_t)m_dwFileLen; 592 593 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); 594 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; 595 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { 596 pHints->AddSegment(iStartPos, iSize); 597 return false; 598 } 599 600 m_docStatus = 601 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; 602 return true; 603 } 604 605 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, 606 uint32_t size, 607 DownloadHints* pHints) { 608 if (offset < 0 || offset > m_dwFileLen) 609 return true; 610 611 FX_SAFE_FILESIZE safeSize = offset; 612 safeSize += size; 613 safeSize += 512; 614 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) 615 size = m_dwFileLen - offset; 616 else 617 size += 512; 618 619 if (!m_pFileAvail->IsDataAvail(offset, size)) { 620 if (pHints) 621 pHints->AddSegment(offset, size); 622 return false; 623 } 624 return true; 625 } 626 627 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) { 628 if (m_pLinearized->GetPageCount() <= 1) { 629 m_docStatus = PDF_DATAAVAIL_DONE; 630 return true; 631 } 632 if (!m_pLinearized->HasHintTable()) { 633 m_docStatus = PDF_DATAAVAIL_ERROR; 634 return false; 635 } 636 637 FX_FILESIZE szHintStart = m_pLinearized->GetHintStart(); 638 FX_FILESIZE szHintLength = m_pLinearized->GetHintLength(); 639 640 if (!IsDataAvail(szHintStart, szHintLength, pHints)) 641 return false; 642 643 m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); 644 645 std::unique_ptr<CPDF_HintTables> pHintTables( 646 new CPDF_HintTables(this, m_pLinearized.get())); 647 std::unique_ptr<CPDF_Object> pHintStream( 648 ParseIndirectObjectAt(szHintStart, 0)); 649 CPDF_Stream* pStream = ToStream(pHintStream.get()); 650 if (pStream && pHintTables->LoadHintStream(pStream)) 651 m_pHintTables = std::move(pHintTables); 652 653 m_docStatus = PDF_DATAAVAIL_DONE; 654 return true; 655 } 656 657 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt( 658 FX_FILESIZE pos, 659 uint32_t objnum, 660 CPDF_IndirectObjectHolder* pObjList) { 661 FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); 662 m_syntaxParser.RestorePos(pos); 663 664 bool bIsNumber; 665 CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber); 666 if (!bIsNumber) 667 return nullptr; 668 669 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); 670 if (objnum && parser_objnum != objnum) 671 return nullptr; 672 673 word = m_syntaxParser.GetNextWord(&bIsNumber); 674 if (!bIsNumber) 675 return nullptr; 676 677 uint32_t gennum = FXSYS_atoui(word.c_str()); 678 if (m_syntaxParser.GetKeyword() != "obj") { 679 m_syntaxParser.RestorePos(SavedPos); 680 return nullptr; 681 } 682 683 std::unique_ptr<CPDF_Object> pObj = 684 m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true); 685 m_syntaxParser.RestorePos(SavedPos); 686 return pObj; 687 } 688 689 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { 690 const uint32_t kReqSize = 1024; 691 if (!m_pFileAvail->IsDataAvail(0, kReqSize)) 692 return LinearizationUnknown; 693 694 if (!m_pFileRead) 695 return NotLinearized; 696 697 FX_FILESIZE dwSize = m_pFileRead->GetSize(); 698 if (dwSize < (FX_FILESIZE)kReqSize) 699 return LinearizationUnknown; 700 701 uint8_t buffer[1024]; 702 m_pFileRead->ReadBlock(buffer, 0, kReqSize); 703 if (IsLinearizedFile(buffer, kReqSize)) 704 return Linearized; 705 706 return NotLinearized; 707 } 708 709 bool CPDF_DataAvail::IsLinearized() { 710 return !!m_pLinearized; 711 } 712 713 bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) { 714 if (m_pLinearized) 715 return true; 716 717 CFX_RetainPtr<IFX_MemoryStream> file = 718 IFX_MemoryStream::Create(pData, (size_t)dwLen, false); 719 int32_t offset = GetHeaderOffset(file); 720 if (offset == -1) { 721 m_docStatus = PDF_DATAAVAIL_ERROR; 722 return false; 723 } 724 725 m_dwHeaderOffset = offset; 726 m_syntaxParser.InitParser(file, offset); 727 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9); 728 729 bool bNumber; 730 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber); 731 if (!bNumber) 732 return false; 733 734 uint32_t objnum = FXSYS_atoui(wordObjNum.c_str()); 735 m_pLinearized = CPDF_LinearizedHeader::CreateForObject( 736 ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum)); 737 if (!m_pLinearized || 738 m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) { 739 m_pLinearized.reset(); 740 return false; 741 } 742 return true; 743 } 744 745 bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) { 746 uint32_t req_pos = (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); 747 uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos); 748 749 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) { 750 uint8_t buffer[1024]; 751 m_pFileRead->ReadBlock(buffer, req_pos, dwSize); 752 753 CFX_RetainPtr<IFX_MemoryStream> file = 754 IFX_MemoryStream::Create(buffer, (size_t)dwSize, false); 755 m_syntaxParser.InitParser(file, 0); 756 m_syntaxParser.RestorePos(dwSize - 1); 757 758 if (m_syntaxParser.SearchWord("startxref", true, false, dwSize)) { 759 m_syntaxParser.GetNextWord(nullptr); 760 761 bool bNumber; 762 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber); 763 if (!bNumber) { 764 m_docStatus = PDF_DATAAVAIL_ERROR; 765 return false; 766 } 767 768 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str()); 769 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) { 770 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 771 return true; 772 } 773 774 m_dwLastXRefOffset = m_dwXRefOffset; 775 SetStartOffset(m_dwXRefOffset); 776 m_docStatus = PDF_DATAAVAIL_CROSSREF; 777 return true; 778 } 779 780 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 781 return true; 782 } 783 784 pHints->AddSegment(req_pos, dwSize); 785 return false; 786 } 787 788 int32_t CPDF_DataAvail::CheckCrossRefStream(DownloadHints* pHints, 789 FX_FILESIZE& xref_offset) { 790 xref_offset = 0; 791 uint32_t req_size = 792 (uint32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 793 794 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { 795 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); 796 CFX_BinaryBuf buf(iSize); 797 uint8_t* pBuf = buf.GetBuffer(); 798 799 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); 800 801 CFX_RetainPtr<IFX_MemoryStream> file = 802 IFX_MemoryStream::Create(pBuf, (size_t)iSize, false); 803 m_parser.m_pSyntax->InitParser(file, 0); 804 805 bool bNumber; 806 CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber); 807 if (!bNumber) 808 return -1; 809 810 uint32_t objNum = FXSYS_atoui(objnum.c_str()); 811 std::unique_ptr<CPDF_Object> pObj = 812 m_parser.ParseIndirectObjectAt(nullptr, 0, objNum); 813 814 if (!pObj) { 815 m_Pos += m_parser.m_pSyntax->SavePos(); 816 return 0; 817 } 818 819 CPDF_Dictionary* pDict = pObj->GetDict(); 820 CPDF_Name* pName = ToName(pDict ? pDict->GetObjectFor("Type") : nullptr); 821 if (pName && pName->GetString() == "XRef") { 822 m_Pos += m_parser.m_pSyntax->SavePos(); 823 xref_offset = pObj->GetDict()->GetIntegerFor("Prev"); 824 return 1; 825 } 826 return -1; 827 } 828 pHints->AddSegment(m_Pos, req_size); 829 return 0; 830 } 831 832 void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { 833 m_Pos = dwOffset; 834 } 835 836 bool CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { 837 uint8_t ch; 838 if (!GetNextChar(ch)) 839 return false; 840 841 while (1) { 842 while (PDFCharIsWhitespace(ch)) { 843 if (!GetNextChar(ch)) 844 return false; 845 } 846 847 if (ch != '%') 848 break; 849 850 while (1) { 851 if (!GetNextChar(ch)) 852 return false; 853 if (PDFCharIsLineEnding(ch)) 854 break; 855 } 856 } 857 858 uint8_t buffer[256]; 859 uint32_t index = 0; 860 if (PDFCharIsDelimiter(ch)) { 861 buffer[index++] = ch; 862 if (ch == '/') { 863 while (1) { 864 if (!GetNextChar(ch)) 865 return false; 866 867 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 868 m_Pos--; 869 CFX_ByteString ret(buffer, index); 870 token = ret; 871 return true; 872 } 873 874 if (index < sizeof(buffer)) 875 buffer[index++] = ch; 876 } 877 } else if (ch == '<') { 878 if (!GetNextChar(ch)) 879 return false; 880 881 if (ch == '<') 882 buffer[index++] = ch; 883 else 884 m_Pos--; 885 } else if (ch == '>') { 886 if (!GetNextChar(ch)) 887 return false; 888 889 if (ch == '>') 890 buffer[index++] = ch; 891 else 892 m_Pos--; 893 } 894 895 CFX_ByteString ret(buffer, index); 896 token = ret; 897 return true; 898 } 899 900 while (1) { 901 if (index < sizeof(buffer)) 902 buffer[index++] = ch; 903 904 if (!GetNextChar(ch)) 905 return false; 906 907 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 908 m_Pos--; 909 break; 910 } 911 } 912 913 token = CFX_ByteString(buffer, index); 914 return true; 915 } 916 917 bool CPDF_DataAvail::GetNextChar(uint8_t& ch) { 918 FX_FILESIZE pos = m_Pos; 919 if (pos >= m_dwFileLen) 920 return false; 921 922 if (m_bufferOffset >= pos || 923 (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) { 924 FX_FILESIZE read_pos = pos; 925 uint32_t read_size = 512; 926 if ((FX_FILESIZE)read_size > m_dwFileLen) 927 read_size = (uint32_t)m_dwFileLen; 928 929 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) 930 read_pos = m_dwFileLen - read_size; 931 932 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) 933 return false; 934 935 m_bufferOffset = read_pos; 936 m_bufferSize = read_size; 937 } 938 ch = m_bufferData[pos - m_bufferOffset]; 939 m_Pos++; 940 return true; 941 } 942 943 bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) { 944 int32_t iSize = 0; 945 CFX_ByteString token; 946 while (1) { 947 if (!GetNextToken(token)) { 948 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 949 pHints->AddSegment(m_Pos, iSize); 950 return false; 951 } 952 953 if (token == "trailer") { 954 m_dwTrailerOffset = m_Pos; 955 m_docStatus = PDF_DATAAVAIL_TRAILER; 956 return true; 957 } 958 } 959 } 960 961 bool CPDF_DataAvail::CheckAllCrossRefStream(DownloadHints* pHints) { 962 FX_FILESIZE xref_offset = 0; 963 964 int32_t nRet = CheckCrossRefStream(pHints, xref_offset); 965 if (nRet == 1) { 966 if (!xref_offset) { 967 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; 968 } else { 969 m_dwCurrentXRefSteam = xref_offset; 970 m_Pos = xref_offset; 971 } 972 return true; 973 } 974 975 if (nRet == -1) 976 m_docStatus = PDF_DATAAVAIL_ERROR; 977 return false; 978 } 979 980 bool CPDF_DataAvail::CheckCrossRef(DownloadHints* pHints) { 981 int32_t iSize = 0; 982 CFX_ByteString token; 983 if (!GetNextToken(token)) { 984 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 985 pHints->AddSegment(m_Pos, iSize); 986 return false; 987 } 988 989 if (token == "xref") { 990 while (1) { 991 if (!GetNextToken(token)) { 992 iSize = 993 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 994 pHints->AddSegment(m_Pos, iSize); 995 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM; 996 return false; 997 } 998 999 if (token == "trailer") { 1000 m_dwTrailerOffset = m_Pos; 1001 m_docStatus = PDF_DATAAVAIL_TRAILER; 1002 return true; 1003 } 1004 } 1005 } else { 1006 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 1007 return true; 1008 } 1009 return false; 1010 } 1011 1012 bool CPDF_DataAvail::CheckTrailerAppend(DownloadHints* pHints) { 1013 if (m_Pos < m_dwFileLen) { 1014 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos(); 1015 int32_t iSize = (int32_t)( 1016 dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512); 1017 1018 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) { 1019 pHints->AddSegment(dwAppendPos, iSize); 1020 return false; 1021 } 1022 } 1023 1024 if (m_dwPrevXRefOffset) { 1025 SetStartOffset(m_dwPrevXRefOffset); 1026 m_docStatus = PDF_DATAAVAIL_CROSSREF; 1027 } else { 1028 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; 1029 } 1030 return true; 1031 } 1032 1033 bool CPDF_DataAvail::CheckTrailer(DownloadHints* pHints) { 1034 int32_t iTrailerSize = 1035 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 1036 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) { 1037 int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset); 1038 CFX_BinaryBuf buf(iSize); 1039 uint8_t* pBuf = buf.GetBuffer(); 1040 if (!pBuf) { 1041 m_docStatus = PDF_DATAAVAIL_ERROR; 1042 return false; 1043 } 1044 1045 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) 1046 return false; 1047 1048 CFX_RetainPtr<IFX_MemoryStream> file = 1049 IFX_MemoryStream::Create(pBuf, (size_t)iSize, false); 1050 m_syntaxParser.InitParser(file, 0); 1051 1052 std::unique_ptr<CPDF_Object> pTrailer( 1053 m_syntaxParser.GetObject(nullptr, 0, 0, true)); 1054 if (!pTrailer) { 1055 m_Pos += m_syntaxParser.SavePos(); 1056 pHints->AddSegment(m_Pos, iTrailerSize); 1057 return false; 1058 } 1059 1060 if (!pTrailer->IsDictionary()) 1061 return false; 1062 1063 CPDF_Dictionary* pTrailerDict = pTrailer->GetDict(); 1064 CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt"); 1065 if (ToReference(pEncrypt)) { 1066 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 1067 return true; 1068 } 1069 1070 uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev"); 1071 if (xrefpos) { 1072 m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm"); 1073 if (m_dwPrevXRefOffset) { 1074 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 1075 } else { 1076 m_dwPrevXRefOffset = xrefpos; 1077 if (m_dwPrevXRefOffset >= m_dwFileLen) { 1078 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 1079 } else { 1080 SetStartOffset(m_dwPrevXRefOffset); 1081 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; 1082 } 1083 } 1084 return true; 1085 } 1086 m_dwPrevXRefOffset = 0; 1087 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; 1088 return true; 1089 } 1090 pHints->AddSegment(m_Pos, iTrailerSize); 1091 return false; 1092 } 1093 1094 bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) { 1095 while (true) { 1096 switch (m_docStatus) { 1097 case PDF_DATAAVAIL_PAGETREE: 1098 if (!LoadDocPages(pHints)) 1099 return false; 1100 break; 1101 case PDF_DATAAVAIL_PAGE: 1102 if (!LoadDocPage(dwPage, pHints)) 1103 return false; 1104 break; 1105 case PDF_DATAAVAIL_ERROR: 1106 return LoadAllFile(pHints); 1107 default: 1108 m_bPagesTreeLoad = true; 1109 m_bPagesLoad = true; 1110 m_bCurPageDictLoadOK = true; 1111 m_docStatus = PDF_DATAAVAIL_PAGE; 1112 return true; 1113 } 1114 } 1115 } 1116 1117 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo, 1118 PageNode* pPageNode, 1119 DownloadHints* pHints) { 1120 bool bExists = false; 1121 std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, pHints, &bExists); 1122 if (!bExists) { 1123 m_docStatus = PDF_DATAAVAIL_ERROR; 1124 return false; 1125 } 1126 1127 if (!pPages) 1128 return false; 1129 1130 CPDF_Array* pArray = pPages->AsArray(); 1131 if (!pArray) { 1132 m_docStatus = PDF_DATAAVAIL_ERROR; 1133 return false; 1134 } 1135 1136 pPageNode->m_type = PDF_PAGENODE_PAGES; 1137 for (size_t i = 0; i < pArray->GetCount(); ++i) { 1138 CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i)); 1139 if (!pKid) 1140 continue; 1141 1142 auto pNode = pdfium::MakeUnique<PageNode>(); 1143 pNode->m_dwPageNo = pKid->GetRefObjNum(); 1144 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 1145 } 1146 return true; 1147 } 1148 1149 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo, 1150 PageNode* pPageNode, 1151 DownloadHints* pHints) { 1152 bool bExists = false; 1153 std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, pHints, &bExists); 1154 if (!bExists) { 1155 m_docStatus = PDF_DATAAVAIL_ERROR; 1156 return false; 1157 } 1158 1159 if (!pPage) 1160 return false; 1161 1162 if (pPage->IsArray()) { 1163 pPageNode->m_dwPageNo = dwPageNo; 1164 pPageNode->m_type = PDF_PAGENODE_ARRAY; 1165 return true; 1166 } 1167 1168 if (!pPage->IsDictionary()) { 1169 m_docStatus = PDF_DATAAVAIL_ERROR; 1170 return false; 1171 } 1172 1173 pPageNode->m_dwPageNo = dwPageNo; 1174 CPDF_Dictionary* pDict = pPage->GetDict(); 1175 CFX_ByteString type = pDict->GetStringFor("Type"); 1176 if (type == "Pages") { 1177 pPageNode->m_type = PDF_PAGENODE_PAGES; 1178 CPDF_Object* pKids = pDict->GetObjectFor("Kids"); 1179 if (!pKids) { 1180 m_docStatus = PDF_DATAAVAIL_PAGE; 1181 return true; 1182 } 1183 1184 switch (pKids->GetType()) { 1185 case CPDF_Object::REFERENCE: { 1186 CPDF_Reference* pKid = pKids->AsReference(); 1187 auto pNode = pdfium::MakeUnique<PageNode>(); 1188 pNode->m_dwPageNo = pKid->GetRefObjNum(); 1189 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 1190 } break; 1191 case CPDF_Object::ARRAY: { 1192 CPDF_Array* pKidsArray = pKids->AsArray(); 1193 for (size_t i = 0; i < pKidsArray->GetCount(); ++i) { 1194 CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i)); 1195 if (!pKid) 1196 continue; 1197 1198 auto pNode = pdfium::MakeUnique<PageNode>(); 1199 pNode->m_dwPageNo = pKid->GetRefObjNum(); 1200 pPageNode->m_ChildNodes.push_back(std::move(pNode)); 1201 } 1202 } break; 1203 default: 1204 break; 1205 } 1206 } else if (type == "Page") { 1207 pPageNode->m_type = PDF_PAGENODE_PAGE; 1208 } else { 1209 m_docStatus = PDF_DATAAVAIL_ERROR; 1210 return false; 1211 } 1212 return true; 1213 } 1214 1215 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode, 1216 int32_t iPage, 1217 int32_t& iCount, 1218 DownloadHints* pHints, 1219 int level) { 1220 if (level >= kMaxPageRecursionDepth) 1221 return false; 1222 1223 int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes); 1224 if (iSize <= 0 || iPage >= iSize) { 1225 m_docStatus = PDF_DATAAVAIL_ERROR; 1226 return false; 1227 } 1228 for (int32_t i = 0; i < iSize; ++i) { 1229 PageNode* pNode = pageNode.m_ChildNodes[i].get(); 1230 if (!pNode) 1231 continue; 1232 1233 if (pNode->m_type == PDF_PAGENODE_UNKNOWN) { 1234 // Updates the type for the unknown page node. 1235 if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode, pHints)) 1236 return false; 1237 } 1238 if (pNode->m_type == PDF_PAGENODE_ARRAY) { 1239 // Updates a more specific type for the array page node. 1240 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) 1241 return false; 1242 } 1243 switch (pNode->m_type) { 1244 case PDF_PAGENODE_PAGE: 1245 iCount++; 1246 if (iPage == iCount && m_pDocument) 1247 m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo); 1248 break; 1249 case PDF_PAGENODE_PAGES: 1250 if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) 1251 return false; 1252 break; 1253 case PDF_PAGENODE_UNKNOWN: 1254 case PDF_PAGENODE_ARRAY: 1255 // Already converted above, error if we get here. 1256 return false; 1257 } 1258 if (iPage == iCount) { 1259 m_docStatus = PDF_DATAAVAIL_DONE; 1260 return true; 1261 } 1262 } 1263 return true; 1264 } 1265 1266 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) { 1267 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1268 int32_t iPage = safePage.ValueOrDie(); 1269 if (m_pDocument->GetPageCount() <= iPage || 1270 m_pDocument->IsPageLoaded(iPage)) { 1271 m_docStatus = PDF_DATAAVAIL_DONE; 1272 return true; 1273 } 1274 if (m_PageNode.m_type == PDF_PAGENODE_PAGE) { 1275 m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR; 1276 return true; 1277 } 1278 int32_t iCount = -1; 1279 return CheckPageNode(m_PageNode, iPage, iCount, pHints, 0); 1280 } 1281 1282 bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) { 1283 bool bExists = false; 1284 std::unique_ptr<CPDF_Object> pPages = 1285 GetObject(m_PagesObjNum, pHints, &bExists); 1286 if (!bExists) { 1287 m_docStatus = PDF_DATAAVAIL_ERROR; 1288 return false; 1289 } 1290 if (!pPages) 1291 return false; 1292 1293 CPDF_Dictionary* pPagesDict = pPages->GetDict(); 1294 if (!pPagesDict) { 1295 m_docStatus = PDF_DATAAVAIL_ERROR; 1296 return false; 1297 } 1298 if (!pPagesDict->KeyExist("Kids")) 1299 return true; 1300 1301 return pPagesDict->GetIntegerFor("Count") > 0; 1302 } 1303 1304 bool CPDF_DataAvail::LoadDocPages(DownloadHints* pHints) { 1305 if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode, pHints)) 1306 return false; 1307 1308 if (CheckPageCount(pHints)) { 1309 m_docStatus = PDF_DATAAVAIL_PAGE; 1310 return true; 1311 } 1312 1313 m_bTotalLoadPageTree = true; 1314 return false; 1315 } 1316 1317 bool CPDF_DataAvail::LoadPages(DownloadHints* pHints) { 1318 while (!m_bPagesTreeLoad) { 1319 if (!CheckPageStatus(pHints)) 1320 return false; 1321 } 1322 1323 if (m_bPagesLoad) 1324 return true; 1325 1326 m_pDocument->LoadPages(); 1327 return false; 1328 } 1329 1330 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( 1331 DownloadHints* pHints) { 1332 if (m_bLinearedDataOK) 1333 return DataAvailable; 1334 ASSERT(m_pLinearized); 1335 if (!m_pLinearized->GetLastXRefOffset()) 1336 return DataError; 1337 1338 if (!m_bMainXRefLoadTried) { 1339 FX_SAFE_UINT32 data_size = m_dwFileLen; 1340 data_size -= m_pLinearized->GetLastXRefOffset(); 1341 if (!data_size.IsValid()) 1342 return DataError; 1343 1344 if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(), 1345 data_size.ValueOrDie())) { 1346 pHints->AddSegment(m_pLinearized->GetLastXRefOffset(), 1347 data_size.ValueOrDie()); 1348 return DataNotAvailable; 1349 } 1350 1351 CPDF_Parser::Error eRet = 1352 m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); 1353 m_bMainXRefLoadTried = true; 1354 if (eRet != CPDF_Parser::SUCCESS) 1355 return DataError; 1356 1357 if (!PreparePageItem()) 1358 return DataNotAvailable; 1359 1360 m_bMainXRefLoadedOK = true; 1361 m_bLinearedDataOK = true; 1362 } 1363 1364 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable; 1365 } 1366 1367 bool CPDF_DataAvail::CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints) { 1368 if (m_objs_array.empty()) { 1369 m_ObjectSet.clear(); 1370 1371 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1372 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1373 if (!pPageDict) 1374 return true; 1375 1376 CPDF_Object* pAnnots = pPageDict->GetObjectFor("Annots"); 1377 if (!pAnnots) 1378 return true; 1379 1380 std::vector<CPDF_Object*> obj_array; 1381 obj_array.push_back(pAnnots); 1382 if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array)) 1383 return false; 1384 1385 m_objs_array.clear(); 1386 return true; 1387 } 1388 1389 std::vector<CPDF_Object*> new_objs_array; 1390 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1391 m_objs_array = new_objs_array; 1392 return false; 1393 } 1394 m_objs_array.clear(); 1395 return true; 1396 } 1397 1398 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( 1399 uint32_t dwPage, 1400 DownloadHints* pHints) { 1401 if (!m_bAnnotsLoad) { 1402 if (!CheckPageAnnots(dwPage, pHints)) 1403 return DataNotAvailable; 1404 m_bAnnotsLoad = true; 1405 } 1406 const bool is_page_valid = ValidatePage(dwPage); 1407 (void)is_page_valid; 1408 ASSERT(is_page_valid); 1409 return DataAvailable; 1410 } 1411 1412 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { 1413 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth); 1414 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) 1415 return false; 1416 1417 CPDF_Object* pParent = pDict->GetObjectFor("Parent"); 1418 if (!pParent) 1419 return false; 1420 1421 CPDF_Dictionary* pParentDict = pParent->GetDict(); 1422 if (!pParentDict) 1423 return false; 1424 1425 CPDF_Object* pRet = pParentDict->GetObjectFor("Resources"); 1426 if (pRet) { 1427 m_pPageResource = pRet; 1428 return true; 1429 } 1430 1431 return HaveResourceAncestor(pParentDict); 1432 } 1433 1434 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( 1435 uint32_t dwPage, 1436 DownloadHints* pHints) { 1437 if (!m_pDocument) 1438 return DataError; 1439 1440 if (IsFirstCheck(dwPage)) { 1441 m_bCurPageDictLoadOK = false; 1442 m_bPageLoadedOK = false; 1443 m_bAnnotsLoad = false; 1444 m_bNeedDownLoadResource = false; 1445 m_objs_array.clear(); 1446 m_ObjectSet.clear(); 1447 } 1448 1449 if (pdfium::ContainsKey(m_pagesLoadState, dwPage)) 1450 return DataAvailable; 1451 1452 if (m_pLinearized) { 1453 if (dwPage == m_pLinearized->GetFirstPageNo()) { 1454 DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints); 1455 if (nRet == DataAvailable) 1456 m_pagesLoadState.insert(dwPage); 1457 return nRet; 1458 } 1459 1460 DocAvailStatus nResult = CheckLinearizedData(pHints); 1461 if (nResult != DataAvailable) 1462 return nResult; 1463 1464 if (m_pHintTables) { 1465 nResult = m_pHintTables->CheckPage(dwPage, pHints); 1466 if (nResult != DataAvailable) 1467 return nResult; 1468 m_pagesLoadState.insert(dwPage); 1469 return GetPage(dwPage) ? DataAvailable : DataError; 1470 } 1471 1472 if (m_bMainXRefLoadedOK) { 1473 if (m_bTotalLoadPageTree) { 1474 if (!LoadPages(pHints)) 1475 return DataNotAvailable; 1476 } else { 1477 if (!m_bCurPageDictLoadOK && !CheckPage(dwPage, pHints)) 1478 return DataNotAvailable; 1479 } 1480 } else { 1481 if (!LoadAllFile(pHints)) 1482 return DataNotAvailable; 1483 m_pDocument->GetParser()->RebuildCrossRef(); 1484 ResetFirstCheck(dwPage); 1485 return DataAvailable; 1486 } 1487 } else { 1488 if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && 1489 !CheckPage(dwPage, pHints)) { 1490 return DataNotAvailable; 1491 } 1492 } 1493 1494 if (m_bHaveAcroForm && !m_bAcroFormLoad) { 1495 if (!CheckAcroFormSubObject(pHints)) 1496 return DataNotAvailable; 1497 m_bAcroFormLoad = true; 1498 } 1499 1500 if (!m_bPageLoadedOK) { 1501 if (m_objs_array.empty()) { 1502 m_ObjectSet.clear(); 1503 1504 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1505 m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1506 if (!m_pPageDict) { 1507 ResetFirstCheck(dwPage); 1508 // This is XFA page. 1509 return DataAvailable; 1510 } 1511 1512 std::vector<CPDF_Object*> obj_array; 1513 obj_array.push_back(m_pPageDict); 1514 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1515 return DataNotAvailable; 1516 1517 m_objs_array.clear(); 1518 } else { 1519 std::vector<CPDF_Object*> new_objs_array; 1520 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1521 m_objs_array = new_objs_array; 1522 return DataNotAvailable; 1523 } 1524 } 1525 m_objs_array.clear(); 1526 m_bPageLoadedOK = true; 1527 } 1528 1529 if (!m_bAnnotsLoad) { 1530 if (!CheckPageAnnots(dwPage, pHints)) 1531 return DataNotAvailable; 1532 m_bAnnotsLoad = true; 1533 } 1534 1535 if (m_pPageDict && !m_bNeedDownLoadResource) { 1536 m_pPageResource = m_pPageDict->GetObjectFor("Resources"); 1537 m_bNeedDownLoadResource = 1538 m_pPageResource || HaveResourceAncestor(m_pPageDict); 1539 } 1540 1541 if (m_bNeedDownLoadResource) { 1542 if (!CheckResources(pHints)) 1543 return DataNotAvailable; 1544 m_bNeedDownLoadResource = false; 1545 } 1546 1547 m_bPageLoadedOK = false; 1548 m_bAnnotsLoad = false; 1549 m_bCurPageDictLoadOK = false; 1550 1551 ResetFirstCheck(dwPage); 1552 m_pagesLoadState.insert(dwPage); 1553 const bool is_page_valid = ValidatePage(dwPage); 1554 (void)is_page_valid; 1555 ASSERT(is_page_valid); 1556 return DataAvailable; 1557 } 1558 1559 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) { 1560 if (m_objs_array.empty()) { 1561 std::vector<CPDF_Object*> obj_array; 1562 obj_array.push_back(m_pPageResource); 1563 if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array)) 1564 return false; 1565 1566 m_objs_array.clear(); 1567 return true; 1568 } 1569 std::vector<CPDF_Object*> new_objs_array; 1570 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1571 m_objs_array = new_objs_array; 1572 return false; 1573 } 1574 m_objs_array.clear(); 1575 return true; 1576 } 1577 1578 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, 1579 uint32_t* pSize) { 1580 if (pPos) 1581 *pPos = m_dwLastXRefOffset; 1582 if (pSize) 1583 *pSize = (uint32_t)(m_dwFileLen - m_dwLastXRefOffset); 1584 } 1585 1586 int CPDF_DataAvail::GetPageCount() const { 1587 if (m_pLinearized) 1588 return m_pLinearized->GetPageCount(); 1589 return m_pDocument ? m_pDocument->GetPageCount() : 0; 1590 } 1591 1592 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { 1593 if (!m_pDocument || index < 0 || index >= GetPageCount()) 1594 return nullptr; 1595 CPDF_Dictionary* page = m_pDocument->GetPage(index); 1596 if (page) 1597 return page; 1598 if (!m_pLinearized || !m_pHintTables) 1599 return nullptr; 1600 1601 if (index == static_cast<int>(m_pLinearized->GetFirstPageNo())) 1602 return nullptr; 1603 FX_FILESIZE szPageStartPos = 0; 1604 FX_FILESIZE szPageLength = 0; 1605 uint32_t dwObjNum = 0; 1606 const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos, 1607 &szPageLength, &dwObjNum); 1608 if (!bPagePosGot || !dwObjNum) 1609 return nullptr; 1610 // We should say to the document, which object is the page. 1611 m_pDocument->SetPageObjNum(index, dwObjNum); 1612 // Page object already can be parsed in document. 1613 if (!m_pDocument->GetIndirectObject(dwObjNum)) { 1614 m_syntaxParser.InitParser( 1615 m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos)); 1616 m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 1617 dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument)); 1618 } 1619 const bool is_page_valid = ValidatePage(index); 1620 (void)is_page_valid; 1621 ASSERT(is_page_valid); 1622 return m_pDocument->GetPage(index); 1623 } 1624 1625 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( 1626 DownloadHints* pHints) { 1627 if (!m_pDocument) 1628 return FormAvailable; 1629 if (m_pLinearized) { 1630 DocAvailStatus nDocStatus = CheckLinearizedData(pHints); 1631 if (nDocStatus == DataError) 1632 return FormError; 1633 if (nDocStatus == DataNotAvailable) 1634 return FormNotAvailable; 1635 } 1636 1637 if (!m_bLinearizedFormParamLoad) { 1638 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 1639 if (!pRoot) 1640 return FormAvailable; 1641 1642 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); 1643 if (!pAcroForm) 1644 return FormNotExist; 1645 1646 m_objs_array.push_back(pAcroForm->GetDict()); 1647 m_bLinearizedFormParamLoad = true; 1648 } 1649 1650 std::vector<CPDF_Object*> new_objs_array; 1651 if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) { 1652 m_objs_array = new_objs_array; 1653 return FormNotAvailable; 1654 } 1655 1656 m_objs_array.clear(); 1657 const bool is_form_valid = ValidateForm(); 1658 (void)is_form_valid; 1659 ASSERT(is_form_valid); 1660 return FormAvailable; 1661 } 1662 1663 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) { 1664 FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage); 1665 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie()); 1666 if (!pPageDict) 1667 return false; 1668 std::vector<CPDF_Object*> obj_array; 1669 obj_array.push_back(pPageDict); 1670 std::vector<CPDF_Object*> dummy; 1671 return AreObjectsAvailable(obj_array, true, nullptr, dummy); 1672 } 1673 1674 bool CPDF_DataAvail::ValidateForm() { 1675 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 1676 if (!pRoot) 1677 return true; 1678 CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm"); 1679 if (!pAcroForm) 1680 return false; 1681 std::vector<CPDF_Object*> obj_array; 1682 obj_array.push_back(pAcroForm); 1683 std::vector<CPDF_Object*> dummy; 1684 return AreObjectsAvailable(obj_array, true, nullptr, dummy); 1685 } 1686 1687 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {} 1688 1689 CPDF_DataAvail::PageNode::~PageNode() {} 1690