Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
      8 
      9 #include <algorithm>
     10 #include <memory>
     11 #include <utility>
     12 
     13 #include "core/fpdfapi/cpdf_modulemgr.h"
     14 #include "core/fpdfapi/parser/cpdf_array.h"
     15 #include "core/fpdfapi/parser/cpdf_cross_ref_avail.h"
     16 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     17 #include "core/fpdfapi/parser/cpdf_document.h"
     18 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
     19 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     20 #include "core/fpdfapi/parser/cpdf_name.h"
     21 #include "core/fpdfapi/parser/cpdf_number.h"
     22 #include "core/fpdfapi/parser/cpdf_page_object_avail.h"
     23 #include "core/fpdfapi/parser/cpdf_read_validator.h"
     24 #include "core/fpdfapi/parser/cpdf_reference.h"
     25 #include "core/fpdfapi/parser/cpdf_stream.h"
     26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     27 #include "core/fxcrt/cfx_memorystream.h"
     28 #include "core/fxcrt/fx_extension.h"
     29 #include "core/fxcrt/fx_safe_types.h"
     30 #include "third_party/base/numerics/safe_conversions.h"
     31 #include "third_party/base/ptr_util.h"
     32 #include "third_party/base/stl_util.h"
     33 
     34 namespace {
     35 
     36 // static
     37 const CPDF_Object* GetResourceObject(const CPDF_Dictionary* pDict) {
     38   constexpr size_t kMaxHierarchyDepth = 64;
     39   size_t depth = 0;
     40 
     41   const CPDF_Dictionary* dictionary_to_check = pDict;
     42   while (dictionary_to_check) {
     43     const CPDF_Object* result = dictionary_to_check->GetObjectFor("Resources");
     44     if (result)
     45       return result;
     46     const CPDF_Object* parent = dictionary_to_check->GetObjectFor("Parent");
     47     dictionary_to_check = parent ? parent->GetDict() : nullptr;
     48 
     49     if (++depth > kMaxHierarchyDepth) {
     50       // We have cycle in parents hierarchy.
     51       return nullptr;
     52     }
     53   }
     54   return nullptr;
     55 }
     56 
     57 class HintsScope {
     58  public:
     59   HintsScope(CPDF_ReadValidator* validator,
     60              CPDF_DataAvail::DownloadHints* hints)
     61       : validator_(validator) {
     62     ASSERT(validator_);
     63     validator_->SetDownloadHints(hints);
     64   }
     65 
     66   ~HintsScope() { validator_->SetDownloadHints(nullptr); }
     67 
     68  private:
     69   UnownedPtr<CPDF_ReadValidator> validator_;
     70 };
     71 
     72 }  // namespace
     73 
     74 CPDF_DataAvail::FileAvail::~FileAvail() {}
     75 
     76 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
     77 
     78 CPDF_DataAvail::CPDF_DataAvail(
     79     FileAvail* pFileAvail,
     80     const RetainPtr<IFX_SeekableReadStream>& pFileRead,
     81     bool bSupportHintTable)
     82     : m_pFileAvail(pFileAvail),
     83       m_pFileRead(
     84           pdfium::MakeRetain<CPDF_ReadValidator>(pFileRead, m_pFileAvail)),
     85       m_dwFileLen(m_pFileRead->GetSize()),
     86       m_bSupportHintTable(bSupportHintTable) {}
     87 
     88 CPDF_DataAvail::~CPDF_DataAvail() {
     89   m_pHintTables.reset();
     90 }
     91 
     92 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
     93     DownloadHints* pHints) {
     94   if (!m_dwFileLen)
     95     return DataError;
     96 
     97   const HintsScope hints_scope(m_pFileRead.Get(), pHints);
     98 
     99   while (!m_bDocAvail) {
    100     if (!CheckDocStatus())
    101       return DataNotAvailable;
    102   }
    103 
    104   return DataAvailable;
    105 }
    106 
    107 bool CPDF_DataAvail::CheckDocStatus() {
    108   switch (m_docStatus) {
    109     case PDF_DATAAVAIL_HEADER:
    110       return CheckHeader();
    111     case PDF_DATAAVAIL_FIRSTPAGE:
    112       return CheckFirstPage();
    113     case PDF_DATAAVAIL_HINTTABLE:
    114       return CheckHintTables();
    115     case PDF_DATAAVAIL_LOADALLCROSSREF:
    116       return CheckAndLoadAllXref();
    117     case PDF_DATAAVAIL_LOADALLFILE:
    118       return LoadAllFile();
    119     case PDF_DATAAVAIL_ROOT:
    120       return CheckRoot();
    121     case PDF_DATAAVAIL_INFO:
    122       return CheckInfo();
    123     case PDF_DATAAVAIL_PAGETREE:
    124       if (m_bTotalLoadPageTree)
    125         return CheckPages();
    126       return LoadDocPages();
    127     case PDF_DATAAVAIL_PAGE:
    128       if (m_bTotalLoadPageTree)
    129         return CheckPage();
    130       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
    131       return true;
    132     case PDF_DATAAVAIL_ERROR:
    133       return LoadAllFile();
    134     case PDF_DATAAVAIL_PAGE_LATERLOAD:
    135       m_docStatus = PDF_DATAAVAIL_PAGE;
    136     default:
    137       m_bDocAvail = true;
    138       return true;
    139   }
    140 }
    141 
    142 bool CPDF_DataAvail::CheckPageStatus() {
    143   switch (m_docStatus) {
    144     case PDF_DATAAVAIL_PAGETREE:
    145       return CheckPages();
    146     case PDF_DATAAVAIL_PAGE:
    147       return CheckPage();
    148     case PDF_DATAAVAIL_ERROR:
    149       return LoadAllFile();
    150     default:
    151       m_bPagesTreeLoad = true;
    152       m_bPagesLoad = true;
    153       return true;
    154   }
    155 }
    156 
    157 bool CPDF_DataAvail::LoadAllFile() {
    158   if (GetValidator()->CheckWholeFileAndRequestIfUnavailable()) {
    159     m_docStatus = PDF_DATAAVAIL_DONE;
    160     return true;
    161   }
    162   return false;
    163 }
    164 
    165 bool CPDF_DataAvail::CheckAndLoadAllXref() {
    166   if (!m_pCrossRefAvail) {
    167     const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    168     const FX_FILESIZE last_xref_offset = m_parser.ParseStartXRef();
    169     if (GetValidator()->has_read_problems())
    170       return false;
    171 
    172     if (last_xref_offset <= 0) {
    173       m_docStatus = PDF_DATAAVAIL_ERROR;
    174       return false;
    175     }
    176 
    177     m_pCrossRefAvail = pdfium::MakeUnique<CPDF_CrossRefAvail>(GetSyntaxParser(),
    178                                                               last_xref_offset);
    179   }
    180 
    181   switch (m_pCrossRefAvail->CheckAvail()) {
    182     case DocAvailStatus::DataAvailable:
    183       break;
    184     case DocAvailStatus::DataNotAvailable:
    185       return false;
    186     case DocAvailStatus::DataError:
    187       m_docStatus = PDF_DATAAVAIL_ERROR;
    188       return false;
    189     default:
    190       NOTREACHED();
    191       return false;
    192   }
    193 
    194   if (!m_parser.LoadAllCrossRefV4(m_pCrossRefAvail->last_crossref_offset()) &&
    195       !m_parser.LoadAllCrossRefV5(m_pCrossRefAvail->last_crossref_offset())) {
    196     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    197     return false;
    198   }
    199 
    200   m_dwRootObjNum = m_parser.GetRootObjNum();
    201   m_dwInfoObjNum = m_parser.GetInfoObjNum();
    202   m_pCurrentParser = &m_parser;
    203   m_docStatus = PDF_DATAAVAIL_ROOT;
    204   return true;
    205 }
    206 
    207 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
    208                                                        bool* pExistInFile) {
    209   CPDF_Parser* pParser = nullptr;
    210 
    211   if (pExistInFile)
    212     *pExistInFile = true;
    213 
    214   pParser = m_pDocument ? m_pDocument->GetParser() : &m_parser;
    215 
    216   std::unique_ptr<CPDF_Object> pRet;
    217   if (pParser) {
    218     const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    219     pRet = pParser->ParseIndirectObject(nullptr, objnum);
    220     if (GetValidator()->has_read_problems())
    221       return nullptr;
    222   }
    223 
    224   if (!pRet && pExistInFile)
    225     *pExistInFile = false;
    226 
    227   return pRet;
    228 }
    229 
    230 bool CPDF_DataAvail::CheckInfo() {
    231   bool bExist = false;
    232   std::unique_ptr<CPDF_Object> pInfo = GetObject(m_dwInfoObjNum, &bExist);
    233   if (bExist && !pInfo) {
    234     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    235       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    236       return true;
    237     }
    238     return false;
    239   }
    240   m_docStatus = PDF_DATAAVAIL_PAGETREE;
    241   return true;
    242 }
    243 
    244 bool CPDF_DataAvail::CheckRoot() {
    245   bool bExist = false;
    246   m_pRoot = GetObject(m_dwRootObjNum, &bExist);
    247   if (!bExist) {
    248     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    249     return true;
    250   }
    251 
    252   if (!m_pRoot) {
    253     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    254       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    255       return true;
    256     }
    257     return false;
    258   }
    259 
    260   CPDF_Dictionary* pDict = m_pRoot->GetDict();
    261   if (!pDict) {
    262     m_docStatus = PDF_DATAAVAIL_ERROR;
    263     return false;
    264   }
    265 
    266   CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
    267   if (!pRef) {
    268     m_docStatus = PDF_DATAAVAIL_ERROR;
    269     return false;
    270   }
    271 
    272   m_PagesObjNum = pRef->GetRefObjNum();
    273 
    274   m_docStatus = m_dwInfoObjNum ? PDF_DATAAVAIL_INFO : PDF_DATAAVAIL_PAGETREE;
    275   return true;
    276 }
    277 
    278 bool CPDF_DataAvail::PreparePageItem() {
    279   const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
    280   CPDF_Reference* pRef =
    281       ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
    282   if (!pRef) {
    283     m_docStatus = PDF_DATAAVAIL_ERROR;
    284     return false;
    285   }
    286 
    287   m_PagesObjNum = pRef->GetRefObjNum();
    288   m_pCurrentParser = m_pDocument->GetParser();
    289   m_docStatus = PDF_DATAAVAIL_PAGETREE;
    290   return true;
    291 }
    292 
    293 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
    294   return m_pageMapCheckState.insert(dwPage).second;
    295 }
    296 
    297 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
    298   m_pageMapCheckState.erase(dwPage);
    299 }
    300 
    301 bool CPDF_DataAvail::CheckPage() {
    302   std::vector<uint32_t> UnavailObjList;
    303   for (uint32_t dwPageObjNum : m_PageObjList) {
    304     bool bExists = false;
    305     std::unique_ptr<CPDF_Object> pObj = GetObject(dwPageObjNum, &bExists);
    306     if (!pObj) {
    307       if (bExists)
    308         UnavailObjList.push_back(dwPageObjNum);
    309       continue;
    310     }
    311     CPDF_Array* pArray = ToArray(pObj.get());
    312     if (pArray) {
    313       for (const auto& pArrayObj : *pArray) {
    314         if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
    315           UnavailObjList.push_back(pRef->GetRefObjNum());
    316       }
    317     }
    318     if (!pObj->IsDictionary())
    319       continue;
    320 
    321     ByteString type = pObj->GetDict()->GetStringFor("Type");
    322     if (type == "Pages") {
    323       m_PagesArray.push_back(std::move(pObj));
    324       continue;
    325     }
    326   }
    327   m_PageObjList.clear();
    328   if (!UnavailObjList.empty()) {
    329     m_PageObjList = std::move(UnavailObjList);
    330     return false;
    331   }
    332   size_t iPages = m_PagesArray.size();
    333   for (size_t i = 0; i < iPages; ++i) {
    334     std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
    335     if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
    336       m_PagesArray.clear();
    337       m_docStatus = PDF_DATAAVAIL_ERROR;
    338       return false;
    339     }
    340   }
    341   m_PagesArray.clear();
    342   if (m_PageObjList.empty())
    343     m_docStatus = PDF_DATAAVAIL_DONE;
    344 
    345   return true;
    346 }
    347 
    348 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
    349   if (!pParser) {
    350     m_docStatus = PDF_DATAAVAIL_ERROR;
    351     return false;
    352   }
    353 
    354   CPDF_Dictionary* pDict = pPages->GetDict();
    355   CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
    356   if (!pKids)
    357     return true;
    358 
    359   switch (pKids->GetType()) {
    360     case CPDF_Object::REFERENCE:
    361       m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
    362       break;
    363     case CPDF_Object::ARRAY: {
    364       CPDF_Array* pKidsArray = pKids->AsArray();
    365       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
    366         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
    367           m_PageObjList.push_back(pRef->GetRefObjNum());
    368       }
    369       break;
    370     }
    371     default:
    372       m_docStatus = PDF_DATAAVAIL_ERROR;
    373       return false;
    374   }
    375   return true;
    376 }
    377 
    378 bool CPDF_DataAvail::CheckPages() {
    379   bool bExists = false;
    380   std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
    381   if (!bExists) {
    382     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    383     return true;
    384   }
    385 
    386   if (!pPages) {
    387     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    388       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    389       return true;
    390     }
    391     return false;
    392   }
    393 
    394   if (!GetPageKids(m_pCurrentParser, pPages.get())) {
    395     m_docStatus = PDF_DATAAVAIL_ERROR;
    396     return false;
    397   }
    398 
    399   m_docStatus = PDF_DATAAVAIL_PAGE;
    400   return true;
    401 }
    402 
    403 bool CPDF_DataAvail::CheckHeader() {
    404   switch (CheckHeaderAndLinearized()) {
    405     case DocAvailStatus::DataAvailable:
    406       m_docStatus = m_pLinearized ? PDF_DATAAVAIL_FIRSTPAGE
    407                                   : PDF_DATAAVAIL_LOADALLCROSSREF;
    408       return true;
    409     case DocAvailStatus::DataNotAvailable:
    410       return false;
    411     case DocAvailStatus::DataError:
    412       m_docStatus = PDF_DATAAVAIL_ERROR;
    413       return true;
    414     default:
    415       NOTREACHED();
    416       return false;
    417   }
    418 }
    419 
    420 bool CPDF_DataAvail::CheckFirstPage() {
    421   if (!m_pLinearized->GetFirstPageEndOffset() ||
    422       !m_pLinearized->GetFileSize() ||
    423       !m_pLinearized->GetMainXRefTableFirstEntryOffset()) {
    424     m_docStatus = PDF_DATAAVAIL_ERROR;
    425     return false;
    426   }
    427 
    428   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
    429   dwEnd += 512;
    430   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
    431     dwEnd = (uint32_t)m_dwFileLen;
    432 
    433   const FX_FILESIZE start_pos = m_dwFileLen > 1024 ? 1024 : m_dwFileLen;
    434   const size_t data_size = dwEnd > 1024 ? static_cast<size_t>(dwEnd - 1024) : 0;
    435   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(start_pos,
    436                                                              data_size))
    437     return false;
    438 
    439   m_docStatus =
    440       m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
    441   return true;
    442 }
    443 
    444 bool CPDF_DataAvail::CheckHintTables() {
    445   if (m_pLinearized->GetPageCount() <= 1) {
    446     m_docStatus = PDF_DATAAVAIL_DONE;
    447     return true;
    448   }
    449   if (!m_pLinearized->HasHintTable()) {
    450     m_docStatus = PDF_DATAAVAIL_ERROR;
    451     return false;
    452   }
    453 
    454   const FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
    455   const uint32_t szHintLength = m_pLinearized->GetHintLength();
    456 
    457   if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(szHintStart,
    458                                                              szHintLength))
    459     return false;
    460 
    461   auto pHintTables = pdfium::MakeUnique<CPDF_HintTables>(GetValidator().Get(),
    462                                                          m_pLinearized.get());
    463   std::unique_ptr<CPDF_Object> pHintStream =
    464       ParseIndirectObjectAt(szHintStart, 0);
    465   CPDF_Stream* pStream = ToStream(pHintStream.get());
    466   if (pStream && pHintTables->LoadHintStream(pStream))
    467     m_pHintTables = std::move(pHintTables);
    468 
    469   m_docStatus = PDF_DATAAVAIL_DONE;
    470   return true;
    471 }
    472 
    473 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
    474     FX_FILESIZE pos,
    475     uint32_t objnum,
    476     CPDF_IndirectObjectHolder* pObjList) {
    477   const FX_FILESIZE SavedPos = GetSyntaxParser()->GetPos();
    478   GetSyntaxParser()->SetPos(pos);
    479   std::unique_ptr<CPDF_Object> result = GetSyntaxParser()->GetIndirectObject(
    480       pObjList, CPDF_SyntaxParser::ParseType::kLoose);
    481   GetSyntaxParser()->SetPos(SavedPos);
    482   return (result && (!objnum || result->GetObjNum() == objnum))
    483              ? std::move(result)
    484              : nullptr;
    485 }
    486 
    487 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
    488   switch (CheckHeaderAndLinearized()) {
    489     case DocAvailStatus::DataAvailable:
    490       return m_pLinearized ? DocLinearizationStatus::Linearized
    491                            : DocLinearizationStatus::NotLinearized;
    492     case DocAvailStatus::DataNotAvailable:
    493       return DocLinearizationStatus::LinearizationUnknown;
    494     case DocAvailStatus::DataError:
    495       return DocLinearizationStatus::NotLinearized;
    496     default:
    497       NOTREACHED();
    498       return DocLinearizationStatus::LinearizationUnknown;
    499   }
    500 }
    501 
    502 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckHeaderAndLinearized() {
    503   if (m_bHeaderAvail)
    504     return DocAvailStatus::DataAvailable;
    505 
    506   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    507   const int32_t header_offset = GetHeaderOffset(GetValidator());
    508   if (GetValidator()->has_read_problems())
    509     return DocAvailStatus::DataNotAvailable;
    510 
    511   if (header_offset == kInvalidHeaderOffset)
    512     return DocAvailStatus::DataError;
    513 
    514   m_parser.m_pSyntax->InitParserWithValidator(GetValidator(), header_offset);
    515   m_pLinearized = m_parser.ParseLinearizedHeader();
    516   if (GetValidator()->has_read_problems())
    517     return DocAvailStatus::DataNotAvailable;
    518 
    519   m_bHeaderAvail = true;
    520   return DocAvailStatus::DataAvailable;
    521 }
    522 
    523 bool CPDF_DataAvail::CheckPage(uint32_t dwPage) {
    524   while (true) {
    525     switch (m_docStatus) {
    526       case PDF_DATAAVAIL_PAGETREE:
    527         if (!LoadDocPages())
    528           return false;
    529         break;
    530       case PDF_DATAAVAIL_PAGE:
    531         if (!LoadDocPage(dwPage))
    532           return false;
    533         break;
    534       case PDF_DATAAVAIL_ERROR:
    535         return LoadAllFile();
    536       default:
    537         m_bPagesTreeLoad = true;
    538         m_bPagesLoad = true;
    539         m_bCurPageDictLoadOK = true;
    540         m_docStatus = PDF_DATAAVAIL_PAGE;
    541         return true;
    542     }
    543   }
    544 }
    545 
    546 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
    547                                         PageNode* pPageNode) {
    548   bool bExists = false;
    549   std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, &bExists);
    550   if (!bExists) {
    551     m_docStatus = PDF_DATAAVAIL_ERROR;
    552     return false;
    553   }
    554 
    555   if (!pPages)
    556     return false;
    557 
    558   CPDF_Array* pArray = pPages->AsArray();
    559   if (!pArray) {
    560     m_docStatus = PDF_DATAAVAIL_ERROR;
    561     return false;
    562   }
    563 
    564   pPageNode->m_type = PDF_PAGENODE_PAGES;
    565   for (size_t i = 0; i < pArray->GetCount(); ++i) {
    566     CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
    567     if (!pKid)
    568       continue;
    569 
    570     auto pNode = pdfium::MakeUnique<PageNode>();
    571     pNode->m_dwPageNo = pKid->GetRefObjNum();
    572     pPageNode->m_ChildNodes.push_back(std::move(pNode));
    573   }
    574   return true;
    575 }
    576 
    577 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
    578                                           PageNode* pPageNode) {
    579   bool bExists = false;
    580   std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, &bExists);
    581   if (!bExists) {
    582     m_docStatus = PDF_DATAAVAIL_ERROR;
    583     return false;
    584   }
    585 
    586   if (!pPage)
    587     return false;
    588 
    589   if (pPage->IsArray()) {
    590     pPageNode->m_dwPageNo = dwPageNo;
    591     pPageNode->m_type = PDF_PAGENODE_ARRAY;
    592     return true;
    593   }
    594 
    595   if (!pPage->IsDictionary()) {
    596     m_docStatus = PDF_DATAAVAIL_ERROR;
    597     return false;
    598   }
    599 
    600   pPageNode->m_dwPageNo = dwPageNo;
    601   CPDF_Dictionary* pDict = pPage->GetDict();
    602   const ByteString type = pDict->GetStringFor("Type");
    603   if (type == "Page") {
    604     pPageNode->m_type = PDF_PAGENODE_PAGE;
    605     return true;
    606   }
    607 
    608   if (type != "Pages") {
    609     m_docStatus = PDF_DATAAVAIL_ERROR;
    610     return false;
    611   }
    612 
    613   pPageNode->m_type = PDF_PAGENODE_PAGES;
    614   CPDF_Object* pKids = pDict->GetObjectFor("Kids");
    615   if (!pKids) {
    616     m_docStatus = PDF_DATAAVAIL_PAGE;
    617     return true;
    618   }
    619 
    620   switch (pKids->GetType()) {
    621     case CPDF_Object::REFERENCE: {
    622       CPDF_Reference* pKid = pKids->AsReference();
    623       auto pNode = pdfium::MakeUnique<PageNode>();
    624       pNode->m_dwPageNo = pKid->GetRefObjNum();
    625       pPageNode->m_ChildNodes.push_back(std::move(pNode));
    626       break;
    627     }
    628     case CPDF_Object::ARRAY: {
    629       CPDF_Array* pKidsArray = pKids->AsArray();
    630       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
    631         CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
    632         if (!pKid)
    633           continue;
    634 
    635         auto pNode = pdfium::MakeUnique<PageNode>();
    636         pNode->m_dwPageNo = pKid->GetRefObjNum();
    637         pPageNode->m_ChildNodes.push_back(std::move(pNode));
    638       }
    639       break;
    640     }
    641     default:
    642       break;
    643   }
    644   return true;
    645 }
    646 
    647 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
    648                                    int32_t iPage,
    649                                    int32_t& iCount,
    650                                    int level) {
    651   if (level >= kMaxPageRecursionDepth)
    652     return false;
    653 
    654   int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
    655   if (iSize <= 0 || iPage >= iSize) {
    656     m_docStatus = PDF_DATAAVAIL_ERROR;
    657     return false;
    658   }
    659   for (int32_t i = 0; i < iSize; ++i) {
    660     PageNode* pNode = pageNode.m_ChildNodes[i].get();
    661     if (!pNode)
    662       continue;
    663 
    664     if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
    665       // Updates the type for the unknown page node.
    666       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode))
    667         return false;
    668     }
    669     if (pNode->m_type == PDF_PAGENODE_ARRAY) {
    670       // Updates a more specific type for the array page node.
    671       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode))
    672         return false;
    673     }
    674     switch (pNode->m_type) {
    675       case PDF_PAGENODE_PAGE:
    676         iCount++;
    677         if (iPage == iCount && m_pDocument)
    678           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
    679         break;
    680       case PDF_PAGENODE_PAGES:
    681         if (!CheckPageNode(*pNode, iPage, iCount, level + 1))
    682           return false;
    683         break;
    684       case PDF_PAGENODE_UNKNOWN:
    685       case PDF_PAGENODE_ARRAY:
    686         // Already converted above, error if we get here.
    687         return false;
    688     }
    689     if (iPage == iCount) {
    690       m_docStatus = PDF_DATAAVAIL_DONE;
    691       return true;
    692     }
    693   }
    694   return true;
    695 }
    696 
    697 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage) {
    698   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
    699   int32_t iPage = safePage.ValueOrDie();
    700   if (m_pDocument->GetPageCount() <= iPage ||
    701       m_pDocument->IsPageLoaded(iPage)) {
    702     m_docStatus = PDF_DATAAVAIL_DONE;
    703     return true;
    704   }
    705   if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
    706     m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
    707     return true;
    708   }
    709   int32_t iCount = -1;
    710   return CheckPageNode(m_PageNode, iPage, iCount, 0);
    711 }
    712 
    713 bool CPDF_DataAvail::CheckPageCount() {
    714   bool bExists = false;
    715   std::unique_ptr<CPDF_Object> pPages = GetObject(m_PagesObjNum, &bExists);
    716   if (!bExists) {
    717     m_docStatus = PDF_DATAAVAIL_ERROR;
    718     return false;
    719   }
    720   if (!pPages)
    721     return false;
    722 
    723   CPDF_Dictionary* pPagesDict = pPages->GetDict();
    724   if (!pPagesDict) {
    725     m_docStatus = PDF_DATAAVAIL_ERROR;
    726     return false;
    727   }
    728   if (!pPagesDict->KeyExist("Kids"))
    729     return true;
    730 
    731   return pPagesDict->GetIntegerFor("Count") > 0;
    732 }
    733 
    734 bool CPDF_DataAvail::LoadDocPages() {
    735   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode))
    736     return false;
    737 
    738   if (CheckPageCount()) {
    739     m_docStatus = PDF_DATAAVAIL_PAGE;
    740     return true;
    741   }
    742 
    743   m_bTotalLoadPageTree = true;
    744   return false;
    745 }
    746 
    747 bool CPDF_DataAvail::LoadPages() {
    748   while (!m_bPagesTreeLoad) {
    749     if (!CheckPageStatus())
    750       return false;
    751   }
    752 
    753   if (m_bPagesLoad)
    754     return true;
    755 
    756   m_pDocument->LoadPages();
    757   return false;
    758 }
    759 
    760 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData() {
    761   if (m_bLinearedDataOK)
    762     return DataAvailable;
    763   ASSERT(m_pLinearized);
    764   if (!m_pLinearized->GetMainXRefTableFirstEntryOffset() || !m_pDocument ||
    765       !m_pDocument->GetParser() || !m_pDocument->GetParser()->GetTrailer()) {
    766     return DataError;
    767   }
    768 
    769   if (!m_bMainXRefLoadTried) {
    770     const FX_SAFE_FILESIZE main_xref_offset =
    771         m_pDocument->GetParser()->GetTrailer()->GetIntegerFor("Prev");
    772     if (!main_xref_offset.IsValid())
    773       return DataError;
    774 
    775     if (main_xref_offset.ValueOrDie() == 0)
    776       return DataAvailable;
    777 
    778     FX_SAFE_SIZE_T data_size = m_dwFileLen;
    779     data_size -= main_xref_offset.ValueOrDie();
    780     if (!data_size.IsValid())
    781       return DataError;
    782 
    783     if (!GetValidator()->CheckDataRangeAndRequestIfUnavailable(
    784             main_xref_offset.ValueOrDie(), data_size.ValueOrDie()))
    785       return DataNotAvailable;
    786 
    787     CPDF_Parser::Error eRet =
    788         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
    789     m_bMainXRefLoadTried = true;
    790     if (eRet != CPDF_Parser::SUCCESS)
    791       return DataError;
    792 
    793     if (!PreparePageItem())
    794       return DataNotAvailable;
    795 
    796     m_bMainXRefLoadedOK = true;
    797     m_bLinearedDataOK = true;
    798   }
    799 
    800   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
    801 }
    802 
    803 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
    804     uint32_t dwPage,
    805     DownloadHints* pHints) {
    806   if (!m_pDocument)
    807     return DataError;
    808 
    809   const FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
    810   if (!safePage.IsValid())
    811     return DataError;
    812 
    813   if (safePage.ValueOrDie() >= m_pDocument->GetPageCount()) {
    814     // This is XFA page.
    815     return DataAvailable;
    816   }
    817 
    818   if (IsFirstCheck(dwPage)) {
    819     m_bCurPageDictLoadOK = false;
    820   }
    821 
    822   if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
    823     return DataAvailable;
    824 
    825   const HintsScope hints_scope(GetValidator().Get(), pHints);
    826 
    827   if (m_pLinearized) {
    828     if (dwPage == m_pLinearized->GetFirstPageNo()) {
    829       CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
    830       if (!pPageDict)
    831         return DataError;
    832 
    833       auto page_num_obj = std::make_pair(
    834           dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
    835                       GetValidator().Get(), m_pDocument, pPageDict));
    836 
    837       CPDF_PageObjectAvail* page_obj_avail =
    838           m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
    839       // TODO(art-snake): Check resources.
    840       return page_obj_avail->CheckAvail();
    841     }
    842 
    843     DocAvailStatus nResult = CheckLinearizedData();
    844     if (nResult != DataAvailable)
    845       return nResult;
    846 
    847     if (m_pHintTables) {
    848       nResult = m_pHintTables->CheckPage(dwPage);
    849       if (nResult != DataAvailable)
    850         return nResult;
    851       if (GetPage(dwPage)) {
    852         m_pagesLoadState.insert(dwPage);
    853         return DataAvailable;
    854       }
    855     }
    856 
    857     if (!m_bMainXRefLoadedOK) {
    858       if (!LoadAllFile())
    859         return DataNotAvailable;
    860       m_pDocument->GetParser()->RebuildCrossRef();
    861       ResetFirstCheck(dwPage);
    862       return DataAvailable;
    863     }
    864     if (m_bTotalLoadPageTree) {
    865       if (!LoadPages())
    866         return DataNotAvailable;
    867     } else {
    868       if (!m_bCurPageDictLoadOK && !CheckPage(dwPage))
    869         return DataNotAvailable;
    870     }
    871   } else {
    872     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK && !CheckPage(dwPage)) {
    873       return DataNotAvailable;
    874     }
    875   }
    876 
    877   if (CheckAcroForm() == DocFormStatus::FormNotAvailable)
    878     return DataNotAvailable;
    879 
    880   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
    881   if (!pPageDict)
    882     return DataError;
    883 
    884   {
    885     auto page_num_obj = std::make_pair(
    886         dwPage, pdfium::MakeUnique<CPDF_PageObjectAvail>(
    887                     GetValidator().Get(), m_pDocument, pPageDict));
    888     CPDF_PageObjectAvail* page_obj_avail =
    889         m_PagesObjAvail.insert(std::move(page_num_obj)).first->second.get();
    890     const DocAvailStatus status = page_obj_avail->CheckAvail();
    891     if (status != DocAvailStatus::DataAvailable)
    892       return status;
    893   }
    894 
    895   const DocAvailStatus resources_status = CheckResources(pPageDict);
    896   if (resources_status != DocAvailStatus::DataAvailable)
    897     return resources_status;
    898 
    899   m_bCurPageDictLoadOK = false;
    900   ResetFirstCheck(dwPage);
    901   m_pagesLoadState.insert(dwPage);
    902   return DataAvailable;
    903 }
    904 
    905 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckResources(
    906     const CPDF_Dictionary* page) {
    907   ASSERT(page);
    908   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    909   const CPDF_Object* resources = GetResourceObject(page);
    910   if (GetValidator()->has_read_problems())
    911     return DocAvailStatus::DataNotAvailable;
    912 
    913   if (!resources)
    914     return DocAvailStatus::DataAvailable;
    915 
    916   CPDF_PageObjectAvail* resource_avail =
    917       m_PagesResourcesAvail
    918           .insert(std::make_pair(
    919               resources, pdfium::MakeUnique<CPDF_PageObjectAvail>(
    920                              GetValidator().Get(), m_pDocument, resources)))
    921           .first->second.get();
    922   return resource_avail->CheckAvail();
    923 }
    924 
    925 RetainPtr<IFX_SeekableReadStream> CPDF_DataAvail::GetFileRead() const {
    926   return m_pFileRead;
    927 }
    928 
    929 RetainPtr<CPDF_ReadValidator> CPDF_DataAvail::GetValidator() const {
    930   return m_pFileRead;
    931 }
    932 
    933 CPDF_SyntaxParser* CPDF_DataAvail::GetSyntaxParser() const {
    934   return m_pDocument ? m_pDocument->GetParser()->m_pSyntax.get()
    935                      : m_parser.m_pSyntax.get();
    936 }
    937 
    938 int CPDF_DataAvail::GetPageCount() const {
    939   if (m_pLinearized)
    940     return m_pLinearized->GetPageCount();
    941   return m_pDocument ? m_pDocument->GetPageCount() : 0;
    942 }
    943 
    944 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
    945   if (!m_pDocument || index < 0 || index >= GetPageCount())
    946     return nullptr;
    947   CPDF_Dictionary* page = m_pDocument->GetPage(index);
    948   if (page)
    949     return page;
    950   if (!m_pLinearized || !m_pHintTables)
    951     return nullptr;
    952 
    953   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
    954     return nullptr;
    955   FX_FILESIZE szPageStartPos = 0;
    956   FX_FILESIZE szPageLength = 0;
    957   uint32_t dwObjNum = 0;
    958   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
    959                                                      &szPageLength, &dwObjNum);
    960   if (!bPagePosGot || !dwObjNum)
    961     return nullptr;
    962   // We should say to the document, which object is the page.
    963   m_pDocument->SetPageObjNum(index, dwObjNum);
    964   // Page object already can be parsed in document.
    965   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
    966     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
    967         dwObjNum, ParseIndirectObjectAt(szPageStartPos, dwObjNum, m_pDocument));
    968   }
    969   if (!ValidatePage(index))
    970     return nullptr;
    971   return m_pDocument->GetPage(index);
    972 }
    973 
    974 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
    975     DownloadHints* pHints) {
    976   const HintsScope hints_scope(GetValidator().Get(), pHints);
    977   return CheckAcroForm();
    978 }
    979 
    980 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::CheckAcroForm() {
    981   if (!m_pDocument)
    982     return FormAvailable;
    983 
    984   if (m_pLinearized) {
    985     DocAvailStatus nDocStatus = CheckLinearizedData();
    986     if (nDocStatus == DataError)
    987       return FormError;
    988     if (nDocStatus == DataNotAvailable)
    989       return FormNotAvailable;
    990   }
    991 
    992   if (!m_pFormAvail) {
    993     const CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
    994     if (!pRoot)
    995       return FormAvailable;
    996 
    997     CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
    998     if (!pAcroForm)
    999       return FormNotExist;
   1000 
   1001     m_pFormAvail = pdfium::MakeUnique<CPDF_PageObjectAvail>(
   1002         GetValidator().Get(), m_pDocument, pAcroForm);
   1003   }
   1004   switch (m_pFormAvail->CheckAvail()) {
   1005     case DocAvailStatus::DataError:
   1006       return DocFormStatus::FormError;
   1007     case DocAvailStatus::DataNotAvailable:
   1008       return DocFormStatus::FormNotAvailable;
   1009     case DocAvailStatus::DataAvailable:
   1010       return DocFormStatus::FormAvailable;
   1011     default:
   1012       NOTREACHED();
   1013   }
   1014   return DocFormStatus::FormError;
   1015 }
   1016 
   1017 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
   1018   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
   1019   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
   1020   if (!pPageDict)
   1021     return false;
   1022   CPDF_PageObjectAvail obj_avail(GetValidator().Get(), m_pDocument, pPageDict);
   1023   return obj_avail.CheckAvail() == DocAvailStatus::DataAvailable;
   1024 }
   1025 
   1026 std::pair<CPDF_Parser::Error, std::unique_ptr<CPDF_Document>>
   1027 CPDF_DataAvail::ParseDocument(const char* password) {
   1028   if (m_pDocument) {
   1029     // We already returned parsed document.
   1030     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
   1031   }
   1032   auto parser = pdfium::MakeUnique<CPDF_Parser>();
   1033   parser->SetPassword(password);
   1034   auto document = pdfium::MakeUnique<CPDF_Document>(std::move(parser));
   1035 
   1036   CPDF_ReadValidator::Session read_session(GetValidator().Get());
   1037   CPDF_Parser::Error error = document->GetParser()->StartLinearizedParse(
   1038       GetFileRead(), document.get());
   1039 
   1040   // Additional check, that all ok.
   1041   if (GetValidator()->has_read_problems()) {
   1042     NOTREACHED();
   1043     return std::make_pair(CPDF_Parser::HANDLER_ERROR, nullptr);
   1044   }
   1045 
   1046   if (error != CPDF_Parser::SUCCESS)
   1047     return std::make_pair(error, nullptr);
   1048 
   1049   m_pDocument = document.get();
   1050   return std::make_pair(CPDF_Parser::SUCCESS, std::move(document));
   1051 }
   1052 
   1053 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
   1054 
   1055 CPDF_DataAvail::PageNode::~PageNode() {}
   1056