Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_data_avail.h"
      8 
      9 #include <algorithm>
     10 #include <memory>
     11 #include <utility>
     12 
     13 #include "core/fpdfapi/cpdf_modulemgr.h"
     14 #include "core/fpdfapi/parser/cpdf_array.h"
     15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     16 #include "core/fpdfapi/parser/cpdf_document.h"
     17 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
     18 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     19 #include "core/fpdfapi/parser/cpdf_name.h"
     20 #include "core/fpdfapi/parser/cpdf_number.h"
     21 #include "core/fpdfapi/parser/cpdf_reference.h"
     22 #include "core/fpdfapi/parser/cpdf_stream.h"
     23 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     24 #include "core/fxcrt/fx_ext.h"
     25 #include "core/fxcrt/fx_safe_types.h"
     26 #include "third_party/base/numerics/safe_conversions.h"
     27 #include "third_party/base/ptr_util.h"
     28 #include "third_party/base/stl_util.h"
     29 
     30 CPDF_DataAvail::FileAvail::~FileAvail() {}
     31 
     32 CPDF_DataAvail::DownloadHints::~DownloadHints() {}
     33 
     34 // static
     35 int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0;
     36 
     37 CPDF_DataAvail::CPDF_DataAvail(
     38     FileAvail* pFileAvail,
     39     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileRead,
     40     bool bSupportHintTable)
     41     : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {
     42   m_Pos = 0;
     43   m_dwFileLen = 0;
     44   if (m_pFileRead) {
     45     m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
     46   }
     47   m_dwCurrentOffset = 0;
     48   m_dwXRefOffset = 0;
     49   m_dwTrailerOffset = 0;
     50   m_bufferOffset = 0;
     51   m_bufferSize = 0;
     52   m_PagesObjNum = 0;
     53   m_dwCurrentXRefSteam = 0;
     54   m_dwAcroFormObjNum = 0;
     55   m_dwInfoObjNum = 0;
     56   m_pDocument = 0;
     57   m_dwEncryptObjNum = 0;
     58   m_dwPrevXRefOffset = 0;
     59   m_dwLastXRefOffset = 0;
     60   m_bDocAvail = false;
     61   m_bMainXRefLoadTried = false;
     62   m_bDocAvail = false;
     63   m_bPagesLoad = false;
     64   m_bPagesTreeLoad = false;
     65   m_bMainXRefLoadedOK = false;
     66   m_bAnnotsLoad = false;
     67   m_bHaveAcroForm = false;
     68   m_bAcroFormLoad = false;
     69   m_bPageLoadedOK = false;
     70   m_bNeedDownLoadResource = false;
     71   m_bLinearizedFormParamLoad = false;
     72   m_pTrailer = nullptr;
     73   m_pCurrentParser = nullptr;
     74   m_pAcroForm = nullptr;
     75   m_pPageDict = nullptr;
     76   m_pPageResource = nullptr;
     77   m_docStatus = PDF_DATAAVAIL_HEADER;
     78   m_bTotalLoadPageTree = false;
     79   m_bCurPageDictLoadOK = false;
     80   m_bLinearedDataOK = false;
     81   m_bSupportHintTable = bSupportHintTable;
     82 }
     83 
     84 CPDF_DataAvail::~CPDF_DataAvail() {
     85   m_pHintTables.reset();
     86   for (CPDF_Object* pObject : m_arrayAcroforms)
     87     delete pObject;
     88 }
     89 
     90 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) {
     91   m_pDocument = pDoc;
     92 }
     93 
     94 uint32_t CPDF_DataAvail::GetObjectSize(uint32_t objnum, FX_FILESIZE& offset) {
     95   CPDF_Parser* pParser = m_pDocument->GetParser();
     96   if (!pParser || !pParser->IsValidObjectNumber(objnum))
     97     return 0;
     98 
     99   if (pParser->GetObjectType(objnum) == 2)
    100     objnum = pParser->GetObjectPositionOrZero(objnum);
    101 
    102   if (pParser->GetObjectType(objnum) != 1 &&
    103       pParser->GetObjectType(objnum) != 255) {
    104     return 0;
    105   }
    106 
    107   offset = pParser->GetObjectPositionOrZero(objnum);
    108   if (offset == 0)
    109     return 0;
    110 
    111   auto it = pParser->m_SortedOffset.find(offset);
    112   if (it == pParser->m_SortedOffset.end() ||
    113       ++it == pParser->m_SortedOffset.end()) {
    114     return 0;
    115   }
    116   return *it - offset;
    117 }
    118 
    119 bool CPDF_DataAvail::AreObjectsAvailable(std::vector<CPDF_Object*>& obj_array,
    120                                          bool bParsePage,
    121                                          DownloadHints* pHints,
    122                                          std::vector<CPDF_Object*>& ret_array) {
    123   if (obj_array.empty())
    124     return true;
    125 
    126   uint32_t count = 0;
    127   std::vector<CPDF_Object*> new_obj_array;
    128   for (CPDF_Object* pObj : obj_array) {
    129     if (!pObj)
    130       continue;
    131 
    132     int32_t type = pObj->GetType();
    133     switch (type) {
    134       case CPDF_Object::ARRAY: {
    135         CPDF_Array* pArray = pObj->AsArray();
    136         for (size_t k = 0; k < pArray->GetCount(); ++k)
    137           new_obj_array.push_back(pArray->GetObjectAt(k));
    138       } break;
    139       case CPDF_Object::STREAM:
    140         pObj = pObj->GetDict();
    141       case CPDF_Object::DICTIONARY: {
    142         CPDF_Dictionary* pDict = pObj->GetDict();
    143         if (pDict && pDict->GetStringFor("Type") == "Page" && !bParsePage)
    144           continue;
    145 
    146         for (const auto& it : *pDict) {
    147           if (it.first != "Parent")
    148             new_obj_array.push_back(it.second.get());
    149         }
    150       } break;
    151       case CPDF_Object::REFERENCE: {
    152         CPDF_Reference* pRef = pObj->AsReference();
    153         uint32_t dwNum = pRef->GetRefObjNum();
    154 
    155         FX_FILESIZE offset;
    156         uint32_t size = GetObjectSize(dwNum, offset);
    157         if (size == 0 || offset < 0 || offset >= m_dwFileLen)
    158           break;
    159 
    160         if (!IsDataAvail(offset, size, pHints)) {
    161           ret_array.push_back(pObj);
    162           count++;
    163         } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) {
    164           m_ObjectSet.insert(dwNum);
    165           CPDF_Object* pReferred =
    166               m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
    167           if (pReferred)
    168             new_obj_array.push_back(pReferred);
    169         }
    170       } break;
    171     }
    172   }
    173 
    174   if (count > 0) {
    175     for (CPDF_Object* pObj : new_obj_array) {
    176       CPDF_Reference* pRef = pObj->AsReference();
    177       if (pRef && pdfium::ContainsKey(m_ObjectSet, pRef->GetRefObjNum()))
    178         continue;
    179       ret_array.push_back(pObj);
    180     }
    181     return false;
    182   }
    183 
    184   obj_array = new_obj_array;
    185   return AreObjectsAvailable(obj_array, false, pHints, ret_array);
    186 }
    187 
    188 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail(
    189     DownloadHints* pHints) {
    190   if (!m_dwFileLen && m_pFileRead) {
    191     m_dwFileLen = (uint32_t)m_pFileRead->GetSize();
    192     if (!m_dwFileLen)
    193       return DataError;
    194   }
    195 
    196   while (!m_bDocAvail) {
    197     if (!CheckDocStatus(pHints))
    198       return DataNotAvailable;
    199   }
    200 
    201   return DataAvailable;
    202 }
    203 
    204 bool CPDF_DataAvail::CheckAcroFormSubObject(DownloadHints* pHints) {
    205   if (m_objs_array.empty()) {
    206     m_ObjectSet.clear();
    207     std::vector<CPDF_Object*> obj_array = m_arrayAcroforms;
    208     if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
    209       return false;
    210 
    211     m_objs_array.clear();
    212     return true;
    213   }
    214 
    215   std::vector<CPDF_Object*> new_objs_array;
    216   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
    217     m_objs_array = new_objs_array;
    218     return false;
    219   }
    220 
    221   for (CPDF_Object* pObject : m_arrayAcroforms)
    222     delete pObject;
    223 
    224   m_arrayAcroforms.clear();
    225   return true;
    226 }
    227 
    228 bool CPDF_DataAvail::CheckAcroForm(DownloadHints* pHints) {
    229   bool bExist = false;
    230   m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist).release();
    231   if (!bExist) {
    232     m_docStatus = PDF_DATAAVAIL_PAGETREE;
    233     return true;
    234   }
    235 
    236   if (!m_pAcroForm) {
    237     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    238       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    239       return true;
    240     }
    241     return false;
    242   }
    243 
    244   m_arrayAcroforms.push_back(m_pAcroForm);
    245   m_docStatus = PDF_DATAAVAIL_PAGETREE;
    246   return true;
    247 }
    248 
    249 bool CPDF_DataAvail::CheckDocStatus(DownloadHints* pHints) {
    250   switch (m_docStatus) {
    251     case PDF_DATAAVAIL_HEADER:
    252       return CheckHeader(pHints);
    253     case PDF_DATAAVAIL_FIRSTPAGE:
    254       return CheckFirstPage(pHints);
    255     case PDF_DATAAVAIL_HINTTABLE:
    256       return CheckHintTables(pHints);
    257     case PDF_DATAAVAIL_END:
    258       return CheckEnd(pHints);
    259     case PDF_DATAAVAIL_CROSSREF:
    260       return CheckCrossRef(pHints);
    261     case PDF_DATAAVAIL_CROSSREF_ITEM:
    262       return CheckCrossRefItem(pHints);
    263     case PDF_DATAAVAIL_CROSSREF_STREAM:
    264       return CheckAllCrossRefStream(pHints);
    265     case PDF_DATAAVAIL_TRAILER:
    266       return CheckTrailer(pHints);
    267     case PDF_DATAAVAIL_TRAILER_APPEND:
    268       return CheckTrailerAppend(pHints);
    269     case PDF_DATAAVAIL_LOADALLCROSSREF:
    270       return LoadAllXref(pHints);
    271     case PDF_DATAAVAIL_LOADALLFILE:
    272       return LoadAllFile(pHints);
    273     case PDF_DATAAVAIL_ROOT:
    274       return CheckRoot(pHints);
    275     case PDF_DATAAVAIL_INFO:
    276       return CheckInfo(pHints);
    277     case PDF_DATAAVAIL_ACROFORM:
    278       return CheckAcroForm(pHints);
    279     case PDF_DATAAVAIL_PAGETREE:
    280       if (m_bTotalLoadPageTree)
    281         return CheckPages(pHints);
    282       return LoadDocPages(pHints);
    283     case PDF_DATAAVAIL_PAGE:
    284       if (m_bTotalLoadPageTree)
    285         return CheckPage(pHints);
    286       m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD;
    287       return true;
    288     case PDF_DATAAVAIL_ERROR:
    289       return LoadAllFile(pHints);
    290     case PDF_DATAAVAIL_PAGE_LATERLOAD:
    291       m_docStatus = PDF_DATAAVAIL_PAGE;
    292     default:
    293       m_bDocAvail = true;
    294       return true;
    295   }
    296 }
    297 
    298 bool CPDF_DataAvail::CheckPageStatus(DownloadHints* pHints) {
    299   switch (m_docStatus) {
    300     case PDF_DATAAVAIL_PAGETREE:
    301       return CheckPages(pHints);
    302     case PDF_DATAAVAIL_PAGE:
    303       return CheckPage(pHints);
    304     case PDF_DATAAVAIL_ERROR:
    305       return LoadAllFile(pHints);
    306     default:
    307       m_bPagesTreeLoad = true;
    308       m_bPagesLoad = true;
    309       return true;
    310   }
    311 }
    312 
    313 bool CPDF_DataAvail::LoadAllFile(DownloadHints* pHints) {
    314   if (m_pFileAvail->IsDataAvail(0, (uint32_t)m_dwFileLen)) {
    315     m_docStatus = PDF_DATAAVAIL_DONE;
    316     return true;
    317   }
    318 
    319   pHints->AddSegment(0, (uint32_t)m_dwFileLen);
    320   return false;
    321 }
    322 
    323 bool CPDF_DataAvail::LoadAllXref(DownloadHints* pHints) {
    324   m_parser.m_pSyntax->InitParser(m_pFileRead, (uint32_t)m_dwHeaderOffset);
    325   if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) &&
    326       !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) {
    327     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    328     return false;
    329   }
    330 
    331   m_dwRootObjNum = m_parser.GetRootObjNum();
    332   m_dwInfoObjNum = m_parser.GetInfoObjNum();
    333   m_pCurrentParser = &m_parser;
    334   m_docStatus = PDF_DATAAVAIL_ROOT;
    335   return true;
    336 }
    337 
    338 std::unique_ptr<CPDF_Object> CPDF_DataAvail::GetObject(uint32_t objnum,
    339                                                        DownloadHints* pHints,
    340                                                        bool* pExistInFile) {
    341   uint32_t size = 0;
    342   FX_FILESIZE offset = 0;
    343   CPDF_Parser* pParser = nullptr;
    344 
    345   if (pExistInFile)
    346     *pExistInFile = true;
    347 
    348   if (m_pDocument) {
    349     size = GetObjectSize(objnum, offset);
    350     pParser = m_pDocument->GetParser();
    351   } else {
    352     size = (uint32_t)m_parser.GetObjectSize(objnum);
    353     offset = m_parser.GetObjectOffset(objnum);
    354     pParser = &m_parser;
    355   }
    356 
    357   if (!IsDataAvail(offset, size, pHints))
    358     return nullptr;
    359 
    360   std::unique_ptr<CPDF_Object> pRet;
    361   if (pParser)
    362     pRet = pParser->ParseIndirectObject(nullptr, objnum);
    363 
    364   if (!pRet && pExistInFile)
    365     *pExistInFile = false;
    366 
    367   return pRet;
    368 }
    369 
    370 bool CPDF_DataAvail::CheckInfo(DownloadHints* pHints) {
    371   bool bExist = false;
    372   std::unique_ptr<CPDF_Object> pInfo =
    373       GetObject(m_dwInfoObjNum, pHints, &bExist);
    374   if (bExist && !pInfo) {
    375     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    376       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    377       return true;
    378     }
    379     if (m_Pos == m_dwFileLen)
    380       m_docStatus = PDF_DATAAVAIL_ERROR;
    381     return false;
    382   }
    383   m_docStatus =
    384       m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
    385   return true;
    386 }
    387 
    388 bool CPDF_DataAvail::CheckRoot(DownloadHints* pHints) {
    389   bool bExist = false;
    390   m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist);
    391   if (!bExist) {
    392     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    393     return true;
    394   }
    395 
    396   if (!m_pRoot) {
    397     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    398       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    399       return true;
    400     }
    401     return false;
    402   }
    403 
    404   CPDF_Dictionary* pDict = m_pRoot->GetDict();
    405   if (!pDict) {
    406     m_docStatus = PDF_DATAAVAIL_ERROR;
    407     return false;
    408   }
    409 
    410   CPDF_Reference* pRef = ToReference(pDict->GetObjectFor("Pages"));
    411   if (!pRef) {
    412     m_docStatus = PDF_DATAAVAIL_ERROR;
    413     return false;
    414   }
    415 
    416   m_PagesObjNum = pRef->GetRefObjNum();
    417   CPDF_Reference* pAcroFormRef =
    418       ToReference(m_pRoot->GetDict()->GetObjectFor("AcroForm"));
    419   if (pAcroFormRef) {
    420     m_bHaveAcroForm = true;
    421     m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum();
    422   }
    423 
    424   if (m_dwInfoObjNum) {
    425     m_docStatus = PDF_DATAAVAIL_INFO;
    426   } else {
    427     m_docStatus =
    428         m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE;
    429   }
    430   return true;
    431 }
    432 
    433 bool CPDF_DataAvail::PreparePageItem() {
    434   CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
    435   CPDF_Reference* pRef =
    436       ToReference(pRoot ? pRoot->GetObjectFor("Pages") : nullptr);
    437   if (!pRef) {
    438     m_docStatus = PDF_DATAAVAIL_ERROR;
    439     return false;
    440   }
    441 
    442   m_PagesObjNum = pRef->GetRefObjNum();
    443   m_pCurrentParser = m_pDocument->GetParser();
    444   m_docStatus = PDF_DATAAVAIL_PAGETREE;
    445   return true;
    446 }
    447 
    448 bool CPDF_DataAvail::IsFirstCheck(uint32_t dwPage) {
    449   return m_pageMapCheckState.insert(dwPage).second;
    450 }
    451 
    452 void CPDF_DataAvail::ResetFirstCheck(uint32_t dwPage) {
    453   m_pageMapCheckState.erase(dwPage);
    454 }
    455 
    456 bool CPDF_DataAvail::CheckPage(DownloadHints* pHints) {
    457   std::vector<uint32_t> UnavailObjList;
    458   for (uint32_t dwPageObjNum : m_PageObjList) {
    459     bool bExists = false;
    460     std::unique_ptr<CPDF_Object> pObj =
    461         GetObject(dwPageObjNum, pHints, &bExists);
    462     if (!pObj) {
    463       if (bExists)
    464         UnavailObjList.push_back(dwPageObjNum);
    465       continue;
    466     }
    467     CPDF_Array* pArray = ToArray(pObj.get());
    468     if (pArray) {
    469       for (const auto& pArrayObj : *pArray) {
    470         if (CPDF_Reference* pRef = ToReference(pArrayObj.get()))
    471           UnavailObjList.push_back(pRef->GetRefObjNum());
    472       }
    473     }
    474     if (!pObj->IsDictionary())
    475       continue;
    476 
    477     CFX_ByteString type = pObj->GetDict()->GetStringFor("Type");
    478     if (type == "Pages") {
    479       m_PagesArray.push_back(std::move(pObj));
    480       continue;
    481     }
    482   }
    483   m_PageObjList.clear();
    484   if (!UnavailObjList.empty()) {
    485     m_PageObjList = std::move(UnavailObjList);
    486     return false;
    487   }
    488   size_t iPages = m_PagesArray.size();
    489   for (size_t i = 0; i < iPages; ++i) {
    490     std::unique_ptr<CPDF_Object> pPages = std::move(m_PagesArray[i]);
    491     if (pPages && !GetPageKids(m_pCurrentParser, pPages.get())) {
    492       m_PagesArray.clear();
    493       m_docStatus = PDF_DATAAVAIL_ERROR;
    494       return false;
    495     }
    496   }
    497   m_PagesArray.clear();
    498   if (m_PageObjList.empty())
    499     m_docStatus = PDF_DATAAVAIL_DONE;
    500 
    501   return true;
    502 }
    503 
    504 bool CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) {
    505   if (!pParser) {
    506     m_docStatus = PDF_DATAAVAIL_ERROR;
    507     return false;
    508   }
    509 
    510   CPDF_Dictionary* pDict = pPages->GetDict();
    511   CPDF_Object* pKids = pDict ? pDict->GetObjectFor("Kids") : nullptr;
    512   if (!pKids)
    513     return true;
    514 
    515   switch (pKids->GetType()) {
    516     case CPDF_Object::REFERENCE:
    517       m_PageObjList.push_back(pKids->AsReference()->GetRefObjNum());
    518       break;
    519     case CPDF_Object::ARRAY: {
    520       CPDF_Array* pKidsArray = pKids->AsArray();
    521       for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
    522         if (CPDF_Reference* pRef = ToReference(pKidsArray->GetObjectAt(i)))
    523           m_PageObjList.push_back(pRef->GetRefObjNum());
    524       }
    525     } break;
    526     default:
    527       m_docStatus = PDF_DATAAVAIL_ERROR;
    528       return false;
    529   }
    530   return true;
    531 }
    532 
    533 bool CPDF_DataAvail::CheckPages(DownloadHints* pHints) {
    534   bool bExists = false;
    535   std::unique_ptr<CPDF_Object> pPages =
    536       GetObject(m_PagesObjNum, pHints, &bExists);
    537   if (!bExists) {
    538     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    539     return true;
    540   }
    541 
    542   if (!pPages) {
    543     if (m_docStatus == PDF_DATAAVAIL_ERROR) {
    544       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    545       return true;
    546     }
    547     return false;
    548   }
    549 
    550   if (!GetPageKids(m_pCurrentParser, pPages.get())) {
    551     m_docStatus = PDF_DATAAVAIL_ERROR;
    552     return false;
    553   }
    554 
    555   m_docStatus = PDF_DATAAVAIL_PAGE;
    556   return true;
    557 }
    558 
    559 bool CPDF_DataAvail::CheckHeader(DownloadHints* pHints) {
    560   ASSERT(m_dwFileLen >= 0);
    561   const uint32_t kReqSize = std::min(static_cast<uint32_t>(m_dwFileLen), 1024U);
    562 
    563   if (m_pFileAvail->IsDataAvail(0, kReqSize)) {
    564     uint8_t buffer[1024];
    565     m_pFileRead->ReadBlock(buffer, 0, kReqSize);
    566 
    567     if (IsLinearizedFile(buffer, kReqSize)) {
    568       m_docStatus = PDF_DATAAVAIL_FIRSTPAGE;
    569     } else {
    570       if (m_docStatus == PDF_DATAAVAIL_ERROR)
    571         return false;
    572       m_docStatus = PDF_DATAAVAIL_END;
    573     }
    574     return true;
    575   }
    576 
    577   pHints->AddSegment(0, kReqSize);
    578   return false;
    579 }
    580 
    581 bool CPDF_DataAvail::CheckFirstPage(DownloadHints* pHints) {
    582   if (!m_pLinearized->GetFirstPageEndOffset() ||
    583       !m_pLinearized->GetFileSize() || !m_pLinearized->GetLastXRefOffset()) {
    584     m_docStatus = PDF_DATAAVAIL_ERROR;
    585     return false;
    586   }
    587 
    588   uint32_t dwEnd = m_pLinearized->GetFirstPageEndOffset();
    589   dwEnd += 512;
    590   if ((FX_FILESIZE)dwEnd > m_dwFileLen)
    591     dwEnd = (uint32_t)m_dwFileLen;
    592 
    593   int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen);
    594   int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0;
    595   if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) {
    596     pHints->AddSegment(iStartPos, iSize);
    597     return false;
    598   }
    599 
    600   m_docStatus =
    601       m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE;
    602   return true;
    603 }
    604 
    605 bool CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset,
    606                                  uint32_t size,
    607                                  DownloadHints* pHints) {
    608   if (offset < 0 || offset > m_dwFileLen)
    609     return true;
    610 
    611   FX_SAFE_FILESIZE safeSize = offset;
    612   safeSize += size;
    613   safeSize += 512;
    614   if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen)
    615     size = m_dwFileLen - offset;
    616   else
    617     size += 512;
    618 
    619   if (!m_pFileAvail->IsDataAvail(offset, size)) {
    620     if (pHints)
    621       pHints->AddSegment(offset, size);
    622     return false;
    623   }
    624   return true;
    625 }
    626 
    627 bool CPDF_DataAvail::CheckHintTables(DownloadHints* pHints) {
    628   if (m_pLinearized->GetPageCount() <= 1) {
    629     m_docStatus = PDF_DATAAVAIL_DONE;
    630     return true;
    631   }
    632   if (!m_pLinearized->HasHintTable()) {
    633     m_docStatus = PDF_DATAAVAIL_ERROR;
    634     return false;
    635   }
    636 
    637   FX_FILESIZE szHintStart = m_pLinearized->GetHintStart();
    638   FX_FILESIZE szHintLength = m_pLinearized->GetHintLength();
    639 
    640   if (!IsDataAvail(szHintStart, szHintLength, pHints))
    641     return false;
    642 
    643   m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset);
    644 
    645   std::unique_ptr<CPDF_HintTables> pHintTables(
    646       new CPDF_HintTables(this, m_pLinearized.get()));
    647   std::unique_ptr<CPDF_Object> pHintStream(
    648       ParseIndirectObjectAt(szHintStart, 0));
    649   CPDF_Stream* pStream = ToStream(pHintStream.get());
    650   if (pStream && pHintTables->LoadHintStream(pStream))
    651     m_pHintTables = std::move(pHintTables);
    652 
    653   m_docStatus = PDF_DATAAVAIL_DONE;
    654   return true;
    655 }
    656 
    657 std::unique_ptr<CPDF_Object> CPDF_DataAvail::ParseIndirectObjectAt(
    658     FX_FILESIZE pos,
    659     uint32_t objnum,
    660     CPDF_IndirectObjectHolder* pObjList) {
    661   FX_FILESIZE SavedPos = m_syntaxParser.SavePos();
    662   m_syntaxParser.RestorePos(pos);
    663 
    664   bool bIsNumber;
    665   CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber);
    666   if (!bIsNumber)
    667     return nullptr;
    668 
    669   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
    670   if (objnum && parser_objnum != objnum)
    671     return nullptr;
    672 
    673   word = m_syntaxParser.GetNextWord(&bIsNumber);
    674   if (!bIsNumber)
    675     return nullptr;
    676 
    677   uint32_t gennum = FXSYS_atoui(word.c_str());
    678   if (m_syntaxParser.GetKeyword() != "obj") {
    679     m_syntaxParser.RestorePos(SavedPos);
    680     return nullptr;
    681   }
    682 
    683   std::unique_ptr<CPDF_Object> pObj =
    684       m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, true);
    685   m_syntaxParser.RestorePos(SavedPos);
    686   return pObj;
    687 }
    688 
    689 CPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() {
    690   const uint32_t kReqSize = 1024;
    691   if (!m_pFileAvail->IsDataAvail(0, kReqSize))
    692     return LinearizationUnknown;
    693 
    694   if (!m_pFileRead)
    695     return NotLinearized;
    696 
    697   FX_FILESIZE dwSize = m_pFileRead->GetSize();
    698   if (dwSize < (FX_FILESIZE)kReqSize)
    699     return LinearizationUnknown;
    700 
    701   uint8_t buffer[1024];
    702   m_pFileRead->ReadBlock(buffer, 0, kReqSize);
    703   if (IsLinearizedFile(buffer, kReqSize))
    704     return Linearized;
    705 
    706   return NotLinearized;
    707 }
    708 
    709 bool CPDF_DataAvail::IsLinearized() {
    710   return !!m_pLinearized;
    711 }
    712 
    713 bool CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, uint32_t dwLen) {
    714   if (m_pLinearized)
    715     return true;
    716 
    717   CFX_RetainPtr<IFX_MemoryStream> file =
    718       IFX_MemoryStream::Create(pData, (size_t)dwLen, false);
    719   int32_t offset = GetHeaderOffset(file);
    720   if (offset == -1) {
    721     m_docStatus = PDF_DATAAVAIL_ERROR;
    722     return false;
    723   }
    724 
    725   m_dwHeaderOffset = offset;
    726   m_syntaxParser.InitParser(file, offset);
    727   m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9);
    728 
    729   bool bNumber;
    730   CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber);
    731   if (!bNumber)
    732     return false;
    733 
    734   uint32_t objnum = FXSYS_atoui(wordObjNum.c_str());
    735   m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
    736       ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum));
    737   if (!m_pLinearized ||
    738       m_pLinearized->GetFileSize() != m_pFileRead->GetSize()) {
    739     m_pLinearized.reset();
    740     return false;
    741   }
    742   return true;
    743 }
    744 
    745 bool CPDF_DataAvail::CheckEnd(DownloadHints* pHints) {
    746   uint32_t req_pos = (uint32_t)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0);
    747   uint32_t dwSize = (uint32_t)(m_dwFileLen - req_pos);
    748 
    749   if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) {
    750     uint8_t buffer[1024];
    751     m_pFileRead->ReadBlock(buffer, req_pos, dwSize);
    752 
    753     CFX_RetainPtr<IFX_MemoryStream> file =
    754         IFX_MemoryStream::Create(buffer, (size_t)dwSize, false);
    755     m_syntaxParser.InitParser(file, 0);
    756     m_syntaxParser.RestorePos(dwSize - 1);
    757 
    758     if (m_syntaxParser.SearchWord("startxref", true, false, dwSize)) {
    759       m_syntaxParser.GetNextWord(nullptr);
    760 
    761       bool bNumber;
    762       CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber);
    763       if (!bNumber) {
    764         m_docStatus = PDF_DATAAVAIL_ERROR;
    765         return false;
    766       }
    767 
    768       m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
    769       if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) {
    770         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    771         return true;
    772       }
    773 
    774       m_dwLastXRefOffset = m_dwXRefOffset;
    775       SetStartOffset(m_dwXRefOffset);
    776       m_docStatus = PDF_DATAAVAIL_CROSSREF;
    777       return true;
    778     }
    779 
    780     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
    781     return true;
    782   }
    783 
    784   pHints->AddSegment(req_pos, dwSize);
    785   return false;
    786 }
    787 
    788 int32_t CPDF_DataAvail::CheckCrossRefStream(DownloadHints* pHints,
    789                                             FX_FILESIZE& xref_offset) {
    790   xref_offset = 0;
    791   uint32_t req_size =
    792       (uint32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
    793 
    794   if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) {
    795     int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam);
    796     CFX_BinaryBuf buf(iSize);
    797     uint8_t* pBuf = buf.GetBuffer();
    798 
    799     m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize);
    800 
    801     CFX_RetainPtr<IFX_MemoryStream> file =
    802         IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
    803     m_parser.m_pSyntax->InitParser(file, 0);
    804 
    805     bool bNumber;
    806     CFX_ByteString objnum = m_parser.m_pSyntax->GetNextWord(&bNumber);
    807     if (!bNumber)
    808       return -1;
    809 
    810     uint32_t objNum = FXSYS_atoui(objnum.c_str());
    811     std::unique_ptr<CPDF_Object> pObj =
    812         m_parser.ParseIndirectObjectAt(nullptr, 0, objNum);
    813 
    814     if (!pObj) {
    815       m_Pos += m_parser.m_pSyntax->SavePos();
    816       return 0;
    817     }
    818 
    819     CPDF_Dictionary* pDict = pObj->GetDict();
    820     CPDF_Name* pName = ToName(pDict ? pDict->GetObjectFor("Type") : nullptr);
    821     if (pName && pName->GetString() == "XRef") {
    822       m_Pos += m_parser.m_pSyntax->SavePos();
    823       xref_offset = pObj->GetDict()->GetIntegerFor("Prev");
    824       return 1;
    825     }
    826     return -1;
    827   }
    828   pHints->AddSegment(m_Pos, req_size);
    829   return 0;
    830 }
    831 
    832 void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) {
    833   m_Pos = dwOffset;
    834 }
    835 
    836 bool CPDF_DataAvail::GetNextToken(CFX_ByteString& token) {
    837   uint8_t ch;
    838   if (!GetNextChar(ch))
    839     return false;
    840 
    841   while (1) {
    842     while (PDFCharIsWhitespace(ch)) {
    843       if (!GetNextChar(ch))
    844         return false;
    845     }
    846 
    847     if (ch != '%')
    848       break;
    849 
    850     while (1) {
    851       if (!GetNextChar(ch))
    852         return false;
    853       if (PDFCharIsLineEnding(ch))
    854         break;
    855     }
    856   }
    857 
    858   uint8_t buffer[256];
    859   uint32_t index = 0;
    860   if (PDFCharIsDelimiter(ch)) {
    861     buffer[index++] = ch;
    862     if (ch == '/') {
    863       while (1) {
    864         if (!GetNextChar(ch))
    865           return false;
    866 
    867         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    868           m_Pos--;
    869           CFX_ByteString ret(buffer, index);
    870           token = ret;
    871           return true;
    872         }
    873 
    874         if (index < sizeof(buffer))
    875           buffer[index++] = ch;
    876       }
    877     } else if (ch == '<') {
    878       if (!GetNextChar(ch))
    879         return false;
    880 
    881       if (ch == '<')
    882         buffer[index++] = ch;
    883       else
    884         m_Pos--;
    885     } else if (ch == '>') {
    886       if (!GetNextChar(ch))
    887         return false;
    888 
    889       if (ch == '>')
    890         buffer[index++] = ch;
    891       else
    892         m_Pos--;
    893     }
    894 
    895     CFX_ByteString ret(buffer, index);
    896     token = ret;
    897     return true;
    898   }
    899 
    900   while (1) {
    901     if (index < sizeof(buffer))
    902       buffer[index++] = ch;
    903 
    904     if (!GetNextChar(ch))
    905       return false;
    906 
    907     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    908       m_Pos--;
    909       break;
    910     }
    911   }
    912 
    913   token = CFX_ByteString(buffer, index);
    914   return true;
    915 }
    916 
    917 bool CPDF_DataAvail::GetNextChar(uint8_t& ch) {
    918   FX_FILESIZE pos = m_Pos;
    919   if (pos >= m_dwFileLen)
    920     return false;
    921 
    922   if (m_bufferOffset >= pos ||
    923       (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) {
    924     FX_FILESIZE read_pos = pos;
    925     uint32_t read_size = 512;
    926     if ((FX_FILESIZE)read_size > m_dwFileLen)
    927       read_size = (uint32_t)m_dwFileLen;
    928 
    929     if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen)
    930       read_pos = m_dwFileLen - read_size;
    931 
    932     if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size))
    933       return false;
    934 
    935     m_bufferOffset = read_pos;
    936     m_bufferSize = read_size;
    937   }
    938   ch = m_bufferData[pos - m_bufferOffset];
    939   m_Pos++;
    940   return true;
    941 }
    942 
    943 bool CPDF_DataAvail::CheckCrossRefItem(DownloadHints* pHints) {
    944   int32_t iSize = 0;
    945   CFX_ByteString token;
    946   while (1) {
    947     if (!GetNextToken(token)) {
    948       iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
    949       pHints->AddSegment(m_Pos, iSize);
    950       return false;
    951     }
    952 
    953     if (token == "trailer") {
    954       m_dwTrailerOffset = m_Pos;
    955       m_docStatus = PDF_DATAAVAIL_TRAILER;
    956       return true;
    957     }
    958   }
    959 }
    960 
    961 bool CPDF_DataAvail::CheckAllCrossRefStream(DownloadHints* pHints) {
    962   FX_FILESIZE xref_offset = 0;
    963 
    964   int32_t nRet = CheckCrossRefStream(pHints, xref_offset);
    965   if (nRet == 1) {
    966     if (!xref_offset) {
    967       m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
    968     } else {
    969       m_dwCurrentXRefSteam = xref_offset;
    970       m_Pos = xref_offset;
    971     }
    972     return true;
    973   }
    974 
    975   if (nRet == -1)
    976     m_docStatus = PDF_DATAAVAIL_ERROR;
    977   return false;
    978 }
    979 
    980 bool CPDF_DataAvail::CheckCrossRef(DownloadHints* pHints) {
    981   int32_t iSize = 0;
    982   CFX_ByteString token;
    983   if (!GetNextToken(token)) {
    984     iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
    985     pHints->AddSegment(m_Pos, iSize);
    986     return false;
    987   }
    988 
    989   if (token == "xref") {
    990     while (1) {
    991       if (!GetNextToken(token)) {
    992         iSize =
    993             (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
    994         pHints->AddSegment(m_Pos, iSize);
    995         m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM;
    996         return false;
    997       }
    998 
    999       if (token == "trailer") {
   1000         m_dwTrailerOffset = m_Pos;
   1001         m_docStatus = PDF_DATAAVAIL_TRAILER;
   1002         return true;
   1003       }
   1004     }
   1005   } else {
   1006     m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
   1007     return true;
   1008   }
   1009   return false;
   1010 }
   1011 
   1012 bool CPDF_DataAvail::CheckTrailerAppend(DownloadHints* pHints) {
   1013   if (m_Pos < m_dwFileLen) {
   1014     FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos();
   1015     int32_t iSize = (int32_t)(
   1016         dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512);
   1017 
   1018     if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) {
   1019       pHints->AddSegment(dwAppendPos, iSize);
   1020       return false;
   1021     }
   1022   }
   1023 
   1024   if (m_dwPrevXRefOffset) {
   1025     SetStartOffset(m_dwPrevXRefOffset);
   1026     m_docStatus = PDF_DATAAVAIL_CROSSREF;
   1027   } else {
   1028     m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF;
   1029   }
   1030   return true;
   1031 }
   1032 
   1033 bool CPDF_DataAvail::CheckTrailer(DownloadHints* pHints) {
   1034   int32_t iTrailerSize =
   1035       (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512);
   1036   if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) {
   1037     int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset);
   1038     CFX_BinaryBuf buf(iSize);
   1039     uint8_t* pBuf = buf.GetBuffer();
   1040     if (!pBuf) {
   1041       m_docStatus = PDF_DATAAVAIL_ERROR;
   1042       return false;
   1043     }
   1044 
   1045     if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize))
   1046       return false;
   1047 
   1048     CFX_RetainPtr<IFX_MemoryStream> file =
   1049         IFX_MemoryStream::Create(pBuf, (size_t)iSize, false);
   1050     m_syntaxParser.InitParser(file, 0);
   1051 
   1052     std::unique_ptr<CPDF_Object> pTrailer(
   1053         m_syntaxParser.GetObject(nullptr, 0, 0, true));
   1054     if (!pTrailer) {
   1055       m_Pos += m_syntaxParser.SavePos();
   1056       pHints->AddSegment(m_Pos, iTrailerSize);
   1057       return false;
   1058     }
   1059 
   1060     if (!pTrailer->IsDictionary())
   1061       return false;
   1062 
   1063     CPDF_Dictionary* pTrailerDict = pTrailer->GetDict();
   1064     CPDF_Object* pEncrypt = pTrailerDict->GetObjectFor("Encrypt");
   1065     if (ToReference(pEncrypt)) {
   1066       m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
   1067       return true;
   1068     }
   1069 
   1070     uint32_t xrefpos = GetDirectInteger(pTrailerDict, "Prev");
   1071     if (xrefpos) {
   1072       m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm");
   1073       if (m_dwPrevXRefOffset) {
   1074         m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
   1075       } else {
   1076         m_dwPrevXRefOffset = xrefpos;
   1077         if (m_dwPrevXRefOffset >= m_dwFileLen) {
   1078           m_docStatus = PDF_DATAAVAIL_LOADALLFILE;
   1079         } else {
   1080           SetStartOffset(m_dwPrevXRefOffset);
   1081           m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
   1082         }
   1083       }
   1084       return true;
   1085     }
   1086     m_dwPrevXRefOffset = 0;
   1087     m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND;
   1088     return true;
   1089   }
   1090   pHints->AddSegment(m_Pos, iTrailerSize);
   1091   return false;
   1092 }
   1093 
   1094 bool CPDF_DataAvail::CheckPage(uint32_t dwPage, DownloadHints* pHints) {
   1095   while (true) {
   1096     switch (m_docStatus) {
   1097       case PDF_DATAAVAIL_PAGETREE:
   1098         if (!LoadDocPages(pHints))
   1099           return false;
   1100         break;
   1101       case PDF_DATAAVAIL_PAGE:
   1102         if (!LoadDocPage(dwPage, pHints))
   1103           return false;
   1104         break;
   1105       case PDF_DATAAVAIL_ERROR:
   1106         return LoadAllFile(pHints);
   1107       default:
   1108         m_bPagesTreeLoad = true;
   1109         m_bPagesLoad = true;
   1110         m_bCurPageDictLoadOK = true;
   1111         m_docStatus = PDF_DATAAVAIL_PAGE;
   1112         return true;
   1113     }
   1114   }
   1115 }
   1116 
   1117 bool CPDF_DataAvail::CheckArrayPageNode(uint32_t dwPageNo,
   1118                                         PageNode* pPageNode,
   1119                                         DownloadHints* pHints) {
   1120   bool bExists = false;
   1121   std::unique_ptr<CPDF_Object> pPages = GetObject(dwPageNo, pHints, &bExists);
   1122   if (!bExists) {
   1123     m_docStatus = PDF_DATAAVAIL_ERROR;
   1124     return false;
   1125   }
   1126 
   1127   if (!pPages)
   1128     return false;
   1129 
   1130   CPDF_Array* pArray = pPages->AsArray();
   1131   if (!pArray) {
   1132     m_docStatus = PDF_DATAAVAIL_ERROR;
   1133     return false;
   1134   }
   1135 
   1136   pPageNode->m_type = PDF_PAGENODE_PAGES;
   1137   for (size_t i = 0; i < pArray->GetCount(); ++i) {
   1138     CPDF_Reference* pKid = ToReference(pArray->GetObjectAt(i));
   1139     if (!pKid)
   1140       continue;
   1141 
   1142     auto pNode = pdfium::MakeUnique<PageNode>();
   1143     pNode->m_dwPageNo = pKid->GetRefObjNum();
   1144     pPageNode->m_ChildNodes.push_back(std::move(pNode));
   1145   }
   1146   return true;
   1147 }
   1148 
   1149 bool CPDF_DataAvail::CheckUnknownPageNode(uint32_t dwPageNo,
   1150                                           PageNode* pPageNode,
   1151                                           DownloadHints* pHints) {
   1152   bool bExists = false;
   1153   std::unique_ptr<CPDF_Object> pPage = GetObject(dwPageNo, pHints, &bExists);
   1154   if (!bExists) {
   1155     m_docStatus = PDF_DATAAVAIL_ERROR;
   1156     return false;
   1157   }
   1158 
   1159   if (!pPage)
   1160     return false;
   1161 
   1162   if (pPage->IsArray()) {
   1163     pPageNode->m_dwPageNo = dwPageNo;
   1164     pPageNode->m_type = PDF_PAGENODE_ARRAY;
   1165     return true;
   1166   }
   1167 
   1168   if (!pPage->IsDictionary()) {
   1169     m_docStatus = PDF_DATAAVAIL_ERROR;
   1170     return false;
   1171   }
   1172 
   1173   pPageNode->m_dwPageNo = dwPageNo;
   1174   CPDF_Dictionary* pDict = pPage->GetDict();
   1175   CFX_ByteString type = pDict->GetStringFor("Type");
   1176   if (type == "Pages") {
   1177     pPageNode->m_type = PDF_PAGENODE_PAGES;
   1178     CPDF_Object* pKids = pDict->GetObjectFor("Kids");
   1179     if (!pKids) {
   1180       m_docStatus = PDF_DATAAVAIL_PAGE;
   1181       return true;
   1182     }
   1183 
   1184     switch (pKids->GetType()) {
   1185       case CPDF_Object::REFERENCE: {
   1186         CPDF_Reference* pKid = pKids->AsReference();
   1187         auto pNode = pdfium::MakeUnique<PageNode>();
   1188         pNode->m_dwPageNo = pKid->GetRefObjNum();
   1189         pPageNode->m_ChildNodes.push_back(std::move(pNode));
   1190       } break;
   1191       case CPDF_Object::ARRAY: {
   1192         CPDF_Array* pKidsArray = pKids->AsArray();
   1193         for (size_t i = 0; i < pKidsArray->GetCount(); ++i) {
   1194           CPDF_Reference* pKid = ToReference(pKidsArray->GetObjectAt(i));
   1195           if (!pKid)
   1196             continue;
   1197 
   1198           auto pNode = pdfium::MakeUnique<PageNode>();
   1199           pNode->m_dwPageNo = pKid->GetRefObjNum();
   1200           pPageNode->m_ChildNodes.push_back(std::move(pNode));
   1201         }
   1202       } break;
   1203       default:
   1204         break;
   1205     }
   1206   } else if (type == "Page") {
   1207     pPageNode->m_type = PDF_PAGENODE_PAGE;
   1208   } else {
   1209     m_docStatus = PDF_DATAAVAIL_ERROR;
   1210     return false;
   1211   }
   1212   return true;
   1213 }
   1214 
   1215 bool CPDF_DataAvail::CheckPageNode(const CPDF_DataAvail::PageNode& pageNode,
   1216                                    int32_t iPage,
   1217                                    int32_t& iCount,
   1218                                    DownloadHints* pHints,
   1219                                    int level) {
   1220   if (level >= kMaxPageRecursionDepth)
   1221     return false;
   1222 
   1223   int32_t iSize = pdfium::CollectionSize<int32_t>(pageNode.m_ChildNodes);
   1224   if (iSize <= 0 || iPage >= iSize) {
   1225     m_docStatus = PDF_DATAAVAIL_ERROR;
   1226     return false;
   1227   }
   1228   for (int32_t i = 0; i < iSize; ++i) {
   1229     PageNode* pNode = pageNode.m_ChildNodes[i].get();
   1230     if (!pNode)
   1231       continue;
   1232 
   1233     if (pNode->m_type == PDF_PAGENODE_UNKNOWN) {
   1234       // Updates the type for the unknown page node.
   1235       if (!CheckUnknownPageNode(pNode->m_dwPageNo, pNode, pHints))
   1236         return false;
   1237     }
   1238     if (pNode->m_type == PDF_PAGENODE_ARRAY) {
   1239       // Updates a more specific type for the array page node.
   1240       if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints))
   1241         return false;
   1242     }
   1243     switch (pNode->m_type) {
   1244       case PDF_PAGENODE_PAGE:
   1245         iCount++;
   1246         if (iPage == iCount && m_pDocument)
   1247           m_pDocument->SetPageObjNum(iPage, pNode->m_dwPageNo);
   1248         break;
   1249       case PDF_PAGENODE_PAGES:
   1250         if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1))
   1251           return false;
   1252         break;
   1253       case PDF_PAGENODE_UNKNOWN:
   1254       case PDF_PAGENODE_ARRAY:
   1255         // Already converted above, error if we get here.
   1256         return false;
   1257     }
   1258     if (iPage == iCount) {
   1259       m_docStatus = PDF_DATAAVAIL_DONE;
   1260       return true;
   1261     }
   1262   }
   1263   return true;
   1264 }
   1265 
   1266 bool CPDF_DataAvail::LoadDocPage(uint32_t dwPage, DownloadHints* pHints) {
   1267   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
   1268   int32_t iPage = safePage.ValueOrDie();
   1269   if (m_pDocument->GetPageCount() <= iPage ||
   1270       m_pDocument->IsPageLoaded(iPage)) {
   1271     m_docStatus = PDF_DATAAVAIL_DONE;
   1272     return true;
   1273   }
   1274   if (m_PageNode.m_type == PDF_PAGENODE_PAGE) {
   1275     m_docStatus = iPage == 0 ? PDF_DATAAVAIL_DONE : PDF_DATAAVAIL_ERROR;
   1276     return true;
   1277   }
   1278   int32_t iCount = -1;
   1279   return CheckPageNode(m_PageNode, iPage, iCount, pHints, 0);
   1280 }
   1281 
   1282 bool CPDF_DataAvail::CheckPageCount(DownloadHints* pHints) {
   1283   bool bExists = false;
   1284   std::unique_ptr<CPDF_Object> pPages =
   1285       GetObject(m_PagesObjNum, pHints, &bExists);
   1286   if (!bExists) {
   1287     m_docStatus = PDF_DATAAVAIL_ERROR;
   1288     return false;
   1289   }
   1290   if (!pPages)
   1291     return false;
   1292 
   1293   CPDF_Dictionary* pPagesDict = pPages->GetDict();
   1294   if (!pPagesDict) {
   1295     m_docStatus = PDF_DATAAVAIL_ERROR;
   1296     return false;
   1297   }
   1298   if (!pPagesDict->KeyExist("Kids"))
   1299     return true;
   1300 
   1301   return pPagesDict->GetIntegerFor("Count") > 0;
   1302 }
   1303 
   1304 bool CPDF_DataAvail::LoadDocPages(DownloadHints* pHints) {
   1305   if (!CheckUnknownPageNode(m_PagesObjNum, &m_PageNode, pHints))
   1306     return false;
   1307 
   1308   if (CheckPageCount(pHints)) {
   1309     m_docStatus = PDF_DATAAVAIL_PAGE;
   1310     return true;
   1311   }
   1312 
   1313   m_bTotalLoadPageTree = true;
   1314   return false;
   1315 }
   1316 
   1317 bool CPDF_DataAvail::LoadPages(DownloadHints* pHints) {
   1318   while (!m_bPagesTreeLoad) {
   1319     if (!CheckPageStatus(pHints))
   1320       return false;
   1321   }
   1322 
   1323   if (m_bPagesLoad)
   1324     return true;
   1325 
   1326   m_pDocument->LoadPages();
   1327   return false;
   1328 }
   1329 
   1330 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData(
   1331     DownloadHints* pHints) {
   1332   if (m_bLinearedDataOK)
   1333     return DataAvailable;
   1334   ASSERT(m_pLinearized);
   1335   if (!m_pLinearized->GetLastXRefOffset())
   1336     return DataError;
   1337 
   1338   if (!m_bMainXRefLoadTried) {
   1339     FX_SAFE_UINT32 data_size = m_dwFileLen;
   1340     data_size -= m_pLinearized->GetLastXRefOffset();
   1341     if (!data_size.IsValid())
   1342       return DataError;
   1343 
   1344     if (!m_pFileAvail->IsDataAvail(m_pLinearized->GetLastXRefOffset(),
   1345                                    data_size.ValueOrDie())) {
   1346       pHints->AddSegment(m_pLinearized->GetLastXRefOffset(),
   1347                          data_size.ValueOrDie());
   1348       return DataNotAvailable;
   1349     }
   1350 
   1351     CPDF_Parser::Error eRet =
   1352         m_pDocument->GetParser()->LoadLinearizedMainXRefTable();
   1353     m_bMainXRefLoadTried = true;
   1354     if (eRet != CPDF_Parser::SUCCESS)
   1355       return DataError;
   1356 
   1357     if (!PreparePageItem())
   1358       return DataNotAvailable;
   1359 
   1360     m_bMainXRefLoadedOK = true;
   1361     m_bLinearedDataOK = true;
   1362   }
   1363 
   1364   return m_bLinearedDataOK ? DataAvailable : DataNotAvailable;
   1365 }
   1366 
   1367 bool CPDF_DataAvail::CheckPageAnnots(uint32_t dwPage, DownloadHints* pHints) {
   1368   if (m_objs_array.empty()) {
   1369     m_ObjectSet.clear();
   1370 
   1371     FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
   1372     CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
   1373     if (!pPageDict)
   1374       return true;
   1375 
   1376     CPDF_Object* pAnnots = pPageDict->GetObjectFor("Annots");
   1377     if (!pAnnots)
   1378       return true;
   1379 
   1380     std::vector<CPDF_Object*> obj_array;
   1381     obj_array.push_back(pAnnots);
   1382     if (!AreObjectsAvailable(obj_array, false, pHints, m_objs_array))
   1383       return false;
   1384 
   1385     m_objs_array.clear();
   1386     return true;
   1387   }
   1388 
   1389   std::vector<CPDF_Object*> new_objs_array;
   1390   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
   1391     m_objs_array = new_objs_array;
   1392     return false;
   1393   }
   1394   m_objs_array.clear();
   1395   return true;
   1396 }
   1397 
   1398 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage(
   1399     uint32_t dwPage,
   1400     DownloadHints* pHints) {
   1401   if (!m_bAnnotsLoad) {
   1402     if (!CheckPageAnnots(dwPage, pHints))
   1403       return DataNotAvailable;
   1404     m_bAnnotsLoad = true;
   1405   }
   1406   const bool is_page_valid = ValidatePage(dwPage);
   1407   (void)is_page_valid;
   1408   ASSERT(is_page_valid);
   1409   return DataAvailable;
   1410 }
   1411 
   1412 bool CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) {
   1413   CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth);
   1414   if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth)
   1415     return false;
   1416 
   1417   CPDF_Object* pParent = pDict->GetObjectFor("Parent");
   1418   if (!pParent)
   1419     return false;
   1420 
   1421   CPDF_Dictionary* pParentDict = pParent->GetDict();
   1422   if (!pParentDict)
   1423     return false;
   1424 
   1425   CPDF_Object* pRet = pParentDict->GetObjectFor("Resources");
   1426   if (pRet) {
   1427     m_pPageResource = pRet;
   1428     return true;
   1429   }
   1430 
   1431   return HaveResourceAncestor(pParentDict);
   1432 }
   1433 
   1434 CPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail(
   1435     uint32_t dwPage,
   1436     DownloadHints* pHints) {
   1437   if (!m_pDocument)
   1438     return DataError;
   1439 
   1440   if (IsFirstCheck(dwPage)) {
   1441     m_bCurPageDictLoadOK = false;
   1442     m_bPageLoadedOK = false;
   1443     m_bAnnotsLoad = false;
   1444     m_bNeedDownLoadResource = false;
   1445     m_objs_array.clear();
   1446     m_ObjectSet.clear();
   1447   }
   1448 
   1449   if (pdfium::ContainsKey(m_pagesLoadState, dwPage))
   1450     return DataAvailable;
   1451 
   1452   if (m_pLinearized) {
   1453     if (dwPage == m_pLinearized->GetFirstPageNo()) {
   1454       DocAvailStatus nRet = CheckLinearizedFirstPage(dwPage, pHints);
   1455       if (nRet == DataAvailable)
   1456         m_pagesLoadState.insert(dwPage);
   1457       return nRet;
   1458     }
   1459 
   1460     DocAvailStatus nResult = CheckLinearizedData(pHints);
   1461     if (nResult != DataAvailable)
   1462       return nResult;
   1463 
   1464     if (m_pHintTables) {
   1465       nResult = m_pHintTables->CheckPage(dwPage, pHints);
   1466       if (nResult != DataAvailable)
   1467         return nResult;
   1468       m_pagesLoadState.insert(dwPage);
   1469       return GetPage(dwPage) ? DataAvailable : DataError;
   1470     }
   1471 
   1472     if (m_bMainXRefLoadedOK) {
   1473       if (m_bTotalLoadPageTree) {
   1474         if (!LoadPages(pHints))
   1475           return DataNotAvailable;
   1476       } else {
   1477         if (!m_bCurPageDictLoadOK && !CheckPage(dwPage, pHints))
   1478           return DataNotAvailable;
   1479       }
   1480     } else {
   1481       if (!LoadAllFile(pHints))
   1482         return DataNotAvailable;
   1483       m_pDocument->GetParser()->RebuildCrossRef();
   1484       ResetFirstCheck(dwPage);
   1485       return DataAvailable;
   1486     }
   1487   } else {
   1488     if (!m_bTotalLoadPageTree && !m_bCurPageDictLoadOK &&
   1489         !CheckPage(dwPage, pHints)) {
   1490       return DataNotAvailable;
   1491     }
   1492   }
   1493 
   1494   if (m_bHaveAcroForm && !m_bAcroFormLoad) {
   1495     if (!CheckAcroFormSubObject(pHints))
   1496       return DataNotAvailable;
   1497     m_bAcroFormLoad = true;
   1498   }
   1499 
   1500   if (!m_bPageLoadedOK) {
   1501     if (m_objs_array.empty()) {
   1502       m_ObjectSet.clear();
   1503 
   1504       FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
   1505       m_pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
   1506       if (!m_pPageDict) {
   1507         ResetFirstCheck(dwPage);
   1508         // This is XFA page.
   1509         return DataAvailable;
   1510       }
   1511 
   1512       std::vector<CPDF_Object*> obj_array;
   1513       obj_array.push_back(m_pPageDict);
   1514       if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
   1515         return DataNotAvailable;
   1516 
   1517       m_objs_array.clear();
   1518     } else {
   1519       std::vector<CPDF_Object*> new_objs_array;
   1520       if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
   1521         m_objs_array = new_objs_array;
   1522         return DataNotAvailable;
   1523       }
   1524     }
   1525     m_objs_array.clear();
   1526     m_bPageLoadedOK = true;
   1527   }
   1528 
   1529   if (!m_bAnnotsLoad) {
   1530     if (!CheckPageAnnots(dwPage, pHints))
   1531       return DataNotAvailable;
   1532     m_bAnnotsLoad = true;
   1533   }
   1534 
   1535   if (m_pPageDict && !m_bNeedDownLoadResource) {
   1536     m_pPageResource = m_pPageDict->GetObjectFor("Resources");
   1537     m_bNeedDownLoadResource =
   1538         m_pPageResource || HaveResourceAncestor(m_pPageDict);
   1539   }
   1540 
   1541   if (m_bNeedDownLoadResource) {
   1542     if (!CheckResources(pHints))
   1543       return DataNotAvailable;
   1544     m_bNeedDownLoadResource = false;
   1545   }
   1546 
   1547   m_bPageLoadedOK = false;
   1548   m_bAnnotsLoad = false;
   1549   m_bCurPageDictLoadOK = false;
   1550 
   1551   ResetFirstCheck(dwPage);
   1552   m_pagesLoadState.insert(dwPage);
   1553   const bool is_page_valid = ValidatePage(dwPage);
   1554   (void)is_page_valid;
   1555   ASSERT(is_page_valid);
   1556   return DataAvailable;
   1557 }
   1558 
   1559 bool CPDF_DataAvail::CheckResources(DownloadHints* pHints) {
   1560   if (m_objs_array.empty()) {
   1561     std::vector<CPDF_Object*> obj_array;
   1562     obj_array.push_back(m_pPageResource);
   1563     if (!AreObjectsAvailable(obj_array, true, pHints, m_objs_array))
   1564       return false;
   1565 
   1566     m_objs_array.clear();
   1567     return true;
   1568   }
   1569   std::vector<CPDF_Object*> new_objs_array;
   1570   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
   1571     m_objs_array = new_objs_array;
   1572     return false;
   1573   }
   1574   m_objs_array.clear();
   1575   return true;
   1576 }
   1577 
   1578 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos,
   1579                                                uint32_t* pSize) {
   1580   if (pPos)
   1581     *pPos = m_dwLastXRefOffset;
   1582   if (pSize)
   1583     *pSize = (uint32_t)(m_dwFileLen - m_dwLastXRefOffset);
   1584 }
   1585 
   1586 int CPDF_DataAvail::GetPageCount() const {
   1587   if (m_pLinearized)
   1588     return m_pLinearized->GetPageCount();
   1589   return m_pDocument ? m_pDocument->GetPageCount() : 0;
   1590 }
   1591 
   1592 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) {
   1593   if (!m_pDocument || index < 0 || index >= GetPageCount())
   1594     return nullptr;
   1595   CPDF_Dictionary* page = m_pDocument->GetPage(index);
   1596   if (page)
   1597     return page;
   1598   if (!m_pLinearized || !m_pHintTables)
   1599     return nullptr;
   1600 
   1601   if (index == static_cast<int>(m_pLinearized->GetFirstPageNo()))
   1602     return nullptr;
   1603   FX_FILESIZE szPageStartPos = 0;
   1604   FX_FILESIZE szPageLength = 0;
   1605   uint32_t dwObjNum = 0;
   1606   const bool bPagePosGot = m_pHintTables->GetPagePos(index, &szPageStartPos,
   1607                                                      &szPageLength, &dwObjNum);
   1608   if (!bPagePosGot || !dwObjNum)
   1609     return nullptr;
   1610   // We should say to the document, which object is the page.
   1611   m_pDocument->SetPageObjNum(index, dwObjNum);
   1612   // Page object already can be parsed in document.
   1613   if (!m_pDocument->GetIndirectObject(dwObjNum)) {
   1614     m_syntaxParser.InitParser(
   1615         m_pFileRead, pdfium::base::checked_cast<uint32_t>(szPageStartPos));
   1616     m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
   1617         dwObjNum, ParseIndirectObjectAt(0, dwObjNum, m_pDocument));
   1618   }
   1619   const bool is_page_valid = ValidatePage(index);
   1620   (void)is_page_valid;
   1621   ASSERT(is_page_valid);
   1622   return m_pDocument->GetPage(index);
   1623 }
   1624 
   1625 CPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail(
   1626     DownloadHints* pHints) {
   1627   if (!m_pDocument)
   1628     return FormAvailable;
   1629   if (m_pLinearized) {
   1630     DocAvailStatus nDocStatus = CheckLinearizedData(pHints);
   1631     if (nDocStatus == DataError)
   1632       return FormError;
   1633     if (nDocStatus == DataNotAvailable)
   1634       return FormNotAvailable;
   1635   }
   1636 
   1637   if (!m_bLinearizedFormParamLoad) {
   1638     CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
   1639     if (!pRoot)
   1640       return FormAvailable;
   1641 
   1642     CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
   1643     if (!pAcroForm)
   1644       return FormNotExist;
   1645 
   1646     m_objs_array.push_back(pAcroForm->GetDict());
   1647     m_bLinearizedFormParamLoad = true;
   1648   }
   1649 
   1650   std::vector<CPDF_Object*> new_objs_array;
   1651   if (!AreObjectsAvailable(m_objs_array, false, pHints, new_objs_array)) {
   1652     m_objs_array = new_objs_array;
   1653     return FormNotAvailable;
   1654   }
   1655 
   1656   m_objs_array.clear();
   1657   const bool is_form_valid = ValidateForm();
   1658   (void)is_form_valid;
   1659   ASSERT(is_form_valid);
   1660   return FormAvailable;
   1661 }
   1662 
   1663 bool CPDF_DataAvail::ValidatePage(uint32_t dwPage) {
   1664   FX_SAFE_INT32 safePage = pdfium::base::checked_cast<int32_t>(dwPage);
   1665   CPDF_Dictionary* pPageDict = m_pDocument->GetPage(safePage.ValueOrDie());
   1666   if (!pPageDict)
   1667     return false;
   1668   std::vector<CPDF_Object*> obj_array;
   1669   obj_array.push_back(pPageDict);
   1670   std::vector<CPDF_Object*> dummy;
   1671   return AreObjectsAvailable(obj_array, true, nullptr, dummy);
   1672 }
   1673 
   1674 bool CPDF_DataAvail::ValidateForm() {
   1675   CPDF_Dictionary* pRoot = m_pDocument->GetRoot();
   1676   if (!pRoot)
   1677     return true;
   1678   CPDF_Object* pAcroForm = pRoot->GetObjectFor("AcroForm");
   1679   if (!pAcroForm)
   1680     return false;
   1681   std::vector<CPDF_Object*> obj_array;
   1682   obj_array.push_back(pAcroForm);
   1683   std::vector<CPDF_Object*> dummy;
   1684   return AreObjectsAvailable(obj_array, true, nullptr, dummy);
   1685 }
   1686 
   1687 CPDF_DataAvail::PageNode::PageNode() : m_type(PDF_PAGENODE_UNKNOWN) {}
   1688 
   1689 CPDF_DataAvail::PageNode::~PageNode() {}
   1690