Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_parser.h"
      8 
      9 #include <algorithm>
     10 #include <utility>
     11 #include <vector>
     12 
     13 #include "core/fpdfapi/parser/cpdf_array.h"
     14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
     15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     16 #include "core/fpdfapi/parser/cpdf_document.h"
     17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     18 #include "core/fpdfapi/parser/cpdf_number.h"
     19 #include "core/fpdfapi/parser/cpdf_reference.h"
     20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
     21 #include "core/fpdfapi/parser/cpdf_stream.h"
     22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
     24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     25 #include "core/fxcrt/autorestorer.h"
     26 #include "core/fxcrt/cfx_memorystream.h"
     27 #include "core/fxcrt/fx_extension.h"
     28 #include "core/fxcrt/fx_safe_types.h"
     29 #include "third_party/base/ptr_util.h"
     30 #include "third_party/base/stl_util.h"
     31 
     32 namespace {
     33 
     34 // A limit on the size of the xref table. Theoretical limits are higher, but
     35 // this may be large enough in practice.
     36 const int32_t kMaxXRefSize = 1048576;
     37 
     38 constexpr FX_FILESIZE kPDFHeaderSize = 9;
     39 
     40 uint32_t GetVarInt(const uint8_t* p, int32_t n) {
     41   uint32_t result = 0;
     42   for (int32_t i = 0; i < n; ++i)
     43     result = result * 256 + p[i];
     44   return result;
     45 }
     46 
     47 int32_t GetStreamNCount(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
     48   return pObjStream->GetDict()->GetIntegerFor("N");
     49 }
     50 
     51 int32_t GetStreamFirst(const RetainPtr<CPDF_StreamAcc>& pObjStream) {
     52   return pObjStream->GetDict()->GetIntegerFor("First");
     53 }
     54 
     55 }  // namespace
     56 
     57 class CPDF_Parser::TrailerData {
     58  public:
     59   TrailerData() {}
     60   ~TrailerData() {}
     61 
     62   CPDF_Dictionary* GetMainTrailer() const { return main_trailer_.get(); }
     63 
     64   std::unique_ptr<CPDF_Dictionary> GetCombinedTrailer() const {
     65     std::unique_ptr<CPDF_Dictionary> result =
     66         ToDictionary(main_trailer_->Clone());
     67 
     68     // Info is optional.
     69     uint32_t info_obj_num = GetInfoObjNum();
     70     if (info_obj_num > 0)
     71       result->SetNewFor<CPDF_Reference>("Info", nullptr, GetInfoObjNum());
     72 
     73     // Root is required.
     74     result->SetNewFor<CPDF_Reference>("Root", nullptr, GetRootObjNum());
     75     return result;
     76   }
     77 
     78   void SetMainTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
     79     ASSERT(trailer);
     80     main_trailer_ = std::move(trailer);
     81     ApplyTrailer(main_trailer_.get());
     82   }
     83 
     84   void AppendTrailer(std::unique_ptr<CPDF_Dictionary> trailer) {
     85     ASSERT(trailer);
     86     ApplyTrailer(trailer.get());
     87   }
     88 
     89   void Clear() {
     90     main_trailer_.reset();
     91     last_info_obj_num_ = 0;
     92     last_root_obj_num_ = 0;
     93   }
     94 
     95   uint32_t GetInfoObjNum() const {
     96     const CPDF_Reference* pRef = ToReference(
     97         GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Info") : nullptr);
     98     return pRef ? pRef->GetRefObjNum() : last_info_obj_num_;
     99   }
    100 
    101   uint32_t GetRootObjNum() const {
    102     const CPDF_Reference* pRef = ToReference(
    103         GetMainTrailer() ? GetMainTrailer()->GetObjectFor("Root") : nullptr);
    104     return pRef ? pRef->GetRefObjNum() : last_root_obj_num_;
    105   }
    106 
    107  private:
    108   void ApplyTrailer(const CPDF_Dictionary* dict) {
    109     // The most recent Info object number contained in last added trailer.
    110     // See PDF 1.7 spec, section 3.4.5 - Incremental Updates.
    111     const auto* pRef = ToReference(dict->GetObjectFor("Info"));
    112     if (pRef)
    113       last_info_obj_num_ = pRef->GetRefObjNum();
    114 
    115     const auto* pRoot = ToReference(dict->GetObjectFor("Root"));
    116     if (pRoot)
    117       last_root_obj_num_ = pRoot->GetRefObjNum();
    118   }
    119 
    120   std::unique_ptr<CPDF_Dictionary> main_trailer_;
    121   uint32_t last_info_obj_num_ = 0;
    122   uint32_t last_root_obj_num_ = 0;
    123 };
    124 
    125 CPDF_Parser::CPDF_Parser()
    126     : m_pSyntax(pdfium::MakeUnique<CPDF_SyntaxParser>()),
    127       m_bHasParsed(false),
    128       m_bXRefStream(false),
    129       m_FileVersion(0),
    130       m_TrailerData(pdfium::MakeUnique<TrailerData>()) {}
    131 
    132 CPDF_Parser::~CPDF_Parser() {
    133   ReleaseEncryptHandler();
    134 }
    135 
    136 uint32_t CPDF_Parser::GetLastObjNum() const {
    137   return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
    138 }
    139 
    140 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
    141   return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
    142 }
    143 
    144 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
    145   auto it = m_ObjectInfo.find(objnum);
    146   return it != m_ObjectInfo.end() ? it->second.pos : 0;
    147 }
    148 
    149 CPDF_Parser::ObjectType CPDF_Parser::GetObjectType(uint32_t objnum) const {
    150   ASSERT(IsValidObjectNumber(objnum));
    151   auto it = m_ObjectInfo.find(objnum);
    152   return it != m_ObjectInfo.end() ? it->second.type : ObjectType::kFree;
    153 }
    154 
    155 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
    156   ASSERT(IsValidObjectNumber(objnum));
    157   auto it = m_ObjectInfo.find(objnum);
    158   return it != m_ObjectInfo.end() ? it->second.gennum : 0;
    159 }
    160 
    161 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
    162   switch (GetObjectType(objnum)) {
    163     case ObjectType::kFree:
    164     case ObjectType::kNull:
    165       return true;
    166     case ObjectType::kNotCompressed:
    167     case ObjectType::kCompressed:
    168       return false;
    169   }
    170   ASSERT(false);  // NOTREACHED();
    171   return false;
    172 }
    173 
    174 bool CPDF_Parser::IsObjectFree(uint32_t objnum) const {
    175   return GetObjectType(objnum) == ObjectType::kFree;
    176 }
    177 
    178 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
    179   m_pEncryptDict = pDict;
    180 }
    181 
    182 RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
    183   return m_pSyntax->GetFileAccess();
    184 }
    185 
    186 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
    187   if (objnum == 0) {
    188     m_ObjectInfo.clear();
    189     return;
    190   }
    191 
    192   auto it = m_ObjectInfo.lower_bound(objnum);
    193   while (it != m_ObjectInfo.end()) {
    194     auto saved_it = it++;
    195     m_ObjectInfo.erase(saved_it);
    196   }
    197 
    198   if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
    199     m_ObjectInfo[objnum - 1].pos = 0;
    200 }
    201 
    202 bool CPDF_Parser::InitSyntaxParser(
    203     const RetainPtr<IFX_SeekableReadStream>& file_access) {
    204   const int32_t header_offset = GetHeaderOffset(file_access);
    205   if (header_offset == kInvalidHeaderOffset)
    206     return false;
    207   if (file_access->GetSize() < header_offset + kPDFHeaderSize)
    208     return false;
    209 
    210   m_pSyntax->InitParser(file_access, header_offset);
    211   return ParseFileVersion();
    212 }
    213 
    214 bool CPDF_Parser::ParseFileVersion() {
    215   m_FileVersion = 0;
    216   uint8_t ch;
    217   if (!m_pSyntax->GetCharAt(5, ch))
    218     return false;
    219 
    220   if (std::isdigit(ch))
    221     m_FileVersion = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)) * 10;
    222 
    223   if (!m_pSyntax->GetCharAt(7, ch))
    224     return false;
    225 
    226   if (std::isdigit(ch))
    227     m_FileVersion += FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
    228   return true;
    229 }
    230 
    231 CPDF_Parser::Error CPDF_Parser::StartParse(
    232     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    233     CPDF_Document* pDocument) {
    234   if (!InitSyntaxParser(pFileAccess))
    235     return FORMAT_ERROR;
    236   return StartParseInternal(pDocument);
    237 }
    238 
    239 CPDF_Parser::Error CPDF_Parser::StartParseInternal(CPDF_Document* pDocument) {
    240   ASSERT(!m_bHasParsed);
    241   m_bHasParsed = true;
    242   m_bXRefStream = false;
    243 
    244   m_pDocument = pDocument;
    245 
    246   bool bXRefRebuilt = false;
    247 
    248   m_LastXRefOffset = ParseStartXRef();
    249 
    250   if (m_LastXRefOffset > 0) {
    251     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
    252         !LoadAllCrossRefV5(m_LastXRefOffset)) {
    253       if (!RebuildCrossRef())
    254         return FORMAT_ERROR;
    255 
    256       bXRefRebuilt = true;
    257       m_LastXRefOffset = 0;
    258     }
    259   } else {
    260     if (!RebuildCrossRef())
    261       return FORMAT_ERROR;
    262 
    263     bXRefRebuilt = true;
    264   }
    265   Error eRet = SetEncryptHandler();
    266   if (eRet != SUCCESS)
    267     return eRet;
    268 
    269   m_pDocument->LoadDoc();
    270   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    271     if (bXRefRebuilt)
    272       return FORMAT_ERROR;
    273 
    274     ReleaseEncryptHandler();
    275     if (!RebuildCrossRef())
    276       return FORMAT_ERROR;
    277 
    278     eRet = SetEncryptHandler();
    279     if (eRet != SUCCESS)
    280       return eRet;
    281 
    282     m_pDocument->LoadDoc();
    283     if (!m_pDocument->GetRoot())
    284       return FORMAT_ERROR;
    285   }
    286   if (GetRootObjNum() == 0) {
    287     ReleaseEncryptHandler();
    288     if (!RebuildCrossRef() || GetRootObjNum() == 0)
    289       return FORMAT_ERROR;
    290 
    291     eRet = SetEncryptHandler();
    292     if (eRet != SUCCESS)
    293       return eRet;
    294   }
    295   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    296     CPDF_Reference* pMetadata =
    297         ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
    298     if (pMetadata)
    299       m_MetadataObjnum = pMetadata->GetRefObjNum();
    300   }
    301   return SUCCESS;
    302 }
    303 
    304 FX_FILESIZE CPDF_Parser::ParseStartXRef() {
    305   static constexpr char kStartXRefKeyword[] = "startxref";
    306   m_pSyntax->SetPos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset -
    307                     strlen(kStartXRefKeyword));
    308   if (!m_pSyntax->BackwardsSearchToWord(kStartXRefKeyword, 4096))
    309     return 0;
    310 
    311   // Skip "startxref" keyword.
    312   m_pSyntax->GetKeyword();
    313 
    314   // Read XRef offset.
    315   bool bNumber;
    316   const ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
    317   if (!bNumber || xrefpos_str.IsEmpty())
    318     return 0;
    319 
    320   const FX_SAFE_FILESIZE result = FXSYS_atoi64(xrefpos_str.c_str());
    321   if (!result.IsValid() || result.ValueOrDie() >= GetFileAccess()->GetSize())
    322     return 0;
    323 
    324   return result.ValueOrDie();
    325 }
    326 
    327 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
    328   ReleaseEncryptHandler();
    329   if (!GetTrailer())
    330     return FORMAT_ERROR;
    331 
    332   CPDF_Object* pEncryptObj = GetTrailer()->GetObjectFor("Encrypt");
    333   if (pEncryptObj) {
    334     if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
    335       SetEncryptDictionary(pEncryptDict);
    336     } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
    337       pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
    338       if (pEncryptObj)
    339         SetEncryptDictionary(pEncryptObj->GetDict());
    340     }
    341   }
    342 
    343   if (m_pEncryptDict) {
    344     ByteString filter = m_pEncryptDict->GetStringFor("Filter");
    345     if (filter != "Standard")
    346       return HANDLER_ERROR;
    347 
    348     std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler =
    349         pdfium::MakeUnique<CPDF_SecurityHandler>();
    350     if (!pSecurityHandler->OnInit(m_pEncryptDict.Get(), GetIDArray(),
    351                                   m_Password))
    352       return PASSWORD_ERROR;
    353 
    354     m_pSecurityHandler = std::move(pSecurityHandler);
    355   }
    356   return SUCCESS;
    357 }
    358 
    359 void CPDF_Parser::ReleaseEncryptHandler() {
    360   m_pSecurityHandler.reset();
    361   SetEncryptDictionary(nullptr);
    362 }
    363 
    364 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
    365   if (!IsValidObjectNumber(objnum))
    366     return 0;
    367 
    368   if (GetObjectType(objnum) == ObjectType::kNotCompressed)
    369     return GetObjectPositionOrZero(objnum);
    370 
    371   if (GetObjectType(objnum) == ObjectType::kCompressed) {
    372     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
    373     return GetObjectPositionOrZero(pos);
    374   }
    375   return 0;
    376 }
    377 
    378 // Ideally, all the cross reference entries should be verified.
    379 // In reality, we rarely see well-formed cross references don't match
    380 // with the objects. crbug/602650 showed a case where object numbers
    381 // in the cross reference table are all off by one.
    382 bool CPDF_Parser::VerifyCrossRefV4() {
    383   for (const auto& it : m_ObjectInfo) {
    384     if (it.second.pos == 0)
    385       continue;
    386     // Find the first non-zero position.
    387     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
    388     m_pSyntax->SetPos(it.second.pos);
    389     bool is_num = false;
    390     ByteString num_str = m_pSyntax->GetNextWord(&is_num);
    391     m_pSyntax->SetPos(SavedPos);
    392     if (!is_num || num_str.IsEmpty() ||
    393         FXSYS_atoui(num_str.c_str()) != it.first) {
    394       // If the object number read doesn't match the one stored,
    395       // something is wrong with the cross reference table.
    396       return false;
    397     }
    398     return true;
    399   }
    400   return true;
    401 }
    402 
    403 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
    404   if (!LoadCrossRefV4(xrefpos, true))
    405     return false;
    406 
    407   std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
    408   if (!trailer)
    409     return false;
    410 
    411   m_TrailerData->SetMainTrailer(std::move(trailer));
    412   int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
    413   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
    414     ShrinkObjectMap(xrefsize);
    415 
    416   std::vector<FX_FILESIZE> CrossRefList;
    417   std::vector<FX_FILESIZE> XRefStreamList;
    418   std::set<FX_FILESIZE> seen_xrefpos;
    419 
    420   CrossRefList.push_back(xrefpos);
    421   XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
    422   seen_xrefpos.insert(xrefpos);
    423 
    424   // When the trailer doesn't have Prev entry or Prev entry value is not
    425   // numerical, GetDirectInteger() returns 0. Loading will end.
    426   xrefpos = GetDirectInteger(GetTrailer(), "Prev");
    427   while (xrefpos) {
    428     // Check for circular references.
    429     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    430       return false;
    431 
    432     seen_xrefpos.insert(xrefpos);
    433 
    434     // SLOW ...
    435     CrossRefList.insert(CrossRefList.begin(), xrefpos);
    436     LoadCrossRefV4(xrefpos, true);
    437 
    438     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
    439     if (!pDict)
    440       return false;
    441 
    442     xrefpos = GetDirectInteger(pDict.get(), "Prev");
    443 
    444     // SLOW ...
    445     XRefStreamList.insert(XRefStreamList.begin(),
    446                           pDict->GetIntegerFor("XRefStm"));
    447     m_TrailerData->AppendTrailer(std::move(pDict));
    448   }
    449 
    450   for (size_t i = 0; i < CrossRefList.size(); ++i) {
    451     if (!LoadCrossRefV4(CrossRefList[i], false))
    452       return false;
    453 
    454     if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
    455       return false;
    456 
    457     if (i == 0 && !VerifyCrossRefV4())
    458       return false;
    459   }
    460   return true;
    461 }
    462 
    463 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos) {
    464   if (!LoadCrossRefV4(xrefpos, false))
    465     return false;
    466 
    467   std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
    468   if (!trailer)
    469     return false;
    470 
    471   m_TrailerData->SetMainTrailer(std::move(trailer));
    472   int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
    473   if (xrefsize == 0)
    474     return false;
    475 
    476   std::vector<FX_FILESIZE> CrossRefList;
    477   std::vector<FX_FILESIZE> XRefStreamList;
    478   std::set<FX_FILESIZE> seen_xrefpos;
    479 
    480   CrossRefList.push_back(xrefpos);
    481   XRefStreamList.push_back(GetDirectInteger(GetTrailer(), "XRefStm"));
    482   seen_xrefpos.insert(xrefpos);
    483 
    484   xrefpos = GetDirectInteger(GetTrailer(), "Prev");
    485   while (xrefpos) {
    486     // Check for circular references.
    487     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    488       return false;
    489 
    490     seen_xrefpos.insert(xrefpos);
    491 
    492     // SLOW ...
    493     CrossRefList.insert(CrossRefList.begin(), xrefpos);
    494     LoadCrossRefV4(xrefpos, true);
    495 
    496     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
    497     if (!pDict)
    498       return false;
    499 
    500     xrefpos = GetDirectInteger(pDict.get(), "Prev");
    501 
    502     // SLOW ...
    503     XRefStreamList.insert(XRefStreamList.begin(),
    504                           pDict->GetIntegerFor("XRefStm"));
    505     m_TrailerData->AppendTrailer(std::move(pDict));
    506   }
    507 
    508   for (size_t i = 1; i < CrossRefList.size(); ++i) {
    509     if (!LoadCrossRefV4(CrossRefList[i], false))
    510       return false;
    511 
    512     if (XRefStreamList[i] && !LoadCrossRefV5(&XRefStreamList[i], false))
    513       return false;
    514   }
    515   return true;
    516 }
    517 
    518 bool CPDF_Parser::ParseAndAppendCrossRefSubsectionData(
    519     uint32_t start_objnum,
    520     uint32_t count,
    521     std::vector<CrossRefObjData>* out_objects) {
    522   // Each entry shall be exactly 20 byte.
    523   // A sample entry looks like:
    524   // "0000000000 00007 f\r\n"
    525   static constexpr int32_t kEntryConstSize = 20;
    526 
    527   if (!out_objects) {
    528     FX_SAFE_FILESIZE pos = count;
    529     pos *= kEntryConstSize;
    530     pos += m_pSyntax->GetPos();
    531     if (!pos.IsValid())
    532       return false;
    533     m_pSyntax->SetPos(pos.ValueOrDie());
    534     return true;
    535   }
    536   const size_t start_obj_index = out_objects->size();
    537   FX_SAFE_SIZE_T new_size = start_obj_index;
    538   new_size += count;
    539   if (!new_size.IsValid())
    540     return false;
    541 
    542   if (new_size.ValueOrDie() > kMaxXRefSize)
    543     return false;
    544 
    545   const size_t max_entries_in_file =
    546       m_pSyntax->GetFileAccess()->GetSize() / kEntryConstSize;
    547   if (new_size.ValueOrDie() > max_entries_in_file)
    548     return false;
    549 
    550   out_objects->resize(new_size.ValueOrDie());
    551 
    552   std::vector<char> buf(1024 * kEntryConstSize + 1);
    553   buf.back() = '\0';
    554 
    555   int32_t nBlocks = count / 1024 + 1;
    556   for (int32_t block = 0; block < nBlocks; block++) {
    557     int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
    558     if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
    559                               block_size * kEntryConstSize)) {
    560       return false;
    561     }
    562 
    563     for (int32_t i = 0; i < block_size; i++) {
    564       CrossRefObjData& obj_data =
    565           (*out_objects)[start_obj_index + block * 1024 + i];
    566 
    567       const uint32_t objnum = start_objnum + block * 1024 + i;
    568 
    569       obj_data.obj_num = objnum;
    570 
    571       ObjectInfo& info = obj_data.info;
    572 
    573       char* pEntry = &buf[i * kEntryConstSize];
    574       if (pEntry[17] == 'f') {
    575         info.pos = 0;
    576         info.type = ObjectType::kFree;
    577       } else {
    578         const FX_SAFE_FILESIZE offset = FXSYS_atoi64(pEntry);
    579         if (!offset.IsValid())
    580           return false;
    581 
    582         if (offset.ValueOrDie() == 0) {
    583           for (int32_t c = 0; c < 10; c++) {
    584             if (!std::isdigit(pEntry[c]))
    585               return false;
    586           }
    587         }
    588 
    589         info.pos = offset.ValueOrDie();
    590 
    591         // TODO(art-snake): The info.gennum is uint16_t, but version may be
    592         // greated than max<uint16_t>. Needs solve this issue.
    593         const int32_t version = FXSYS_atoi(pEntry + 11);
    594         info.gennum = version;
    595         info.type = ObjectType::kNotCompressed;
    596       }
    597     }
    598   }
    599   return true;
    600 }
    601 
    602 bool CPDF_Parser::ParseCrossRefV4(std::vector<CrossRefObjData>* out_objects) {
    603   if (out_objects)
    604     out_objects->clear();
    605 
    606   if (m_pSyntax->GetKeyword() != "xref")
    607     return false;
    608   std::vector<CrossRefObjData> result_objects;
    609   while (1) {
    610     FX_FILESIZE SavedPos = m_pSyntax->GetPos();
    611     bool bIsNumber;
    612     ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
    613     if (word.IsEmpty()) {
    614       return false;
    615     }
    616 
    617     if (!bIsNumber) {
    618       m_pSyntax->SetPos(SavedPos);
    619       break;
    620     }
    621 
    622     uint32_t start_objnum = FXSYS_atoui(word.c_str());
    623     if (start_objnum >= kMaxObjectNumber)
    624       return false;
    625 
    626     uint32_t count = m_pSyntax->GetDirectNum();
    627     m_pSyntax->ToNextWord();
    628     SavedPos = m_pSyntax->GetPos();
    629 
    630     if (!ParseAndAppendCrossRefSubsectionData(
    631             start_objnum, count, out_objects ? &result_objects : nullptr)) {
    632       return false;
    633     }
    634   }
    635   if (out_objects)
    636     *out_objects = std::move(result_objects);
    637   return true;
    638 }
    639 
    640 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
    641                                  bool bSkip) {
    642   m_pSyntax->SetPos(pos);
    643   std::vector<CrossRefObjData> objects;
    644   if (!ParseCrossRefV4(bSkip ? nullptr : &objects))
    645     return false;
    646 
    647   MergeCrossRefObjectsData(objects);
    648 
    649   return true;
    650 }
    651 
    652 void CPDF_Parser::MergeCrossRefObjectsData(
    653     const std::vector<CrossRefObjData>& objects) {
    654   for (const auto& obj : objects) {
    655     m_ObjectInfo[obj.obj_num] = obj.info;
    656   }
    657 }
    658 
    659 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
    660   if (!LoadCrossRefV5(&xrefpos, true))
    661     return false;
    662 
    663   std::set<FX_FILESIZE> seen_xrefpos;
    664   while (xrefpos) {
    665     seen_xrefpos.insert(xrefpos);
    666     if (!LoadCrossRefV5(&xrefpos, false))
    667       return false;
    668 
    669     // Check for circular references.
    670     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    671       return false;
    672   }
    673   m_ObjectStreamMap.clear();
    674   m_bXRefStream = true;
    675   return true;
    676 }
    677 
    678 bool CPDF_Parser::RebuildCrossRef() {
    679   m_ObjectInfo.clear();
    680   m_TrailerData->Clear();
    681 
    682   ParserState state = ParserState::kDefault;
    683   int32_t inside_index = 0;
    684   uint32_t objnum = 0;
    685   uint32_t gennum = 0;
    686   int32_t depth = 0;
    687   const uint32_t kBufferSize = 4096;
    688   std::vector<uint8_t> buffer(kBufferSize);
    689 
    690   FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
    691   FX_FILESIZE start_pos = 0;
    692   FX_FILESIZE start_pos1 = 0;
    693   FX_FILESIZE last_obj = -1;
    694   FX_FILESIZE last_xref = -1;
    695   FX_FILESIZE last_trailer = -1;
    696 
    697   while (pos < m_pSyntax->m_FileLen) {
    698     const FX_FILESIZE saved_pos = pos;
    699     bool bOverFlow = false;
    700     uint32_t size =
    701         std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
    702     if (!m_pSyntax->GetFileAccess()->ReadBlock(buffer.data(), pos, size))
    703       break;
    704 
    705     for (uint32_t i = 0; i < size; i++) {
    706       uint8_t byte = buffer[i];
    707       switch (state) {
    708         case ParserState::kDefault:
    709           if (PDFCharIsWhitespace(byte)) {
    710             state = ParserState::kWhitespace;
    711           } else if (std::isdigit(byte)) {
    712             --i;
    713             state = ParserState::kWhitespace;
    714           } else if (byte == '%') {
    715             inside_index = 0;
    716             state = ParserState::kComment;
    717           } else if (byte == '(') {
    718             state = ParserState::kString;
    719             depth = 1;
    720           } else if (byte == '<') {
    721             inside_index = 1;
    722             state = ParserState::kHexString;
    723           } else if (byte == '\\') {
    724             state = ParserState::kEscapedString;
    725           } else if (byte == 't') {
    726             state = ParserState::kTrailer;
    727             inside_index = 1;
    728           }
    729           break;
    730 
    731         case ParserState::kWhitespace:
    732           if (std::isdigit(byte)) {
    733             start_pos = pos + i;
    734             state = ParserState::kObjNum;
    735             objnum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
    736           } else if (byte == 't') {
    737             state = ParserState::kTrailer;
    738             inside_index = 1;
    739           } else if (byte == 'x') {
    740             state = ParserState::kXref;
    741             inside_index = 1;
    742           } else if (!PDFCharIsWhitespace(byte)) {
    743             --i;
    744             state = ParserState::kDefault;
    745           }
    746           break;
    747 
    748         case ParserState::kObjNum:
    749           if (std::isdigit(byte)) {
    750             objnum = objnum * 10 +
    751                      FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
    752           } else if (PDFCharIsWhitespace(byte)) {
    753             state = ParserState::kPostObjNum;
    754           } else {
    755             --i;
    756             state = ParserState::kEndObj;
    757             inside_index = 0;
    758           }
    759           break;
    760 
    761         case ParserState::kPostObjNum:
    762           if (std::isdigit(byte)) {
    763             start_pos1 = pos + i;
    764             state = ParserState::kGenNum;
    765             gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
    766           } else if (byte == 't') {
    767             state = ParserState::kTrailer;
    768             inside_index = 1;
    769           } else if (!PDFCharIsWhitespace(byte)) {
    770             --i;
    771             state = ParserState::kDefault;
    772           }
    773           break;
    774 
    775         case ParserState::kGenNum:
    776           if (std::isdigit(byte)) {
    777             gennum = gennum * 10 +
    778                      FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
    779           } else if (PDFCharIsWhitespace(byte)) {
    780             state = ParserState::kPostGenNum;
    781           } else {
    782             --i;
    783             state = ParserState::kDefault;
    784           }
    785           break;
    786 
    787         case ParserState::kPostGenNum:
    788           if (byte == 'o') {
    789             state = ParserState::kBeginObj;
    790             inside_index = 1;
    791           } else if (std::isdigit(byte)) {
    792             objnum = gennum;
    793             gennum = FXSYS_DecimalCharToInt(static_cast<wchar_t>(byte));
    794             start_pos = start_pos1;
    795             start_pos1 = pos + i;
    796             state = ParserState::kGenNum;
    797           } else if (byte == 't') {
    798             state = ParserState::kTrailer;
    799             inside_index = 1;
    800           } else if (!PDFCharIsWhitespace(byte)) {
    801             --i;
    802             state = ParserState::kDefault;
    803           }
    804           break;
    805 
    806         case ParserState::kBeginObj:
    807           switch (inside_index) {
    808             case 1:
    809               if (byte != 'b') {
    810                 --i;
    811                 state = ParserState::kDefault;
    812               } else {
    813                 inside_index++;
    814               }
    815               break;
    816             case 2:
    817               if (byte != 'j') {
    818                 --i;
    819                 state = ParserState::kDefault;
    820               } else {
    821                 inside_index++;
    822               }
    823               break;
    824             case 3:
    825               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
    826                 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
    827                 last_obj = start_pos;
    828                 FX_FILESIZE obj_end = 0;
    829                 std::unique_ptr<CPDF_Object> pObject =
    830                     ParseIndirectObjectAtByStrict(m_pDocument.Get(), obj_pos,
    831                                                   objnum, &obj_end);
    832                 if (CPDF_Stream* pStream = ToStream(pObject.get())) {
    833                   if (CPDF_Dictionary* pDict = pStream->GetDict()) {
    834                     if ((pDict->KeyExist("Type")) &&
    835                         (pDict->GetStringFor("Type") == "XRef" &&
    836                          pDict->KeyExist("Size"))) {
    837                       CPDF_Object* pRoot = pDict->GetObjectFor("Root");
    838                       if (pRoot && pRoot->GetDict() &&
    839                           pRoot->GetDict()->GetObjectFor("Pages")) {
    840                         m_TrailerData->SetMainTrailer(
    841                             ToDictionary(pDict->Clone()));
    842                       }
    843                     }
    844                   }
    845                 }
    846 
    847                 FX_FILESIZE offset = 0;
    848                 m_pSyntax->SetPos(obj_pos);
    849                 offset = m_pSyntax->FindTag("obj", 0);
    850                 if (offset == -1)
    851                   offset = 0;
    852                 else
    853                   offset += 3;
    854 
    855                 FX_FILESIZE nLen = obj_end - obj_pos - offset;
    856                 if ((uint32_t)nLen > size - i) {
    857                   pos = obj_end + m_pSyntax->m_HeaderOffset;
    858                   bOverFlow = true;
    859                 } else {
    860                   i += (uint32_t)nLen;
    861                 }
    862 
    863                 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
    864                     m_ObjectInfo[objnum].pos) {
    865                   if (pObject) {
    866                     m_ObjectInfo[objnum].pos = obj_pos;
    867                     m_ObjectInfo[objnum].gennum = gennum;
    868                   }
    869                 } else {
    870                   m_ObjectInfo[objnum].pos = obj_pos;
    871                   m_ObjectInfo[objnum].type = ObjectType::kNotCompressed;
    872                   m_ObjectInfo[objnum].gennum = gennum;
    873                 }
    874               }
    875               --i;
    876               state = ParserState::kDefault;
    877               break;
    878           }
    879           break;
    880 
    881         case ParserState::kTrailer:
    882           if (inside_index == 7) {
    883             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
    884               last_trailer = pos + i - 7;
    885               m_pSyntax->SetPos(pos + i - m_pSyntax->m_HeaderOffset);
    886 
    887               std::unique_ptr<CPDF_Object> pObj =
    888                   m_pSyntax->GetObjectBody(m_pDocument.Get());
    889               if (pObj) {
    890                 if (pObj->IsDictionary() || pObj->AsStream()) {
    891                   CPDF_Stream* pStream = pObj->AsStream();
    892                   if (CPDF_Dictionary* pTrailer =
    893                           pStream ? pStream->GetDict() : pObj->AsDictionary()) {
    894                     if (GetTrailer()) {
    895                       CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
    896                       CPDF_Reference* pRef = ToReference(pRoot);
    897                       if (!pRoot ||
    898                           (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
    899                            m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
    900                         auto it = pTrailer->begin();
    901                         while (it != pTrailer->end()) {
    902                           const ByteString& key = it->first;
    903                           CPDF_Object* pElement = it->second.get();
    904                           ++it;
    905                           uint32_t dwObjNum =
    906                               pElement ? pElement->GetObjNum() : 0;
    907                           if (dwObjNum) {
    908                             GetTrailer()->SetNewFor<CPDF_Reference>(
    909                                 key, m_pDocument.Get(), dwObjNum);
    910                           } else {
    911                             GetTrailer()->SetFor(key, pElement->Clone());
    912                           }
    913                         }
    914                       }
    915                     } else {
    916                       m_TrailerData->SetMainTrailer(
    917                           ToDictionary(pObj->IsStream() ? pTrailer->Clone()
    918                                                         : std::move(pObj)));
    919 
    920                       FX_FILESIZE dwSavePos = m_pSyntax->GetPos();
    921                       ByteString strWord = m_pSyntax->GetKeyword();
    922                       if (!strWord.Compare("startxref")) {
    923                         bool bNumber;
    924                         ByteString bsOffset = m_pSyntax->GetNextWord(&bNumber);
    925                         if (bNumber)
    926                           m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
    927                       }
    928                       m_pSyntax->SetPos(dwSavePos);
    929                     }
    930                   }
    931                 }
    932               }
    933             }
    934             --i;
    935             state = ParserState::kDefault;
    936           } else if (byte == "trailer"[inside_index]) {
    937             inside_index++;
    938           } else {
    939             --i;
    940             state = ParserState::kDefault;
    941           }
    942           break;
    943 
    944         case ParserState::kXref:
    945           if (inside_index == 4) {
    946             last_xref = pos + i - 4;
    947             state = ParserState::kWhitespace;
    948           } else if (byte == "xref"[inside_index]) {
    949             inside_index++;
    950           } else {
    951             --i;
    952             state = ParserState::kDefault;
    953           }
    954           break;
    955 
    956         case ParserState::kComment:
    957           if (PDFCharIsLineEnding(byte))
    958             state = ParserState::kDefault;
    959           break;
    960 
    961         case ParserState::kString:
    962           if (byte == ')') {
    963             if (depth > 0)
    964               depth--;
    965           } else if (byte == '(') {
    966             depth++;
    967           }
    968 
    969           if (!depth)
    970             state = ParserState::kDefault;
    971           break;
    972 
    973         case ParserState::kHexString:
    974           if (byte == '>' || (byte == '<' && inside_index == 1))
    975             state = ParserState::kDefault;
    976           inside_index = 0;
    977           break;
    978 
    979         case ParserState::kEscapedString:
    980           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
    981             --i;
    982             state = ParserState::kDefault;
    983           }
    984           break;
    985 
    986         case ParserState::kEndObj:
    987           if (PDFCharIsWhitespace(byte)) {
    988             state = ParserState::kDefault;
    989           } else if (byte == '%' || byte == '(' || byte == '<' ||
    990                      byte == '\\') {
    991             state = ParserState::kDefault;
    992             --i;
    993           } else if (inside_index == 6) {
    994             state = ParserState::kDefault;
    995             --i;
    996           } else if (byte == "endobj"[inside_index]) {
    997             inside_index++;
    998           }
    999           break;
   1000       }
   1001 
   1002       if (bOverFlow) {
   1003         size = 0;
   1004         break;
   1005       }
   1006     }
   1007     pos += size;
   1008 
   1009     // If the position has not changed at all or went backwards in a loop
   1010     // iteration, then break out to prevent infinite looping.
   1011     if (pos <= saved_pos)
   1012       break;
   1013   }
   1014 
   1015   if (last_xref != -1 && last_xref > last_obj)
   1016     last_trailer = last_xref;
   1017   else if (last_trailer == -1 || last_xref < last_obj)
   1018     last_trailer = m_pSyntax->m_FileLen;
   1019 
   1020   return GetTrailer() && !m_ObjectInfo.empty();
   1021 }
   1022 
   1023 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
   1024   std::unique_ptr<CPDF_Object> pObject(
   1025       ParseIndirectObjectAt(m_pDocument.Get(), *pos, 0));
   1026   if (!pObject)
   1027     return false;
   1028 
   1029   uint32_t objnum = pObject->GetObjNum();
   1030   if (!objnum)
   1031     return false;
   1032 
   1033   CPDF_Object* pUnownedObject = pObject.get();
   1034   if (m_pDocument) {
   1035     const CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
   1036     if (pRootDict && pRootDict->GetObjNum() == objnum)
   1037       return false;
   1038     if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
   1039             objnum, std::move(pObject))) {
   1040       return false;
   1041     }
   1042   }
   1043 
   1044   CPDF_Stream* pStream = pUnownedObject->AsStream();
   1045   if (!pStream)
   1046     return false;
   1047 
   1048   CPDF_Dictionary* pDict = pStream->GetDict();
   1049   *pos = pDict->GetIntegerFor("Prev");
   1050   int32_t size = pDict->GetIntegerFor("Size");
   1051   if (size < 0)
   1052     return false;
   1053 
   1054   std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
   1055   if (bMainXRef) {
   1056     m_TrailerData->SetMainTrailer(std::move(pNewTrailer));
   1057     ShrinkObjectMap(size);
   1058     for (auto& it : m_ObjectInfo)
   1059       it.second.type = ObjectType::kFree;
   1060   } else {
   1061     m_TrailerData->AppendTrailer(std::move(pNewTrailer));
   1062   }
   1063 
   1064   std::vector<std::pair<int32_t, int32_t>> arrIndex;
   1065   CPDF_Array* pArray = pDict->GetArrayFor("Index");
   1066   if (pArray) {
   1067     for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
   1068       CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
   1069       CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
   1070 
   1071       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
   1072         int nStartNum = pStartNumObj->GetInteger();
   1073         int nCount = pCountObj->GetInteger();
   1074         if (nStartNum >= 0 && nCount > 0)
   1075           arrIndex.push_back(std::make_pair(nStartNum, nCount));
   1076       }
   1077     }
   1078   }
   1079 
   1080   if (arrIndex.size() == 0)
   1081     arrIndex.push_back(std::make_pair(0, size));
   1082 
   1083   pArray = pDict->GetArrayFor("W");
   1084   if (!pArray)
   1085     return false;
   1086 
   1087   std::vector<uint32_t> WidthArray;
   1088   FX_SAFE_UINT32 dwAccWidth = 0;
   1089   for (size_t i = 0; i < pArray->GetCount(); ++i) {
   1090     WidthArray.push_back(pArray->GetIntegerAt(i));
   1091     dwAccWidth += WidthArray[i];
   1092   }
   1093 
   1094   if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
   1095     return false;
   1096 
   1097   uint32_t totalWidth = dwAccWidth.ValueOrDie();
   1098   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
   1099   pAcc->LoadAllDataFiltered();
   1100 
   1101   const uint8_t* pData = pAcc->GetData();
   1102   uint32_t dwTotalSize = pAcc->GetSize();
   1103   uint32_t segindex = 0;
   1104   for (uint32_t i = 0; i < arrIndex.size(); i++) {
   1105     int32_t startnum = arrIndex[i].first;
   1106     if (startnum < 0)
   1107       continue;
   1108 
   1109     uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
   1110     FX_SAFE_UINT32 dwCaculatedSize = segindex;
   1111     dwCaculatedSize += count;
   1112     dwCaculatedSize *= totalWidth;
   1113     if (!dwCaculatedSize.IsValid() ||
   1114         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
   1115       continue;
   1116     }
   1117 
   1118     const uint8_t* segstart = pData + segindex * totalWidth;
   1119     FX_SAFE_UINT32 dwMaxObjNum = startnum;
   1120     dwMaxObjNum += count;
   1121     uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
   1122     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
   1123       continue;
   1124 
   1125     for (uint32_t j = 0; j < count; j++) {
   1126       ObjectType type = ObjectType::kNotCompressed;
   1127       const uint8_t* entrystart = segstart + j * totalWidth;
   1128       if (WidthArray[0]) {
   1129         const int cross_ref_stream_obj_type =
   1130             GetVarInt(entrystart, WidthArray[0]);
   1131         type = GetObjectTypeFromCrossRefStreamType(cross_ref_stream_obj_type);
   1132       }
   1133 
   1134       if (GetObjectType(startnum + j) == ObjectType::kNull) {
   1135         FX_FILESIZE offset =
   1136             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
   1137         m_ObjectInfo[startnum + j].pos = offset;
   1138         continue;
   1139       }
   1140 
   1141       if (GetObjectType(startnum + j) != ObjectType::kFree)
   1142         continue;
   1143 
   1144       ObjectInfo& info = m_ObjectInfo[startnum + j];
   1145 
   1146       info.type = type;
   1147       if (type == ObjectType::kFree) {
   1148         info.pos = 0;
   1149       } else {
   1150         const FX_FILESIZE entry_value =
   1151             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
   1152         if (type == ObjectType::kNotCompressed) {
   1153           const auto object_offset = entry_value;
   1154           info.pos = object_offset;
   1155         } else {
   1156           const auto archive_obj_num = entry_value;
   1157           info.archive_obj_num = archive_obj_num;
   1158           if (archive_obj_num < 0 || !IsValidObjectNumber(archive_obj_num))
   1159             return false;
   1160           m_ObjectInfo[archive_obj_num].type = ObjectType::kNull;
   1161         }
   1162       }
   1163     }
   1164     segindex += count;
   1165   }
   1166   return true;
   1167 }
   1168 
   1169 const CPDF_Array* CPDF_Parser::GetIDArray() const {
   1170   return GetTrailer() ? GetTrailer()->GetArrayFor("ID") : nullptr;
   1171 }
   1172 
   1173 CPDF_Dictionary* CPDF_Parser::GetTrailer() const {
   1174   return m_TrailerData->GetMainTrailer();
   1175 }
   1176 
   1177 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::GetCombinedTrailer() const {
   1178   return m_TrailerData->GetCombinedTrailer();
   1179 }
   1180 
   1181 uint32_t CPDF_Parser::GetInfoObjNum() {
   1182   return m_TrailerData->GetInfoObjNum();
   1183 }
   1184 
   1185 uint32_t CPDF_Parser::GetRootObjNum() {
   1186   return m_TrailerData->GetRootObjNum();
   1187 }
   1188 
   1189 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
   1190     CPDF_IndirectObjectHolder* pObjList,
   1191     uint32_t objnum) {
   1192   if (!IsValidObjectNumber(objnum))
   1193     return nullptr;
   1194 
   1195   // Prevent circular parsing the same object.
   1196   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
   1197     return nullptr;
   1198 
   1199   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
   1200   if (GetObjectType(objnum) == ObjectType::kNotCompressed ||
   1201       GetObjectType(objnum) == ObjectType::kNull) {
   1202     FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
   1203     if (pos <= 0)
   1204       return nullptr;
   1205     return ParseIndirectObjectAt(pObjList, pos, objnum);
   1206   }
   1207   if (GetObjectType(objnum) != ObjectType::kCompressed)
   1208     return nullptr;
   1209 
   1210   RetainPtr<CPDF_StreamAcc> pObjStream =
   1211       GetObjectStream(m_ObjectInfo[objnum].pos);
   1212   if (!pObjStream)
   1213     return nullptr;
   1214 
   1215   auto file = pdfium::MakeRetain<CFX_MemoryStream>(
   1216       const_cast<uint8_t*>(pObjStream->GetData()),
   1217       static_cast<size_t>(pObjStream->GetSize()), false);
   1218   CPDF_SyntaxParser syntax;
   1219   syntax.InitParser(file, 0);
   1220   const int32_t offset = GetStreamFirst(pObjStream);
   1221 
   1222   // Read object numbers from |pObjStream| into a cache.
   1223   if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
   1224     for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
   1225       uint32_t thisnum = syntax.GetDirectNum();
   1226       uint32_t thisoff = syntax.GetDirectNum();
   1227       m_ObjCache[pObjStream][thisnum] = thisoff;
   1228     }
   1229   }
   1230 
   1231   const auto it = m_ObjCache[pObjStream].find(objnum);
   1232   if (it == m_ObjCache[pObjStream].end())
   1233     return nullptr;
   1234 
   1235   syntax.SetPos(offset + it->second);
   1236   return syntax.GetObjectBody(pObjList);
   1237 }
   1238 
   1239 RetainPtr<CPDF_StreamAcc> CPDF_Parser::GetObjectStream(uint32_t objnum) {
   1240   auto it = m_ObjectStreamMap.find(objnum);
   1241   if (it != m_ObjectStreamMap.end())
   1242     return it->second;
   1243 
   1244   if (!m_pDocument)
   1245     return nullptr;
   1246 
   1247   const CPDF_Stream* pStream =
   1248       ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
   1249   if (!pStream)
   1250     return nullptr;
   1251 
   1252   auto pStreamAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pStream);
   1253   pStreamAcc->LoadAllDataFiltered();
   1254   m_ObjectStreamMap[objnum] = pStreamAcc;
   1255   return pStreamAcc;
   1256 }
   1257 
   1258 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
   1259     CPDF_IndirectObjectHolder* pObjList,
   1260     FX_FILESIZE pos,
   1261     uint32_t objnum) {
   1262   return ParseIndirectObjectAtInternal(
   1263       pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kLoose, nullptr);
   1264 }
   1265 
   1266 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtInternal(
   1267     CPDF_IndirectObjectHolder* pObjList,
   1268     FX_FILESIZE pos,
   1269     uint32_t objnum,
   1270     CPDF_SyntaxParser::ParseType parse_type,
   1271     FX_FILESIZE* pResultPos) {
   1272   const FX_FILESIZE saved_pos = m_pSyntax->GetPos();
   1273   m_pSyntax->SetPos(pos);
   1274   auto result = m_pSyntax->GetIndirectObject(pObjList, parse_type);
   1275 
   1276   if (pResultPos)
   1277     *pResultPos = m_pSyntax->GetPos();
   1278   m_pSyntax->SetPos(saved_pos);
   1279 
   1280   if (result && objnum && result->GetObjNum() != objnum)
   1281     return nullptr;
   1282 
   1283   const bool should_decrypt = m_pSecurityHandler &&
   1284                               m_pSecurityHandler->GetCryptoHandler() &&
   1285                               objnum != m_MetadataObjnum;
   1286   if (should_decrypt)
   1287     result = m_pSecurityHandler->GetCryptoHandler()->DecryptObjectTree(
   1288         std::move(result));
   1289 
   1290   return result;
   1291 }
   1292 
   1293 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
   1294     CPDF_IndirectObjectHolder* pObjList,
   1295     FX_FILESIZE pos,
   1296     uint32_t objnum,
   1297     FX_FILESIZE* pResultPos) {
   1298   return ParseIndirectObjectAtInternal(
   1299       pObjList, pos, objnum, CPDF_SyntaxParser::ParseType::kStrict, pResultPos);
   1300 }
   1301 
   1302 uint32_t CPDF_Parser::GetFirstPageNo() const {
   1303   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
   1304 }
   1305 
   1306 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
   1307   if (m_pSyntax->GetKeyword() != "trailer")
   1308     return nullptr;
   1309 
   1310   return ToDictionary(m_pSyntax->GetObjectBody(m_pDocument.Get()));
   1311 }
   1312 
   1313 uint32_t CPDF_Parser::GetPermissions() const {
   1314   if (!m_pSecurityHandler)
   1315     return 0xFFFFFFFF;
   1316 
   1317   uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
   1318   if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
   1319     // See PDF Reference 1.7, page 123, table 3.20.
   1320     dwPermission &= 0xFFFFFFFC;
   1321     dwPermission |= 0xFFFFF0C0;
   1322   }
   1323   return dwPermission;
   1324 }
   1325 
   1326 std::unique_ptr<CPDF_LinearizedHeader> CPDF_Parser::ParseLinearizedHeader() {
   1327   return CPDF_LinearizedHeader::Parse(m_pSyntax.get());
   1328 }
   1329 
   1330 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
   1331     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
   1332     CPDF_Document* pDocument) {
   1333   ASSERT(!m_bHasParsed);
   1334   m_bXRefStream = false;
   1335   m_LastXRefOffset = 0;
   1336 
   1337   if (!InitSyntaxParser(pFileAccess))
   1338     return FORMAT_ERROR;
   1339 
   1340   m_pLinearized = ParseLinearizedHeader();
   1341   if (!m_pLinearized)
   1342     return StartParseInternal(std::move(pDocument));
   1343 
   1344   m_bHasParsed = true;
   1345   m_pDocument = pDocument;
   1346 
   1347   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
   1348   FX_FILESIZE dwFirstXRefOffset = m_LastXRefOffset;
   1349   bool bXRefRebuilt = false;
   1350   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, false);
   1351   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
   1352     if (!RebuildCrossRef())
   1353       return FORMAT_ERROR;
   1354 
   1355     bXRefRebuilt = true;
   1356     m_LastXRefOffset = 0;
   1357   }
   1358   if (bLoadV4) {
   1359     std::unique_ptr<CPDF_Dictionary> trailer = LoadTrailerV4();
   1360     if (!trailer)
   1361       return SUCCESS;
   1362 
   1363     m_TrailerData->SetMainTrailer(std::move(trailer));
   1364     int32_t xrefsize = GetDirectInteger(GetTrailer(), "Size");
   1365     if (xrefsize > 0)
   1366       ShrinkObjectMap(xrefsize);
   1367   }
   1368 
   1369   Error eRet = SetEncryptHandler();
   1370   if (eRet != SUCCESS)
   1371     return eRet;
   1372 
   1373   m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
   1374   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
   1375     if (bXRefRebuilt)
   1376       return FORMAT_ERROR;
   1377 
   1378     ReleaseEncryptHandler();
   1379     if (!RebuildCrossRef())
   1380       return FORMAT_ERROR;
   1381 
   1382     eRet = SetEncryptHandler();
   1383     if (eRet != SUCCESS)
   1384       return eRet;
   1385 
   1386     m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
   1387     if (!m_pDocument->GetRoot())
   1388       return FORMAT_ERROR;
   1389   }
   1390 
   1391   if (GetRootObjNum() == 0) {
   1392     ReleaseEncryptHandler();
   1393     if (!RebuildCrossRef() || GetRootObjNum() == 0)
   1394       return FORMAT_ERROR;
   1395 
   1396     eRet = SetEncryptHandler();
   1397     if (eRet != SUCCESS)
   1398       return eRet;
   1399   }
   1400 
   1401   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
   1402     if (CPDF_Reference* pMetadata =
   1403             ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
   1404       m_MetadataObjnum = pMetadata->GetRefObjNum();
   1405   }
   1406   return SUCCESS;
   1407 }
   1408 
   1409 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
   1410   if (!LoadCrossRefV5(&xrefpos, false))
   1411     return false;
   1412 
   1413   std::set<FX_FILESIZE> seen_xrefpos;
   1414   while (xrefpos) {
   1415     seen_xrefpos.insert(xrefpos);
   1416     if (!LoadCrossRefV5(&xrefpos, false))
   1417       return false;
   1418 
   1419     // Check for circular references.
   1420     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
   1421       return false;
   1422   }
   1423   m_ObjectStreamMap.clear();
   1424   m_bXRefStream = true;
   1425   return true;
   1426 }
   1427 
   1428 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
   1429   const FX_SAFE_FILESIZE main_xref_offset = GetTrailer()->GetIntegerFor("Prev");
   1430   if (!main_xref_offset.IsValid())
   1431     return FORMAT_ERROR;
   1432 
   1433   if (main_xref_offset.ValueOrDie() == 0)
   1434     return SUCCESS;
   1435 
   1436   const AutoRestorer<uint32_t> save_metadata_objnum(&m_MetadataObjnum);
   1437   m_MetadataObjnum = 0;
   1438   m_ObjectStreamMap.clear();
   1439   m_ObjCache.clear();
   1440 
   1441   if (!LoadLinearizedAllCrossRefV4(main_xref_offset.ValueOrDie()) &&
   1442       !LoadLinearizedAllCrossRefV5(main_xref_offset.ValueOrDie())) {
   1443     m_LastXRefOffset = 0;
   1444     return FORMAT_ERROR;
   1445   }
   1446 
   1447   return SUCCESS;
   1448 }
   1449 
   1450 CPDF_Parser::ObjectType CPDF_Parser::GetObjectTypeFromCrossRefStreamType(
   1451     int cross_ref_stream_type) const {
   1452   switch (cross_ref_stream_type) {
   1453     case 0:
   1454       return CPDF_Parser::ObjectType::kFree;
   1455     case 1:
   1456       return CPDF_Parser::ObjectType::kNotCompressed;
   1457     case 2:
   1458       return CPDF_Parser::ObjectType::kCompressed;
   1459     default:
   1460       return CPDF_Parser::ObjectType::kNull;
   1461   }
   1462 }
   1463