Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_parser.h"
      8 
      9 #include <algorithm>
     10 #include <utility>
     11 #include <vector>
     12 
     13 #include "core/fpdfapi/parser/cpdf_array.h"
     14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
     15 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     16 #include "core/fpdfapi/parser/cpdf_document.h"
     17 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     18 #include "core/fpdfapi/parser/cpdf_number.h"
     19 #include "core/fpdfapi/parser/cpdf_reference.h"
     20 #include "core/fpdfapi/parser/cpdf_security_handler.h"
     21 #include "core/fpdfapi/parser/cpdf_stream.h"
     22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
     24 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     25 #include "core/fxcrt/fx_ext.h"
     26 #include "core/fxcrt/fx_safe_types.h"
     27 #include "third_party/base/ptr_util.h"
     28 #include "third_party/base/stl_util.h"
     29 
     30 namespace {
     31 
     32 // A limit on the size of the xref table. Theoretical limits are higher, but
     33 // this may be large enough in practice.
     34 const int32_t kMaxXRefSize = 1048576;
     35 
     36 uint32_t GetVarInt(const uint8_t* p, int32_t n) {
     37   uint32_t result = 0;
     38   for (int32_t i = 0; i < n; ++i)
     39     result = result * 256 + p[i];
     40   return result;
     41 }
     42 
     43 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) {
     44   return pObjStream->GetDict()->GetIntegerFor("N");
     45 }
     46 
     47 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) {
     48   return pObjStream->GetDict()->GetIntegerFor("First");
     49 }
     50 
     51 }  // namespace
     52 
     53 CPDF_Parser::CPDF_Parser()
     54     : m_pDocument(nullptr),
     55       m_bHasParsed(false),
     56       m_bXRefStream(false),
     57       m_bVersionUpdated(false),
     58       m_FileVersion(0),
     59       m_pEncryptDict(nullptr),
     60       m_dwXrefStartObjNum(0) {
     61   m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>();
     62 }
     63 
     64 CPDF_Parser::~CPDF_Parser() {
     65   ReleaseEncryptHandler();
     66   SetEncryptDictionary(nullptr);
     67 }
     68 
     69 uint32_t CPDF_Parser::GetLastObjNum() const {
     70   return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first;
     71 }
     72 
     73 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const {
     74   return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first;
     75 }
     76 
     77 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const {
     78   auto it = m_ObjectInfo.find(objnum);
     79   return it != m_ObjectInfo.end() ? it->second.pos : 0;
     80 }
     81 
     82 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const {
     83   ASSERT(IsValidObjectNumber(objnum));
     84   auto it = m_ObjectInfo.find(objnum);
     85   return it != m_ObjectInfo.end() ? it->second.type : 0;
     86 }
     87 
     88 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const {
     89   ASSERT(IsValidObjectNumber(objnum));
     90   auto it = m_ObjectInfo.find(objnum);
     91   return it != m_ObjectInfo.end() ? it->second.gennum : 0;
     92 }
     93 
     94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const {
     95   uint8_t type = GetObjectType(objnum);
     96   return type == 0 || type == 255;
     97 }
     98 
     99 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) {
    100   m_pEncryptDict = pDict;
    101 }
    102 
    103 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() {
    104   return m_pSyntax->m_pCryptoHandler.get();
    105 }
    106 
    107 CFX_RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const {
    108   return m_pSyntax->m_pFileAccess;
    109 }
    110 
    111 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) {
    112   if (objnum == 0) {
    113     m_ObjectInfo.clear();
    114     return;
    115   }
    116 
    117   auto it = m_ObjectInfo.lower_bound(objnum);
    118   while (it != m_ObjectInfo.end()) {
    119     auto saved_it = it++;
    120     m_ObjectInfo.erase(saved_it);
    121   }
    122 
    123   if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1))
    124     m_ObjectInfo[objnum - 1].pos = 0;
    125 }
    126 
    127 CPDF_Parser::Error CPDF_Parser::StartParse(
    128     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    129     CPDF_Document* pDocument) {
    130   ASSERT(!m_bHasParsed);
    131   m_bHasParsed = true;
    132   m_bXRefStream = false;
    133   m_LastXRefOffset = 0;
    134 
    135   int32_t offset = GetHeaderOffset(pFileAccess);
    136   if (offset == -1)
    137     return FORMAT_ERROR;
    138 
    139   m_pSyntax->InitParser(pFileAccess, offset);
    140 
    141   uint8_t ch;
    142   if (!m_pSyntax->GetCharAt(5, ch))
    143     return FORMAT_ERROR;
    144 
    145   if (std::isdigit(ch))
    146     m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10;
    147 
    148   if (!m_pSyntax->GetCharAt(7, ch))
    149     return FORMAT_ERROR;
    150 
    151   if (std::isdigit(ch))
    152     m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    153 
    154   if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9)
    155     return FORMAT_ERROR;
    156 
    157   m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9);
    158   m_pDocument = pDocument;
    159 
    160   bool bXRefRebuilt = false;
    161   if (m_pSyntax->SearchWord("startxref", true, false, 4096)) {
    162     m_SortedOffset.insert(m_pSyntax->SavePos());
    163     m_pSyntax->GetKeyword();
    164 
    165     bool bNumber;
    166     CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber);
    167     if (!bNumber)
    168       return FORMAT_ERROR;
    169 
    170     m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str());
    171     if (!LoadAllCrossRefV4(m_LastXRefOffset) &&
    172         !LoadAllCrossRefV5(m_LastXRefOffset)) {
    173       if (!RebuildCrossRef())
    174         return FORMAT_ERROR;
    175 
    176       bXRefRebuilt = true;
    177       m_LastXRefOffset = 0;
    178     }
    179   } else {
    180     if (!RebuildCrossRef())
    181       return FORMAT_ERROR;
    182 
    183     bXRefRebuilt = true;
    184   }
    185   Error eRet = SetEncryptHandler();
    186   if (eRet != SUCCESS)
    187     return eRet;
    188 
    189   m_pDocument->LoadDoc();
    190   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
    191     if (bXRefRebuilt)
    192       return FORMAT_ERROR;
    193 
    194     ReleaseEncryptHandler();
    195     if (!RebuildCrossRef())
    196       return FORMAT_ERROR;
    197 
    198     eRet = SetEncryptHandler();
    199     if (eRet != SUCCESS)
    200       return eRet;
    201 
    202     m_pDocument->LoadDoc();
    203     if (!m_pDocument->GetRoot())
    204       return FORMAT_ERROR;
    205   }
    206   if (GetRootObjNum() == 0) {
    207     ReleaseEncryptHandler();
    208     if (!RebuildCrossRef() || GetRootObjNum() == 0)
    209       return FORMAT_ERROR;
    210 
    211     eRet = SetEncryptHandler();
    212     if (eRet != SUCCESS)
    213       return eRet;
    214   }
    215   if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) {
    216     CPDF_Reference* pMetadata =
    217         ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"));
    218     if (pMetadata)
    219       m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
    220   }
    221   return SUCCESS;
    222 }
    223 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() {
    224   ReleaseEncryptHandler();
    225   SetEncryptDictionary(nullptr);
    226 
    227   if (!m_pTrailer)
    228     return FORMAT_ERROR;
    229 
    230   CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt");
    231   if (pEncryptObj) {
    232     if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) {
    233       SetEncryptDictionary(pEncryptDict);
    234     } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) {
    235       pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum());
    236       if (pEncryptObj)
    237         SetEncryptDictionary(pEncryptObj->GetDict());
    238     }
    239   }
    240 
    241   if (m_pEncryptDict) {
    242     CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter");
    243     std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler;
    244     Error err = HANDLER_ERROR;
    245     if (filter == "Standard") {
    246       pSecurityHandler = pdfium::MakeUnique<CPDF_SecurityHandler>();
    247       err = PASSWORD_ERROR;
    248     }
    249     if (!pSecurityHandler)
    250       return HANDLER_ERROR;
    251 
    252     if (!pSecurityHandler->OnInit(this, m_pEncryptDict))
    253       return err;
    254 
    255     m_pSecurityHandler = std::move(pSecurityHandler);
    256     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler(
    257         m_pSecurityHandler->CreateCryptoHandler());
    258     if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get()))
    259       return HANDLER_ERROR;
    260     m_pSyntax->SetEncrypt(std::move(pCryptoHandler));
    261   }
    262   return SUCCESS;
    263 }
    264 
    265 void CPDF_Parser::ReleaseEncryptHandler() {
    266   m_pSyntax->m_pCryptoHandler.reset();
    267   m_pSecurityHandler.reset();
    268 }
    269 
    270 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const {
    271   if (!IsValidObjectNumber(objnum))
    272     return 0;
    273 
    274   if (GetObjectType(objnum) == 1)
    275     return GetObjectPositionOrZero(objnum);
    276 
    277   if (GetObjectType(objnum) == 2) {
    278     FX_FILESIZE pos = GetObjectPositionOrZero(objnum);
    279     return GetObjectPositionOrZero(pos);
    280   }
    281   return 0;
    282 }
    283 
    284 // Ideally, all the cross reference entries should be verified.
    285 // In reality, we rarely see well-formed cross references don't match
    286 // with the objects. crbug/602650 showed a case where object numbers
    287 // in the cross reference table are all off by one.
    288 bool CPDF_Parser::VerifyCrossRefV4() {
    289   for (const auto& it : m_ObjectInfo) {
    290     if (it.second.pos == 0)
    291       continue;
    292     // Find the first non-zero position.
    293     FX_FILESIZE SavedPos = m_pSyntax->SavePos();
    294     m_pSyntax->RestorePos(it.second.pos);
    295     bool is_num = false;
    296     CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num);
    297     m_pSyntax->RestorePos(SavedPos);
    298     if (!is_num || num_str.IsEmpty() ||
    299         FXSYS_atoui(num_str.c_str()) != it.first) {
    300       // If the object number read doesn't match the one stored,
    301       // something is wrong with the cross reference table.
    302       return false;
    303     } else {
    304       return true;
    305     }
    306   }
    307   return true;
    308 }
    309 
    310 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) {
    311   if (!LoadCrossRefV4(xrefpos, 0, true))
    312     return false;
    313 
    314   m_pTrailer = LoadTrailerV4();
    315   if (!m_pTrailer)
    316     return false;
    317 
    318   int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
    319   if (xrefsize > 0 && xrefsize <= kMaxXRefSize)
    320     ShrinkObjectMap(xrefsize);
    321 
    322   std::vector<FX_FILESIZE> CrossRefList;
    323   std::vector<FX_FILESIZE> XRefStreamList;
    324   std::set<FX_FILESIZE> seen_xrefpos;
    325 
    326   CrossRefList.push_back(xrefpos);
    327   XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
    328   seen_xrefpos.insert(xrefpos);
    329 
    330   // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not
    331   // numerical, GetDirectInteger() returns 0. Loading will end.
    332   xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
    333   while (xrefpos) {
    334     // Check for circular references.
    335     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    336       return false;
    337 
    338     seen_xrefpos.insert(xrefpos);
    339 
    340     // SLOW ...
    341     CrossRefList.insert(CrossRefList.begin(), xrefpos);
    342     LoadCrossRefV4(xrefpos, 0, true);
    343 
    344     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
    345     if (!pDict)
    346       return false;
    347 
    348     xrefpos = GetDirectInteger(pDict.get(), "Prev");
    349 
    350     // SLOW ...
    351     XRefStreamList.insert(XRefStreamList.begin(),
    352                           pDict->GetIntegerFor("XRefStm"));
    353     m_Trailers.push_back(std::move(pDict));
    354   }
    355 
    356   for (size_t i = 0; i < CrossRefList.size(); ++i) {
    357     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
    358       return false;
    359     if (i == 0 && !VerifyCrossRefV4())
    360       return false;
    361   }
    362   return true;
    363 }
    364 
    365 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos,
    366                                               uint32_t dwObjCount) {
    367   if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount))
    368     return false;
    369 
    370   m_pTrailer = LoadTrailerV4();
    371   if (!m_pTrailer)
    372     return false;
    373 
    374   int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
    375   if (xrefsize == 0)
    376     return false;
    377 
    378   std::vector<FX_FILESIZE> CrossRefList;
    379   std::vector<FX_FILESIZE> XRefStreamList;
    380   std::set<FX_FILESIZE> seen_xrefpos;
    381 
    382   CrossRefList.push_back(xrefpos);
    383   XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm"));
    384   seen_xrefpos.insert(xrefpos);
    385 
    386   xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev");
    387   while (xrefpos) {
    388     // Check for circular references.
    389     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    390       return false;
    391 
    392     seen_xrefpos.insert(xrefpos);
    393 
    394     // SLOW ...
    395     CrossRefList.insert(CrossRefList.begin(), xrefpos);
    396     LoadCrossRefV4(xrefpos, 0, true);
    397 
    398     std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4());
    399     if (!pDict)
    400       return false;
    401 
    402     xrefpos = GetDirectInteger(pDict.get(), "Prev");
    403 
    404     // SLOW ...
    405     XRefStreamList.insert(XRefStreamList.begin(),
    406                           pDict->GetIntegerFor("XRefStm"));
    407     m_Trailers.push_back(std::move(pDict));
    408   }
    409 
    410   for (size_t i = 1; i < CrossRefList.size(); ++i) {
    411     if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false))
    412       return false;
    413   }
    414   return true;
    415 }
    416 
    417 bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos,
    418                                            uint32_t dwObjCount) {
    419   FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset;
    420 
    421   m_pSyntax->RestorePos(dwStartPos);
    422   m_SortedOffset.insert(pos);
    423 
    424   uint32_t start_objnum = 0;
    425   uint32_t count = dwObjCount;
    426   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
    427 
    428   const int32_t recordsize = 20;
    429   std::vector<char> buf(1024 * recordsize + 1);
    430   buf[1024 * recordsize] = '\0';
    431 
    432   int32_t nBlocks = count / 1024 + 1;
    433   for (int32_t block = 0; block < nBlocks; block++) {
    434     int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
    435     uint32_t dwReadSize = block_size * recordsize;
    436     if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen)
    437       return false;
    438 
    439     if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
    440                               dwReadSize)) {
    441       return false;
    442     }
    443 
    444     for (int32_t i = 0; i < block_size; i++) {
    445       uint32_t objnum = start_objnum + block * 1024 + i;
    446       char* pEntry = &buf[i * recordsize];
    447       if (pEntry[17] == 'f') {
    448         m_ObjectInfo[objnum].pos = 0;
    449         m_ObjectInfo[objnum].type = 0;
    450       } else {
    451         int32_t offset = FXSYS_atoi(pEntry);
    452         if (offset == 0) {
    453           for (int32_t c = 0; c < 10; c++) {
    454             if (!std::isdigit(pEntry[c]))
    455               return false;
    456           }
    457         }
    458 
    459         m_ObjectInfo[objnum].pos = offset;
    460         int32_t version = FXSYS_atoi(pEntry + 11);
    461         if (version >= 1)
    462           m_bVersionUpdated = true;
    463 
    464         m_ObjectInfo[objnum].gennum = version;
    465         if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
    466           m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
    467 
    468         m_ObjectInfo[objnum].type = 1;
    469       }
    470     }
    471   }
    472   m_pSyntax->RestorePos(SavedPos + count * recordsize);
    473   return true;
    474 }
    475 
    476 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos,
    477                                  FX_FILESIZE streampos,
    478                                  bool bSkip) {
    479   m_pSyntax->RestorePos(pos);
    480   if (m_pSyntax->GetKeyword() != "xref")
    481     return false;
    482 
    483   m_SortedOffset.insert(pos);
    484   if (streampos)
    485     m_SortedOffset.insert(streampos);
    486 
    487   while (1) {
    488     FX_FILESIZE SavedPos = m_pSyntax->SavePos();
    489     bool bIsNumber;
    490     CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
    491     if (word.IsEmpty())
    492       return false;
    493 
    494     if (!bIsNumber) {
    495       m_pSyntax->RestorePos(SavedPos);
    496       break;
    497     }
    498 
    499     uint32_t start_objnum = FXSYS_atoui(word.c_str());
    500     if (start_objnum >= kMaxObjectNumber)
    501       return false;
    502 
    503     uint32_t count = m_pSyntax->GetDirectNum();
    504     m_pSyntax->ToNextWord();
    505     SavedPos = m_pSyntax->SavePos();
    506     const int32_t recordsize = 20;
    507 
    508     m_dwXrefStartObjNum = start_objnum;
    509     if (!bSkip) {
    510       std::vector<char> buf(1024 * recordsize + 1);
    511       buf[1024 * recordsize] = '\0';
    512 
    513       int32_t nBlocks = count / 1024 + 1;
    514       for (int32_t block = 0; block < nBlocks; block++) {
    515         int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024;
    516         m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()),
    517                              block_size * recordsize);
    518 
    519         for (int32_t i = 0; i < block_size; i++) {
    520           uint32_t objnum = start_objnum + block * 1024 + i;
    521           char* pEntry = &buf[i * recordsize];
    522           if (pEntry[17] == 'f') {
    523             m_ObjectInfo[objnum].pos = 0;
    524             m_ObjectInfo[objnum].type = 0;
    525           } else {
    526             FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry);
    527             if (offset == 0) {
    528               for (int32_t c = 0; c < 10; c++) {
    529                 if (!std::isdigit(pEntry[c]))
    530                   return false;
    531               }
    532             }
    533 
    534             m_ObjectInfo[objnum].pos = offset;
    535             int32_t version = FXSYS_atoi(pEntry + 11);
    536             if (version >= 1)
    537               m_bVersionUpdated = true;
    538 
    539             m_ObjectInfo[objnum].gennum = version;
    540             if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen)
    541               m_SortedOffset.insert(m_ObjectInfo[objnum].pos);
    542 
    543             m_ObjectInfo[objnum].type = 1;
    544           }
    545         }
    546       }
    547     }
    548     m_pSyntax->RestorePos(SavedPos + count * recordsize);
    549   }
    550   return !streampos || LoadCrossRefV5(&streampos, false);
    551 }
    552 
    553 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) {
    554   if (!LoadCrossRefV5(&xrefpos, true))
    555     return false;
    556 
    557   std::set<FX_FILESIZE> seen_xrefpos;
    558   while (xrefpos) {
    559     seen_xrefpos.insert(xrefpos);
    560     if (!LoadCrossRefV5(&xrefpos, false))
    561       return false;
    562 
    563     // Check for circular references.
    564     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
    565       return false;
    566   }
    567   m_ObjectStreamMap.clear();
    568   m_bXRefStream = true;
    569   return true;
    570 }
    571 
    572 bool CPDF_Parser::RebuildCrossRef() {
    573   m_ObjectInfo.clear();
    574   m_SortedOffset.clear();
    575   m_pTrailer.reset();
    576 
    577   ParserState state = ParserState::kDefault;
    578   int32_t inside_index = 0;
    579   uint32_t objnum = 0;
    580   uint32_t gennum = 0;
    581   int32_t depth = 0;
    582   const uint32_t kBufferSize = 4096;
    583   std::vector<uint8_t> buffer(kBufferSize);
    584 
    585   FX_FILESIZE pos = m_pSyntax->m_HeaderOffset;
    586   FX_FILESIZE start_pos = 0;
    587   FX_FILESIZE start_pos1 = 0;
    588   FX_FILESIZE last_obj = -1;
    589   FX_FILESIZE last_xref = -1;
    590   FX_FILESIZE last_trailer = -1;
    591 
    592   while (pos < m_pSyntax->m_FileLen) {
    593     const FX_FILESIZE saved_pos = pos;
    594     bool bOverFlow = false;
    595     uint32_t size =
    596         std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize);
    597     if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size))
    598       break;
    599 
    600     for (uint32_t i = 0; i < size; i++) {
    601       uint8_t byte = buffer[i];
    602       switch (state) {
    603         case ParserState::kDefault:
    604           if (PDFCharIsWhitespace(byte)) {
    605             state = ParserState::kWhitespace;
    606           } else if (std::isdigit(byte)) {
    607             --i;
    608             state = ParserState::kWhitespace;
    609           } else if (byte == '%') {
    610             inside_index = 0;
    611             state = ParserState::kComment;
    612           } else if (byte == '(') {
    613             state = ParserState::kString;
    614             depth = 1;
    615           } else if (byte == '<') {
    616             inside_index = 1;
    617             state = ParserState::kHexString;
    618           } else if (byte == '\\') {
    619             state = ParserState::kEscapedString;
    620           } else if (byte == 't') {
    621             state = ParserState::kTrailer;
    622             inside_index = 1;
    623           }
    624           break;
    625 
    626         case ParserState::kWhitespace:
    627           if (std::isdigit(byte)) {
    628             start_pos = pos + i;
    629             state = ParserState::kObjNum;
    630             objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
    631           } else if (byte == 't') {
    632             state = ParserState::kTrailer;
    633             inside_index = 1;
    634           } else if (byte == 'x') {
    635             state = ParserState::kXref;
    636             inside_index = 1;
    637           } else if (!PDFCharIsWhitespace(byte)) {
    638             --i;
    639             state = ParserState::kDefault;
    640           }
    641           break;
    642 
    643         case ParserState::kObjNum:
    644           if (std::isdigit(byte)) {
    645             objnum =
    646                 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
    647           } else if (PDFCharIsWhitespace(byte)) {
    648             state = ParserState::kPostObjNum;
    649           } else {
    650             --i;
    651             state = ParserState::kEndObj;
    652             inside_index = 0;
    653           }
    654           break;
    655 
    656         case ParserState::kPostObjNum:
    657           if (std::isdigit(byte)) {
    658             start_pos1 = pos + i;
    659             state = ParserState::kGenNum;
    660             gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
    661           } else if (byte == 't') {
    662             state = ParserState::kTrailer;
    663             inside_index = 1;
    664           } else if (!PDFCharIsWhitespace(byte)) {
    665             --i;
    666             state = ParserState::kDefault;
    667           }
    668           break;
    669 
    670         case ParserState::kGenNum:
    671           if (std::isdigit(byte)) {
    672             gennum =
    673                 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
    674           } else if (PDFCharIsWhitespace(byte)) {
    675             state = ParserState::kPostGenNum;
    676           } else {
    677             --i;
    678             state = ParserState::kDefault;
    679           }
    680           break;
    681 
    682         case ParserState::kPostGenNum:
    683           if (byte == 'o') {
    684             state = ParserState::kBeginObj;
    685             inside_index = 1;
    686           } else if (std::isdigit(byte)) {
    687             objnum = gennum;
    688             gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte));
    689             start_pos = start_pos1;
    690             start_pos1 = pos + i;
    691             state = ParserState::kGenNum;
    692           } else if (byte == 't') {
    693             state = ParserState::kTrailer;
    694             inside_index = 1;
    695           } else if (!PDFCharIsWhitespace(byte)) {
    696             --i;
    697             state = ParserState::kDefault;
    698           }
    699           break;
    700 
    701         case ParserState::kBeginObj:
    702           switch (inside_index) {
    703             case 1:
    704               if (byte != 'b') {
    705                 --i;
    706                 state = ParserState::kDefault;
    707               } else {
    708                 inside_index++;
    709               }
    710               break;
    711             case 2:
    712               if (byte != 'j') {
    713                 --i;
    714                 state = ParserState::kDefault;
    715               } else {
    716                 inside_index++;
    717               }
    718               break;
    719             case 3:
    720               if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
    721                 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset;
    722                 m_SortedOffset.insert(obj_pos);
    723                 last_obj = start_pos;
    724                 FX_FILESIZE obj_end = 0;
    725                 std::unique_ptr<CPDF_Object> pObject =
    726                     ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum,
    727                                                   &obj_end);
    728                 if (CPDF_Stream* pStream = ToStream(pObject.get())) {
    729                   if (CPDF_Dictionary* pDict = pStream->GetDict()) {
    730                     if ((pDict->KeyExist("Type")) &&
    731                         (pDict->GetStringFor("Type") == "XRef" &&
    732                          pDict->KeyExist("Size"))) {
    733                       CPDF_Object* pRoot = pDict->GetObjectFor("Root");
    734                       if (pRoot && pRoot->GetDict() &&
    735                           pRoot->GetDict()->GetObjectFor("Pages")) {
    736                         m_pTrailer = ToDictionary(pDict->Clone());
    737                       }
    738                     }
    739                   }
    740                 }
    741 
    742                 FX_FILESIZE offset = 0;
    743                 m_pSyntax->RestorePos(obj_pos);
    744                 offset = m_pSyntax->FindTag("obj", 0);
    745                 if (offset == -1)
    746                   offset = 0;
    747                 else
    748                   offset += 3;
    749 
    750                 FX_FILESIZE nLen = obj_end - obj_pos - offset;
    751                 if ((uint32_t)nLen > size - i) {
    752                   pos = obj_end + m_pSyntax->m_HeaderOffset;
    753                   bOverFlow = true;
    754                 } else {
    755                   i += (uint32_t)nLen;
    756                 }
    757 
    758                 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) &&
    759                     m_ObjectInfo[objnum].pos) {
    760                   if (pObject) {
    761                     uint32_t oldgen = GetObjectGenNum(objnum);
    762                     m_ObjectInfo[objnum].pos = obj_pos;
    763                     m_ObjectInfo[objnum].gennum = gennum;
    764                     if (oldgen != gennum)
    765                       m_bVersionUpdated = true;
    766                   }
    767                 } else {
    768                   m_ObjectInfo[objnum].pos = obj_pos;
    769                   m_ObjectInfo[objnum].type = 1;
    770                   m_ObjectInfo[objnum].gennum = gennum;
    771                 }
    772               }
    773               --i;
    774               state = ParserState::kDefault;
    775               break;
    776           }
    777           break;
    778 
    779         case ParserState::kTrailer:
    780           if (inside_index == 7) {
    781             if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) {
    782               last_trailer = pos + i - 7;
    783               m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset);
    784 
    785               std::unique_ptr<CPDF_Object> pObj =
    786                   m_pSyntax->GetObject(m_pDocument, 0, 0, true);
    787               if (pObj) {
    788                 if (pObj->IsDictionary() || pObj->AsStream()) {
    789                   CPDF_Stream* pStream = pObj->AsStream();
    790                   if (CPDF_Dictionary* pTrailer =
    791                           pStream ? pStream->GetDict() : pObj->AsDictionary()) {
    792                     if (m_pTrailer) {
    793                       CPDF_Object* pRoot = pTrailer->GetObjectFor("Root");
    794                       CPDF_Reference* pRef = ToReference(pRoot);
    795                       if (!pRoot ||
    796                           (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) &&
    797                            m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) {
    798                         auto it = pTrailer->begin();
    799                         while (it != pTrailer->end()) {
    800                           const CFX_ByteString& key = it->first;
    801                           CPDF_Object* pElement = it->second.get();
    802                           ++it;
    803                           uint32_t dwObjNum =
    804                               pElement ? pElement->GetObjNum() : 0;
    805                           if (dwObjNum) {
    806                             m_pTrailer->SetNewFor<CPDF_Reference>(
    807                                 key, m_pDocument, dwObjNum);
    808                           } else {
    809                             m_pTrailer->SetFor(key, pElement->Clone());
    810                           }
    811                         }
    812                       }
    813                     } else {
    814                       if (pObj->IsStream()) {
    815                         m_pTrailer = ToDictionary(pTrailer->Clone());
    816                       } else {
    817                         m_pTrailer = ToDictionary(std::move(pObj));
    818                       }
    819 
    820                       FX_FILESIZE dwSavePos = m_pSyntax->SavePos();
    821                       CFX_ByteString strWord = m_pSyntax->GetKeyword();
    822                       if (!strWord.Compare("startxref")) {
    823                         bool bNumber;
    824                         CFX_ByteString bsOffset =
    825                             m_pSyntax->GetNextWord(&bNumber);
    826                         if (bNumber)
    827                           m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str());
    828                       }
    829                       m_pSyntax->RestorePos(dwSavePos);
    830                     }
    831                   }
    832                 }
    833               }
    834             }
    835             --i;
    836             state = ParserState::kDefault;
    837           } else if (byte == "trailer"[inside_index]) {
    838             inside_index++;
    839           } else {
    840             --i;
    841             state = ParserState::kDefault;
    842           }
    843           break;
    844 
    845         case ParserState::kXref:
    846           if (inside_index == 4) {
    847             last_xref = pos + i - 4;
    848             state = ParserState::kWhitespace;
    849           } else if (byte == "xref"[inside_index]) {
    850             inside_index++;
    851           } else {
    852             --i;
    853             state = ParserState::kDefault;
    854           }
    855           break;
    856 
    857         case ParserState::kComment:
    858           if (PDFCharIsLineEnding(byte))
    859             state = ParserState::kDefault;
    860           break;
    861 
    862         case ParserState::kString:
    863           if (byte == ')') {
    864             if (depth > 0)
    865               depth--;
    866           } else if (byte == '(') {
    867             depth++;
    868           }
    869 
    870           if (!depth)
    871             state = ParserState::kDefault;
    872           break;
    873 
    874         case ParserState::kHexString:
    875           if (byte == '>' || (byte == '<' && inside_index == 1))
    876             state = ParserState::kDefault;
    877           inside_index = 0;
    878           break;
    879 
    880         case ParserState::kEscapedString:
    881           if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) {
    882             --i;
    883             state = ParserState::kDefault;
    884           }
    885           break;
    886 
    887         case ParserState::kEndObj:
    888           if (PDFCharIsWhitespace(byte)) {
    889             state = ParserState::kDefault;
    890           } else if (byte == '%' || byte == '(' || byte == '<' ||
    891                      byte == '\\') {
    892             state = ParserState::kDefault;
    893             --i;
    894           } else if (inside_index == 6) {
    895             state = ParserState::kDefault;
    896             --i;
    897           } else if (byte == "endobj"[inside_index]) {
    898             inside_index++;
    899           }
    900           break;
    901       }
    902 
    903       if (bOverFlow) {
    904         size = 0;
    905         break;
    906       }
    907     }
    908     pos += size;
    909 
    910     // If the position has not changed at all or went backwards in a loop
    911     // iteration, then break out to prevent infinite looping.
    912     if (pos <= saved_pos)
    913       break;
    914   }
    915 
    916   if (last_xref != -1 && last_xref > last_obj)
    917     last_trailer = last_xref;
    918   else if (last_trailer == -1 || last_xref < last_obj)
    919     last_trailer = m_pSyntax->m_FileLen;
    920 
    921   m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset);
    922   return m_pTrailer && !m_ObjectInfo.empty();
    923 }
    924 
    925 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) {
    926   std::unique_ptr<CPDF_Object> pObject(
    927       ParseIndirectObjectAt(m_pDocument, *pos, 0));
    928   if (!pObject)
    929     return false;
    930 
    931   uint32_t objnum = pObject->m_ObjNum;
    932   if (!objnum)
    933     return false;
    934 
    935   CPDF_Object* pUnownedObject = pObject.get();
    936   if (m_pDocument) {
    937     CPDF_Dictionary* pRootDict = m_pDocument->GetRoot();
    938     if (pRootDict && pRootDict->GetObjNum() == objnum)
    939       return false;
    940     if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration(
    941             objnum, std::move(pObject))) {
    942       return false;
    943     }
    944   }
    945 
    946   CPDF_Stream* pStream = pUnownedObject->AsStream();
    947   if (!pStream)
    948     return false;
    949 
    950   CPDF_Dictionary* pDict = pStream->GetDict();
    951   *pos = pDict->GetIntegerFor("Prev");
    952   int32_t size = pDict->GetIntegerFor("Size");
    953   if (size < 0)
    954     return false;
    955 
    956   std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone());
    957   if (bMainXRef) {
    958     m_pTrailer = std::move(pNewTrailer);
    959     ShrinkObjectMap(size);
    960     for (auto& it : m_ObjectInfo)
    961       it.second.type = 0;
    962   } else {
    963     m_Trailers.push_back(std::move(pNewTrailer));
    964   }
    965 
    966   std::vector<std::pair<int32_t, int32_t>> arrIndex;
    967   CPDF_Array* pArray = pDict->GetArrayFor("Index");
    968   if (pArray) {
    969     for (size_t i = 0; i < pArray->GetCount() / 2; i++) {
    970       CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2);
    971       CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1);
    972 
    973       if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) {
    974         int nStartNum = pStartNumObj->GetInteger();
    975         int nCount = pCountObj->GetInteger();
    976         if (nStartNum >= 0 && nCount > 0)
    977           arrIndex.push_back(std::make_pair(nStartNum, nCount));
    978       }
    979     }
    980   }
    981 
    982   if (arrIndex.size() == 0)
    983     arrIndex.push_back(std::make_pair(0, size));
    984 
    985   pArray = pDict->GetArrayFor("W");
    986   if (!pArray)
    987     return false;
    988 
    989   std::vector<uint32_t> WidthArray;
    990   FX_SAFE_UINT32 dwAccWidth = 0;
    991   for (size_t i = 0; i < pArray->GetCount(); ++i) {
    992     WidthArray.push_back(pArray->GetIntegerAt(i));
    993     dwAccWidth += WidthArray[i];
    994   }
    995 
    996   if (!dwAccWidth.IsValid() || WidthArray.size() < 3)
    997     return false;
    998 
    999   uint32_t totalWidth = dwAccWidth.ValueOrDie();
   1000   CPDF_StreamAcc acc;
   1001   acc.LoadAllData(pStream);
   1002 
   1003   const uint8_t* pData = acc.GetData();
   1004   uint32_t dwTotalSize = acc.GetSize();
   1005   uint32_t segindex = 0;
   1006   for (uint32_t i = 0; i < arrIndex.size(); i++) {
   1007     int32_t startnum = arrIndex[i].first;
   1008     if (startnum < 0)
   1009       continue;
   1010 
   1011     m_dwXrefStartObjNum = pdfium::base::checked_cast<uint32_t>(startnum);
   1012     uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second);
   1013     FX_SAFE_UINT32 dwCaculatedSize = segindex;
   1014     dwCaculatedSize += count;
   1015     dwCaculatedSize *= totalWidth;
   1016     if (!dwCaculatedSize.IsValid() ||
   1017         dwCaculatedSize.ValueOrDie() > dwTotalSize) {
   1018       continue;
   1019     }
   1020 
   1021     const uint8_t* segstart = pData + segindex * totalWidth;
   1022     FX_SAFE_UINT32 dwMaxObjNum = startnum;
   1023     dwMaxObjNum += count;
   1024     uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1;
   1025     if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size)
   1026       continue;
   1027 
   1028     for (uint32_t j = 0; j < count; j++) {
   1029       int32_t type = 1;
   1030       const uint8_t* entrystart = segstart + j * totalWidth;
   1031       if (WidthArray[0])
   1032         type = GetVarInt(entrystart, WidthArray[0]);
   1033 
   1034       if (GetObjectType(startnum + j) == 255) {
   1035         FX_FILESIZE offset =
   1036             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
   1037         m_ObjectInfo[startnum + j].pos = offset;
   1038         m_SortedOffset.insert(offset);
   1039         continue;
   1040       }
   1041 
   1042       if (GetObjectType(startnum + j))
   1043         continue;
   1044 
   1045       m_ObjectInfo[startnum + j].type = type;
   1046       if (type == 0) {
   1047         m_ObjectInfo[startnum + j].pos = 0;
   1048       } else {
   1049         FX_FILESIZE offset =
   1050             GetVarInt(entrystart + WidthArray[0], WidthArray[1]);
   1051         m_ObjectInfo[startnum + j].pos = offset;
   1052         if (type == 1) {
   1053           m_SortedOffset.insert(offset);
   1054         } else {
   1055           if (offset < 0 || !IsValidObjectNumber(offset))
   1056             return false;
   1057           m_ObjectInfo[offset].type = 255;
   1058         }
   1059       }
   1060     }
   1061     segindex += count;
   1062   }
   1063   return true;
   1064 }
   1065 
   1066 CPDF_Array* CPDF_Parser::GetIDArray() {
   1067   if (!m_pTrailer)
   1068     return nullptr;
   1069 
   1070   CPDF_Object* pID = m_pTrailer->GetObjectFor("ID");
   1071   if (!pID)
   1072     return nullptr;
   1073 
   1074   CPDF_Reference* pRef = pID->AsReference();
   1075   if (!pRef)
   1076     return ToArray(pID);
   1077 
   1078   std::unique_ptr<CPDF_Object> pNewObj =
   1079       ParseIndirectObject(nullptr, pRef->GetRefObjNum());
   1080   pID = pNewObj.get();
   1081   m_pTrailer->SetFor("ID", std::move(pNewObj));
   1082   return ToArray(pID);
   1083 }
   1084 
   1085 uint32_t CPDF_Parser::GetRootObjNum() {
   1086   CPDF_Reference* pRef =
   1087       ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr);
   1088   return pRef ? pRef->GetRefObjNum() : 0;
   1089 }
   1090 
   1091 uint32_t CPDF_Parser::GetInfoObjNum() {
   1092   CPDF_Reference* pRef =
   1093       ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr);
   1094   return pRef ? pRef->GetRefObjNum() : 0;
   1095 }
   1096 
   1097 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject(
   1098     CPDF_IndirectObjectHolder* pObjList,
   1099     uint32_t objnum) {
   1100   if (!IsValidObjectNumber(objnum))
   1101     return nullptr;
   1102 
   1103   // Prevent circular parsing the same object.
   1104   if (pdfium::ContainsKey(m_ParsingObjNums, objnum))
   1105     return nullptr;
   1106 
   1107   pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum);
   1108   if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) {
   1109     FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
   1110     if (pos <= 0)
   1111       return nullptr;
   1112     return ParseIndirectObjectAt(pObjList, pos, objnum);
   1113   }
   1114   if (GetObjectType(objnum) != 2)
   1115     return nullptr;
   1116 
   1117   CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
   1118   if (!pObjStream)
   1119     return nullptr;
   1120 
   1121   CFX_RetainPtr<IFX_MemoryStream> file = IFX_MemoryStream::Create(
   1122       (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), false);
   1123   CPDF_SyntaxParser syntax;
   1124   syntax.InitParser(file, 0);
   1125   const int32_t offset = GetStreamFirst(pObjStream);
   1126 
   1127   // Read object numbers from |pObjStream| into a cache.
   1128   if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) {
   1129     for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) {
   1130       uint32_t thisnum = syntax.GetDirectNum();
   1131       uint32_t thisoff = syntax.GetDirectNum();
   1132       m_ObjCache[pObjStream][thisnum] = thisoff;
   1133     }
   1134   }
   1135 
   1136   const auto it = m_ObjCache[pObjStream].find(objnum);
   1137   if (it == m_ObjCache[pObjStream].end())
   1138     return nullptr;
   1139 
   1140   syntax.RestorePos(offset + it->second);
   1141   return syntax.GetObject(pObjList, 0, 0, true);
   1142 }
   1143 
   1144 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) {
   1145   auto it = m_ObjectStreamMap.find(objnum);
   1146   if (it != m_ObjectStreamMap.end())
   1147     return it->second.get();
   1148 
   1149   if (!m_pDocument)
   1150     return nullptr;
   1151 
   1152   const CPDF_Stream* pStream =
   1153       ToStream(m_pDocument->GetOrParseIndirectObject(objnum));
   1154   if (!pStream)
   1155     return nullptr;
   1156 
   1157   CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc;
   1158   pStreamAcc->LoadAllData(pStream);
   1159   m_ObjectStreamMap[objnum].reset(pStreamAcc);
   1160   return pStreamAcc;
   1161 }
   1162 
   1163 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const {
   1164   if (!IsValidObjectNumber(objnum))
   1165     return 0;
   1166 
   1167   if (GetObjectType(objnum) == 2)
   1168     objnum = GetObjectPositionOrZero(objnum);
   1169 
   1170   if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255)
   1171     return 0;
   1172 
   1173   FX_FILESIZE offset = GetObjectPositionOrZero(objnum);
   1174   if (offset == 0)
   1175     return 0;
   1176 
   1177   auto it = m_SortedOffset.find(offset);
   1178   if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end())
   1179     return 0;
   1180 
   1181   return *it - offset;
   1182 }
   1183 
   1184 void CPDF_Parser::GetIndirectBinary(uint32_t objnum,
   1185                                     uint8_t*& pBuffer,
   1186                                     uint32_t& size) {
   1187   pBuffer = nullptr;
   1188   size = 0;
   1189   if (!IsValidObjectNumber(objnum))
   1190     return;
   1191 
   1192   if (GetObjectType(objnum) == 2) {
   1193     CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos);
   1194     if (!pObjStream)
   1195       return;
   1196 
   1197     int32_t offset = GetStreamFirst(pObjStream);
   1198     const uint8_t* pData = pObjStream->GetData();
   1199     uint32_t totalsize = pObjStream->GetSize();
   1200     CFX_RetainPtr<IFX_MemoryStream> file =
   1201         IFX_MemoryStream::Create((uint8_t*)pData, (size_t)totalsize, false);
   1202     CPDF_SyntaxParser syntax;
   1203     syntax.InitParser(file, 0);
   1204 
   1205     for (int i = GetStreamNCount(pObjStream); i > 0; --i) {
   1206       uint32_t thisnum = syntax.GetDirectNum();
   1207       uint32_t thisoff = syntax.GetDirectNum();
   1208       if (thisnum != objnum)
   1209         continue;
   1210 
   1211       if (i == 1) {
   1212         size = totalsize - (thisoff + offset);
   1213       } else {
   1214         syntax.GetDirectNum();  // Skip nextnum.
   1215         uint32_t nextoff = syntax.GetDirectNum();
   1216         size = nextoff - thisoff;
   1217       }
   1218 
   1219       pBuffer = FX_Alloc(uint8_t, size);
   1220       FXSYS_memcpy(pBuffer, pData + thisoff + offset, size);
   1221       return;
   1222     }
   1223     return;
   1224   }
   1225 
   1226   if (GetObjectType(objnum) != 1)
   1227     return;
   1228 
   1229   FX_FILESIZE pos = m_ObjectInfo[objnum].pos;
   1230   if (pos == 0)
   1231     return;
   1232 
   1233   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
   1234   m_pSyntax->RestorePos(pos);
   1235 
   1236   bool bIsNumber;
   1237   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
   1238   if (!bIsNumber) {
   1239     m_pSyntax->RestorePos(SavedPos);
   1240     return;
   1241   }
   1242 
   1243   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
   1244   if (parser_objnum && parser_objnum != objnum) {
   1245     m_pSyntax->RestorePos(SavedPos);
   1246     return;
   1247   }
   1248 
   1249   word = m_pSyntax->GetNextWord(&bIsNumber);
   1250   if (!bIsNumber) {
   1251     m_pSyntax->RestorePos(SavedPos);
   1252     return;
   1253   }
   1254 
   1255   if (m_pSyntax->GetKeyword() != "obj") {
   1256     m_pSyntax->RestorePos(SavedPos);
   1257     return;
   1258   }
   1259 
   1260   auto it = m_SortedOffset.find(pos);
   1261   if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) {
   1262     m_pSyntax->RestorePos(SavedPos);
   1263     return;
   1264   }
   1265 
   1266   FX_FILESIZE nextoff = *it;
   1267   bool bNextOffValid = false;
   1268   if (nextoff != pos) {
   1269     m_pSyntax->RestorePos(nextoff);
   1270     word = m_pSyntax->GetNextWord(&bIsNumber);
   1271     if (word == "xref") {
   1272       bNextOffValid = true;
   1273     } else if (bIsNumber) {
   1274       word = m_pSyntax->GetNextWord(&bIsNumber);
   1275       if (bIsNumber && m_pSyntax->GetKeyword() == "obj") {
   1276         bNextOffValid = true;
   1277       }
   1278     }
   1279   }
   1280 
   1281   if (!bNextOffValid) {
   1282     m_pSyntax->RestorePos(pos);
   1283     while (1) {
   1284       if (m_pSyntax->GetKeyword() == "endobj")
   1285         break;
   1286 
   1287       if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen)
   1288         break;
   1289     }
   1290     nextoff = m_pSyntax->SavePos();
   1291   }
   1292 
   1293   size = (uint32_t)(nextoff - pos);
   1294   pBuffer = FX_Alloc(uint8_t, size);
   1295   m_pSyntax->RestorePos(pos);
   1296   m_pSyntax->ReadBlock(pBuffer, size);
   1297   m_pSyntax->RestorePos(SavedPos);
   1298 }
   1299 
   1300 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt(
   1301     CPDF_IndirectObjectHolder* pObjList,
   1302     FX_FILESIZE pos,
   1303     uint32_t objnum) {
   1304   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
   1305   m_pSyntax->RestorePos(pos);
   1306   bool bIsNumber;
   1307   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
   1308   if (!bIsNumber) {
   1309     m_pSyntax->RestorePos(SavedPos);
   1310     return nullptr;
   1311   }
   1312 
   1313   FX_FILESIZE objOffset = m_pSyntax->SavePos();
   1314   objOffset -= word.GetLength();
   1315   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
   1316   if (objnum && parser_objnum != objnum) {
   1317     m_pSyntax->RestorePos(SavedPos);
   1318     return nullptr;
   1319   }
   1320 
   1321   word = m_pSyntax->GetNextWord(&bIsNumber);
   1322   if (!bIsNumber) {
   1323     m_pSyntax->RestorePos(SavedPos);
   1324     return nullptr;
   1325   }
   1326 
   1327   uint32_t parser_gennum = FXSYS_atoui(word.c_str());
   1328   if (m_pSyntax->GetKeyword() != "obj") {
   1329     m_pSyntax->RestorePos(SavedPos);
   1330     return nullptr;
   1331   }
   1332 
   1333   std::unique_ptr<CPDF_Object> pObj =
   1334       m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true);
   1335   m_pSyntax->SavePos();
   1336 
   1337   CFX_ByteString bsWord = m_pSyntax->GetKeyword();
   1338   if (bsWord == "endobj")
   1339     m_pSyntax->SavePos();
   1340 
   1341   m_pSyntax->RestorePos(SavedPos);
   1342   if (pObj) {
   1343     if (!objnum)
   1344       pObj->m_ObjNum = parser_objnum;
   1345     pObj->m_GenNum = parser_gennum;
   1346   }
   1347   return pObj;
   1348 }
   1349 
   1350 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict(
   1351     CPDF_IndirectObjectHolder* pObjList,
   1352     FX_FILESIZE pos,
   1353     uint32_t objnum,
   1354     FX_FILESIZE* pResultPos) {
   1355   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
   1356   m_pSyntax->RestorePos(pos);
   1357 
   1358   bool bIsNumber;
   1359   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
   1360   if (!bIsNumber) {
   1361     m_pSyntax->RestorePos(SavedPos);
   1362     return nullptr;
   1363   }
   1364 
   1365   uint32_t parser_objnum = FXSYS_atoui(word.c_str());
   1366   if (objnum && parser_objnum != objnum) {
   1367     m_pSyntax->RestorePos(SavedPos);
   1368     return nullptr;
   1369   }
   1370 
   1371   word = m_pSyntax->GetNextWord(&bIsNumber);
   1372   if (!bIsNumber) {
   1373     m_pSyntax->RestorePos(SavedPos);
   1374     return nullptr;
   1375   }
   1376 
   1377   uint32_t gennum = FXSYS_atoui(word.c_str());
   1378   if (m_pSyntax->GetKeyword() != "obj") {
   1379     m_pSyntax->RestorePos(SavedPos);
   1380     return nullptr;
   1381   }
   1382 
   1383   std::unique_ptr<CPDF_Object> pObj =
   1384       m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum);
   1385 
   1386   if (pResultPos)
   1387     *pResultPos = m_pSyntax->m_Pos;
   1388 
   1389   m_pSyntax->RestorePos(SavedPos);
   1390   return pObj;
   1391 }
   1392 
   1393 uint32_t CPDF_Parser::GetFirstPageNo() const {
   1394   return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0;
   1395 }
   1396 
   1397 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() {
   1398   if (m_pSyntax->GetKeyword() != "trailer")
   1399     return nullptr;
   1400 
   1401   return ToDictionary(m_pSyntax->GetObject(m_pDocument, 0, 0, true));
   1402 }
   1403 
   1404 uint32_t CPDF_Parser::GetPermissions() const {
   1405   if (!m_pSecurityHandler)
   1406     return 0xFFFFFFFF;
   1407 
   1408   uint32_t dwPermission = m_pSecurityHandler->GetPermissions();
   1409   if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") {
   1410     // See PDF Reference 1.7, page 123, table 3.20.
   1411     dwPermission &= 0xFFFFFFFC;
   1412     dwPermission |= 0xFFFFF0C0;
   1413   }
   1414   return dwPermission;
   1415 }
   1416 
   1417 bool CPDF_Parser::IsLinearizedFile(
   1418     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
   1419     uint32_t offset) {
   1420   m_pSyntax->InitParser(pFileAccess, offset);
   1421   m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9);
   1422 
   1423   FX_FILESIZE SavedPos = m_pSyntax->SavePos();
   1424   bool bIsNumber;
   1425   CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber);
   1426   if (!bIsNumber)
   1427     return false;
   1428 
   1429   uint32_t objnum = FXSYS_atoui(word.c_str());
   1430   word = m_pSyntax->GetNextWord(&bIsNumber);
   1431   if (!bIsNumber)
   1432     return false;
   1433 
   1434   uint32_t gennum = FXSYS_atoui(word.c_str());
   1435   if (m_pSyntax->GetKeyword() != "obj") {
   1436     m_pSyntax->RestorePos(SavedPos);
   1437     return false;
   1438   }
   1439 
   1440   m_pLinearized = CPDF_LinearizedHeader::CreateForObject(
   1441       m_pSyntax->GetObject(nullptr, objnum, gennum, true));
   1442   if (!m_pLinearized)
   1443     return false;
   1444 
   1445   m_LastXRefOffset = m_pLinearized->GetLastXRefOffset();
   1446   // Move parser onto first page xref table start.
   1447   m_pSyntax->GetNextWord(nullptr);
   1448   return true;
   1449 }
   1450 
   1451 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse(
   1452     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
   1453     CPDF_Document* pDocument) {
   1454   ASSERT(!m_bHasParsed);
   1455   m_bXRefStream = false;
   1456   m_LastXRefOffset = 0;
   1457 
   1458   int32_t offset = GetHeaderOffset(pFileAccess);
   1459   if (offset == -1)
   1460     return FORMAT_ERROR;
   1461 
   1462   if (!IsLinearizedFile(pFileAccess, offset)) {
   1463     m_pSyntax->m_pFileAccess = nullptr;
   1464     return StartParse(pFileAccess, std::move(pDocument));
   1465   }
   1466   m_bHasParsed = true;
   1467   m_pDocument = pDocument;
   1468 
   1469   FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos();
   1470   bool bXRefRebuilt = false;
   1471   bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, false);
   1472   if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) {
   1473     if (!RebuildCrossRef())
   1474       return FORMAT_ERROR;
   1475 
   1476     bXRefRebuilt = true;
   1477     m_LastXRefOffset = 0;
   1478   }
   1479 
   1480   if (bLoadV4) {
   1481     m_pTrailer = LoadTrailerV4();
   1482     if (!m_pTrailer)
   1483       return SUCCESS;
   1484 
   1485     int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size");
   1486     if (xrefsize > 0)
   1487       ShrinkObjectMap(xrefsize);
   1488   }
   1489 
   1490   Error eRet = SetEncryptHandler();
   1491   if (eRet != SUCCESS)
   1492     return eRet;
   1493 
   1494   m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
   1495   if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) {
   1496     if (bXRefRebuilt)
   1497       return FORMAT_ERROR;
   1498 
   1499     ReleaseEncryptHandler();
   1500     if (!RebuildCrossRef())
   1501       return FORMAT_ERROR;
   1502 
   1503     eRet = SetEncryptHandler();
   1504     if (eRet != SUCCESS)
   1505       return eRet;
   1506 
   1507     m_pDocument->LoadLinearizedDoc(m_pLinearized.get());
   1508     if (!m_pDocument->GetRoot())
   1509       return FORMAT_ERROR;
   1510   }
   1511 
   1512   if (GetRootObjNum() == 0) {
   1513     ReleaseEncryptHandler();
   1514     if (!RebuildCrossRef() || GetRootObjNum() == 0)
   1515       return FORMAT_ERROR;
   1516 
   1517     eRet = SetEncryptHandler();
   1518     if (eRet != SUCCESS)
   1519       return eRet;
   1520   }
   1521 
   1522   if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) {
   1523     if (CPDF_Reference* pMetadata =
   1524             ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")))
   1525       m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum();
   1526   }
   1527   return SUCCESS;
   1528 }
   1529 
   1530 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) {
   1531   if (!LoadCrossRefV5(&xrefpos, false))
   1532     return false;
   1533 
   1534   std::set<FX_FILESIZE> seen_xrefpos;
   1535   while (xrefpos) {
   1536     seen_xrefpos.insert(xrefpos);
   1537     if (!LoadCrossRefV5(&xrefpos, false))
   1538       return false;
   1539 
   1540     // Check for circular references.
   1541     if (pdfium::ContainsKey(seen_xrefpos, xrefpos))
   1542       return false;
   1543   }
   1544   m_ObjectStreamMap.clear();
   1545   m_bXRefStream = true;
   1546   return true;
   1547 }
   1548 
   1549 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() {
   1550   uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum;
   1551   m_pSyntax->m_MetadataObjnum = 0;
   1552   m_pTrailer.reset();
   1553   m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset);
   1554 
   1555   uint8_t ch = 0;
   1556   uint32_t dwCount = 0;
   1557   m_pSyntax->GetNextChar(ch);
   1558   while (PDFCharIsWhitespace(ch)) {
   1559     ++dwCount;
   1560     if (m_pSyntax->m_FileLen <=
   1561         (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) {
   1562       break;
   1563     }
   1564     m_pSyntax->GetNextChar(ch);
   1565   }
   1566   m_LastXRefOffset += dwCount;
   1567   m_ObjectStreamMap.clear();
   1568   m_ObjCache.clear();
   1569 
   1570   if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) &&
   1571       !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) {
   1572     m_LastXRefOffset = 0;
   1573     m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
   1574     return FORMAT_ERROR;
   1575   }
   1576 
   1577   m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum;
   1578   return SUCCESS;
   1579 }
   1580