1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/include/fpdfapi/fpdf_parser.h" 8 9 #include <algorithm> 10 #include <memory> 11 #include <set> 12 #include <utility> 13 #include <vector> 14 15 #include "core/include/fpdfapi/fpdf_module.h" 16 #include "core/include/fpdfapi/fpdf_page.h" 17 #include "core/include/fxcrt/fx_ext.h" 18 #include "core/include/fxcrt/fx_safe_types.h" 19 #include "core/src/fpdfapi/fpdf_page/pageint.h" 20 #include "core/src/fpdfapi/fpdf_parser/parser_int.h" 21 #include "third_party/base/stl_util.h" 22 23 namespace { 24 25 // A limit on the size of the xref table. Theoretical limits are higher, but 26 // this may be large enough in practice. 27 const int32_t kMaxXRefSize = 1048576; 28 29 // A limit on the maximum object number in the xref table. Theoretical limits 30 // are higher, but this may be large enough in practice. 31 const FX_DWORD kMaxObjectNumber = 1048576; 32 33 struct SearchTagRecord { 34 const char* m_pTag; 35 FX_DWORD m_Len; 36 FX_DWORD m_Offset; 37 }; 38 39 template <typename T> 40 class ScopedSetInsertion { 41 public: 42 ScopedSetInsertion(std::set<T>* org_set, T elem) 43 : m_Set(org_set), m_Entry(elem) { 44 m_Set->insert(m_Entry); 45 } 46 ~ScopedSetInsertion() { m_Set->erase(m_Entry); } 47 48 private: 49 std::set<T>* const m_Set; 50 const T m_Entry; 51 }; 52 53 int CompareFileSize(const void* p1, const void* p2) { 54 return *(FX_FILESIZE*)p1 - *(FX_FILESIZE*)p2; 55 } 56 57 int32_t GetHeaderOffset(IFX_FileRead* pFile) { 58 const FX_DWORD tag = FXDWORD_FROM_LSBFIRST(0x46445025); 59 const size_t kBufSize = 4; 60 uint8_t buf[kBufSize]; 61 int32_t offset = 0; 62 while (offset <= 1024) { 63 if (!pFile->ReadBlock(buf, offset, kBufSize)) 64 return -1; 65 66 if (*(FX_DWORD*)buf == tag) 67 return offset; 68 69 ++offset; 70 } 71 return -1; 72 } 73 74 int32_t GetDirectInteger(CPDF_Dictionary* pDict, const CFX_ByteStringC& key) { 75 CPDF_Number* pObj = ToNumber(pDict->GetElement(key)); 76 return pObj ? pObj->GetInteger() : 0; 77 } 78 79 FX_DWORD GetVarInt(const uint8_t* p, int32_t n) { 80 FX_DWORD result = 0; 81 for (int32_t i = 0; i < n; ++i) 82 result = result * 256 + p[i]; 83 return result; 84 } 85 86 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { 87 return pObjStream->GetDict()->GetInteger("N"); 88 } 89 90 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { 91 return pObjStream->GetDict()->GetInteger("First"); 92 } 93 94 bool CanReadFromBitStream(const CFX_BitStream* hStream, 95 const FX_SAFE_DWORD& num_bits) { 96 return (num_bits.IsValid() && 97 hStream->BitsRemaining() >= num_bits.ValueOrDie()); 98 } 99 100 } // namespace 101 102 // TODO(thestig) Using unique_ptr with ReleaseDeleter is still not ideal. 103 // Come up or wait for something better. 104 using ScopedFileStream = 105 std::unique_ptr<IFX_FileStream, ReleaseDeleter<IFX_FileStream>>; 106 107 bool IsSignatureDict(const CPDF_Dictionary* pDict) { 108 CPDF_Object* pType = pDict->GetElementValue("Type"); 109 if (!pType) 110 pType = pDict->GetElementValue("FT"); 111 return pType && pType->GetString() == "Sig"; 112 } 113 114 CPDF_Parser::CPDF_Parser() { 115 m_pDocument = NULL; 116 m_pTrailer = NULL; 117 m_pEncryptDict = NULL; 118 m_pLinearized = NULL; 119 m_dwFirstPageNo = 0; 120 m_dwXrefStartObjNum = 0; 121 m_bOwnFileRead = TRUE; 122 m_FileVersion = 0; 123 m_bForceUseSecurityHandler = FALSE; 124 } 125 CPDF_Parser::~CPDF_Parser() { 126 CloseParser(FALSE); 127 } 128 129 FX_DWORD CPDF_Parser::GetLastObjNum() const { 130 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; 131 } 132 133 bool CPDF_Parser::IsValidObjectNumber(FX_DWORD objnum) const { 134 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; 135 } 136 137 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { 138 m_pEncryptDict = pDict; 139 } 140 141 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(FX_DWORD objnum) const { 142 auto it = m_ObjectInfo.find(objnum); 143 return it != m_ObjectInfo.end() ? it->second.pos : 0; 144 } 145 146 void CPDF_Parser::ShrinkObjectMap(FX_DWORD objnum) { 147 if (objnum == 0) { 148 m_ObjectInfo.clear(); 149 return; 150 } 151 152 auto it = m_ObjectInfo.lower_bound(objnum); 153 while (it != m_ObjectInfo.end()) { 154 auto saved_it = it++; 155 m_ObjectInfo.erase(saved_it); 156 } 157 158 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) 159 m_ObjectInfo[objnum - 1].pos = 0; 160 } 161 162 void CPDF_Parser::CloseParser(FX_BOOL bReParse) { 163 m_bVersionUpdated = FALSE; 164 if (!bReParse) { 165 delete m_pDocument; 166 m_pDocument = NULL; 167 } 168 if (m_pTrailer) { 169 m_pTrailer->Release(); 170 m_pTrailer = NULL; 171 } 172 ReleaseEncryptHandler(); 173 SetEncryptDictionary(NULL); 174 if (m_bOwnFileRead && m_Syntax.m_pFileAccess) { 175 m_Syntax.m_pFileAccess->Release(); 176 m_Syntax.m_pFileAccess = NULL; 177 } 178 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition(); 179 while (pos) { 180 void* objnum; 181 CPDF_StreamAcc* pStream; 182 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream); 183 delete pStream; 184 } 185 m_ObjectStreamMap.RemoveAll(); 186 m_ObjCache.clear(); 187 188 m_SortedOffset.RemoveAll(); 189 m_ObjectInfo.clear(); 190 m_V5Type.RemoveAll(); 191 m_ObjVersion.RemoveAll(); 192 int32_t iLen = m_Trailers.GetSize(); 193 for (int32_t i = 0; i < iLen; ++i) { 194 if (CPDF_Dictionary* trailer = m_Trailers.GetAt(i)) 195 trailer->Release(); 196 } 197 m_Trailers.RemoveAll(); 198 if (m_pLinearized) { 199 m_pLinearized->Release(); 200 m_pLinearized = NULL; 201 } 202 } 203 CPDF_SecurityHandler* FPDF_CreateStandardSecurityHandler(); 204 CPDF_SecurityHandler* FPDF_CreatePubKeyHandler(void*); 205 FX_DWORD CPDF_Parser::StartParse(IFX_FileRead* pFileAccess, 206 FX_BOOL bReParse, 207 FX_BOOL bOwnFileRead) { 208 CloseParser(bReParse); 209 m_bXRefStream = FALSE; 210 m_LastXRefOffset = 0; 211 m_bOwnFileRead = bOwnFileRead; 212 213 int32_t offset = GetHeaderOffset(pFileAccess); 214 if (offset == -1) { 215 if (bOwnFileRead && pFileAccess) 216 pFileAccess->Release(); 217 return PDFPARSE_ERROR_FORMAT; 218 } 219 m_Syntax.InitParser(pFileAccess, offset); 220 221 uint8_t ch; 222 if (!m_Syntax.GetCharAt(5, ch)) 223 return PDFPARSE_ERROR_FORMAT; 224 if (std::isdigit(ch)) 225 m_FileVersion = FXSYS_toDecimalDigit(ch) * 10; 226 227 if (!m_Syntax.GetCharAt(7, ch)) 228 return PDFPARSE_ERROR_FORMAT; 229 if (std::isdigit(ch)) 230 m_FileVersion += FXSYS_toDecimalDigit(ch); 231 232 if (m_Syntax.m_FileLen < m_Syntax.m_HeaderOffset + 9) 233 return PDFPARSE_ERROR_FORMAT; 234 235 m_Syntax.RestorePos(m_Syntax.m_FileLen - m_Syntax.m_HeaderOffset - 9); 236 if (!bReParse) 237 m_pDocument = new CPDF_Document(this); 238 239 FX_BOOL bXRefRebuilt = FALSE; 240 if (m_Syntax.SearchWord("startxref", TRUE, FALSE, 4096)) { 241 FX_FILESIZE startxref_offset = m_Syntax.SavePos(); 242 void* pResult = FXSYS_bsearch(&startxref_offset, m_SortedOffset.GetData(), 243 m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), 244 CompareFileSize); 245 if (!pResult) 246 m_SortedOffset.Add(startxref_offset); 247 248 m_Syntax.GetKeyword(); 249 bool bNumber; 250 CFX_ByteString xrefpos_str = m_Syntax.GetNextWord(&bNumber); 251 if (!bNumber) 252 return PDFPARSE_ERROR_FORMAT; 253 254 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); 255 if (!LoadAllCrossRefV4(m_LastXRefOffset) && 256 !LoadAllCrossRefV5(m_LastXRefOffset)) { 257 if (!RebuildCrossRef()) 258 return PDFPARSE_ERROR_FORMAT; 259 260 bXRefRebuilt = TRUE; 261 m_LastXRefOffset = 0; 262 } 263 } else { 264 if (!RebuildCrossRef()) 265 return PDFPARSE_ERROR_FORMAT; 266 267 bXRefRebuilt = TRUE; 268 } 269 FX_DWORD dwRet = SetEncryptHandler(); 270 if (dwRet != PDFPARSE_ERROR_SUCCESS) 271 return dwRet; 272 273 m_pDocument->LoadDoc(); 274 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 275 if (bXRefRebuilt) 276 return PDFPARSE_ERROR_FORMAT; 277 278 ReleaseEncryptHandler(); 279 if (!RebuildCrossRef()) 280 return PDFPARSE_ERROR_FORMAT; 281 282 dwRet = SetEncryptHandler(); 283 if (dwRet != PDFPARSE_ERROR_SUCCESS) 284 return dwRet; 285 286 m_pDocument->LoadDoc(); 287 if (!m_pDocument->GetRoot()) 288 return PDFPARSE_ERROR_FORMAT; 289 } 290 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 291 sizeof(FX_FILESIZE), CompareFileSize); 292 if (GetRootObjNum() == 0) { 293 ReleaseEncryptHandler(); 294 if (!RebuildCrossRef() || GetRootObjNum() == 0) 295 return PDFPARSE_ERROR_FORMAT; 296 297 dwRet = SetEncryptHandler(); 298 if (dwRet != PDFPARSE_ERROR_SUCCESS) 299 return dwRet; 300 } 301 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { 302 CPDF_Reference* pMetadata = 303 ToReference(m_pDocument->GetRoot()->GetElement("Metadata")); 304 if (pMetadata) 305 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum(); 306 } 307 return PDFPARSE_ERROR_SUCCESS; 308 } 309 FX_DWORD CPDF_Parser::SetEncryptHandler() { 310 ReleaseEncryptHandler(); 311 SetEncryptDictionary(NULL); 312 if (!m_pTrailer) { 313 return PDFPARSE_ERROR_FORMAT; 314 } 315 CPDF_Object* pEncryptObj = m_pTrailer->GetElement("Encrypt"); 316 if (pEncryptObj) { 317 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { 318 SetEncryptDictionary(pEncryptDict); 319 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { 320 pEncryptObj = 321 m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr); 322 if (pEncryptObj) 323 SetEncryptDictionary(pEncryptObj->GetDict()); 324 } 325 } 326 if (m_bForceUseSecurityHandler) { 327 FX_DWORD err = PDFPARSE_ERROR_HANDLER; 328 if (!m_pSecurityHandler) { 329 return PDFPARSE_ERROR_HANDLER; 330 } 331 if (!m_pSecurityHandler->OnInit(this, m_pEncryptDict)) { 332 return err; 333 } 334 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( 335 m_pSecurityHandler->CreateCryptoHandler()); 336 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) { 337 return PDFPARSE_ERROR_HANDLER; 338 } 339 m_Syntax.SetEncrypt(pCryptoHandler.release()); 340 } else if (m_pEncryptDict) { 341 CFX_ByteString filter = m_pEncryptDict->GetString("Filter"); 342 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler; 343 FX_DWORD err = PDFPARSE_ERROR_HANDLER; 344 if (filter == "Standard") { 345 pSecurityHandler.reset(FPDF_CreateStandardSecurityHandler()); 346 err = PDFPARSE_ERROR_PASSWORD; 347 } 348 if (!pSecurityHandler) { 349 return PDFPARSE_ERROR_HANDLER; 350 } 351 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) { 352 return err; 353 } 354 m_pSecurityHandler = std::move(pSecurityHandler); 355 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( 356 m_pSecurityHandler->CreateCryptoHandler()); 357 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) { 358 return PDFPARSE_ERROR_HANDLER; 359 } 360 m_Syntax.SetEncrypt(pCryptoHandler.release()); 361 } 362 return PDFPARSE_ERROR_SUCCESS; 363 } 364 void CPDF_Parser::ReleaseEncryptHandler() { 365 m_Syntax.m_pCryptoHandler.reset(); 366 if (!m_bForceUseSecurityHandler) { 367 m_pSecurityHandler.reset(); 368 } 369 } 370 371 FX_FILESIZE CPDF_Parser::GetObjectOffset(FX_DWORD objnum) const { 372 if (!IsValidObjectNumber(objnum)) 373 return 0; 374 375 if (m_V5Type[objnum] == 1) 376 return GetObjectPositionOrZero(objnum); 377 378 if (m_V5Type[objnum] == 2) { 379 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); 380 return GetObjectPositionOrZero(pos); 381 } 382 return 0; 383 } 384 385 FX_BOOL CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { 386 if (!LoadCrossRefV4(xrefpos, 0, TRUE)) { 387 return FALSE; 388 } 389 m_pTrailer = LoadTrailerV4(); 390 if (!m_pTrailer) { 391 return FALSE; 392 } 393 394 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); 395 if (xrefsize <= 0 || xrefsize > kMaxXRefSize) { 396 return FALSE; 397 } 398 ShrinkObjectMap(xrefsize); 399 m_V5Type.SetSize(xrefsize); 400 CFX_FileSizeArray CrossRefList; 401 CFX_FileSizeArray XRefStreamList; 402 CrossRefList.Add(xrefpos); 403 XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm")); 404 405 std::set<FX_FILESIZE> seen_xrefpos; 406 seen_xrefpos.insert(xrefpos); 407 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not 408 // numerical, GetDirectInteger() returns 0. Loading will end. 409 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); 410 while (xrefpos) { 411 // Check for circular references. 412 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 413 return FALSE; 414 seen_xrefpos.insert(xrefpos); 415 CrossRefList.InsertAt(0, xrefpos); 416 LoadCrossRefV4(xrefpos, 0, TRUE); 417 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( 418 LoadTrailerV4()); 419 if (!pDict) 420 return FALSE; 421 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 422 423 XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm")); 424 m_Trailers.Add(pDict.release()); 425 } 426 for (int32_t i = 0; i < CrossRefList.GetSize(); i++) { 427 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) 428 return FALSE; 429 } 430 return TRUE; 431 } 432 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, 433 FX_DWORD dwObjCount) { 434 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) { 435 return FALSE; 436 } 437 m_pTrailer = LoadTrailerV4(); 438 if (!m_pTrailer) { 439 return FALSE; 440 } 441 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); 442 if (xrefsize == 0) { 443 return FALSE; 444 } 445 CFX_FileSizeArray CrossRefList, XRefStreamList; 446 CrossRefList.Add(xrefpos); 447 XRefStreamList.Add(GetDirectInteger(m_pTrailer, "XRefStm")); 448 449 std::set<FX_FILESIZE> seen_xrefpos; 450 seen_xrefpos.insert(xrefpos); 451 xrefpos = GetDirectInteger(m_pTrailer, "Prev"); 452 while (xrefpos) { 453 // Check for circular references. 454 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 455 return FALSE; 456 seen_xrefpos.insert(xrefpos); 457 CrossRefList.InsertAt(0, xrefpos); 458 LoadCrossRefV4(xrefpos, 0, TRUE); 459 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( 460 LoadTrailerV4()); 461 if (!pDict) { 462 return FALSE; 463 } 464 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 465 466 XRefStreamList.InsertAt(0, pDict->GetInteger("XRefStm")); 467 m_Trailers.Add(pDict.release()); 468 } 469 for (int32_t i = 1; i < CrossRefList.GetSize(); i++) 470 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], FALSE)) { 471 return FALSE; 472 } 473 return TRUE; 474 } 475 FX_BOOL CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, 476 FX_DWORD dwObjCount) { 477 FX_FILESIZE dwStartPos = pos - m_Syntax.m_HeaderOffset; 478 m_Syntax.RestorePos(dwStartPos); 479 void* pResult = 480 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 481 sizeof(FX_FILESIZE), CompareFileSize); 482 if (!pResult) { 483 m_SortedOffset.Add(pos); 484 } 485 FX_DWORD start_objnum = 0; 486 FX_DWORD count = dwObjCount; 487 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 488 const int32_t recordsize = 20; 489 std::vector<char> buf(1024 * recordsize + 1); 490 buf[1024 * recordsize] = '\0'; 491 int32_t nBlocks = count / 1024 + 1; 492 for (int32_t block = 0; block < nBlocks; block++) { 493 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 494 FX_DWORD dwReadSize = block_size * recordsize; 495 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_Syntax.m_FileLen) { 496 return FALSE; 497 } 498 if (!m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 499 dwReadSize)) { 500 return FALSE; 501 } 502 for (int32_t i = 0; i < block_size; i++) { 503 FX_DWORD objnum = start_objnum + block * 1024 + i; 504 char* pEntry = &buf[i * recordsize]; 505 if (pEntry[17] == 'f') { 506 m_ObjectInfo[objnum].pos = 0; 507 m_V5Type.SetAtGrow(objnum, 0); 508 } else { 509 int32_t offset = FXSYS_atoi(pEntry); 510 if (offset == 0) { 511 for (int32_t c = 0; c < 10; c++) { 512 if (!std::isdigit(pEntry[c])) 513 return FALSE; 514 } 515 } 516 m_ObjectInfo[objnum].pos = offset; 517 int32_t version = FXSYS_atoi(pEntry + 11); 518 if (version >= 1) { 519 m_bVersionUpdated = TRUE; 520 } 521 m_ObjVersion.SetAtGrow(objnum, version); 522 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen) { 523 void* pResult = FXSYS_bsearch( 524 &m_ObjectInfo[objnum].pos, m_SortedOffset.GetData(), 525 m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), CompareFileSize); 526 if (!pResult) { 527 m_SortedOffset.Add(m_ObjectInfo[objnum].pos); 528 } 529 } 530 m_V5Type.SetAtGrow(objnum, 1); 531 } 532 } 533 } 534 m_Syntax.RestorePos(SavedPos + count * recordsize); 535 return TRUE; 536 } 537 538 bool CPDF_Parser::FindPosInOffsets(FX_FILESIZE pos) const { 539 return FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 540 sizeof(FX_FILESIZE), CompareFileSize); 541 } 542 543 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, 544 FX_FILESIZE streampos, 545 FX_BOOL bSkip) { 546 m_Syntax.RestorePos(pos); 547 if (m_Syntax.GetKeyword() != "xref") 548 return false; 549 550 if (!FindPosInOffsets(pos)) 551 m_SortedOffset.Add(pos); 552 553 if (streampos && !FindPosInOffsets(streampos)) 554 m_SortedOffset.Add(streampos); 555 556 while (1) { 557 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 558 bool bIsNumber; 559 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 560 if (word.IsEmpty()) 561 return false; 562 563 if (!bIsNumber) { 564 m_Syntax.RestorePos(SavedPos); 565 break; 566 } 567 FX_DWORD start_objnum = FXSYS_atoi(word); 568 if (start_objnum >= kMaxObjectNumber) 569 return false; 570 571 FX_DWORD count = m_Syntax.GetDirectNum(); 572 m_Syntax.ToNextWord(); 573 SavedPos = m_Syntax.SavePos(); 574 const int32_t recordsize = 20; 575 m_dwXrefStartObjNum = start_objnum; 576 if (!bSkip) { 577 std::vector<char> buf(1024 * recordsize + 1); 578 buf[1024 * recordsize] = '\0'; 579 int32_t nBlocks = count / 1024 + 1; 580 for (int32_t block = 0; block < nBlocks; block++) { 581 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 582 m_Syntax.ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 583 block_size * recordsize); 584 for (int32_t i = 0; i < block_size; i++) { 585 FX_DWORD objnum = start_objnum + block * 1024 + i; 586 char* pEntry = &buf[i * recordsize]; 587 if (pEntry[17] == 'f') { 588 m_ObjectInfo[objnum].pos = 0; 589 m_V5Type.SetAtGrow(objnum, 0); 590 } else { 591 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); 592 if (offset == 0) { 593 for (int32_t c = 0; c < 10; c++) { 594 if (!std::isdigit(pEntry[c])) 595 return false; 596 } 597 } 598 m_ObjectInfo[objnum].pos = offset; 599 int32_t version = FXSYS_atoi(pEntry + 11); 600 if (version >= 1) { 601 m_bVersionUpdated = TRUE; 602 } 603 m_ObjVersion.SetAtGrow(objnum, version); 604 if (m_ObjectInfo[objnum].pos < m_Syntax.m_FileLen && 605 !FindPosInOffsets(m_ObjectInfo[objnum].pos)) { 606 m_SortedOffset.Add(m_ObjectInfo[objnum].pos); 607 } 608 m_V5Type.SetAtGrow(objnum, 1); 609 } 610 } 611 } 612 } 613 m_Syntax.RestorePos(SavedPos + count * recordsize); 614 } 615 return !streampos || LoadCrossRefV5(&streampos, FALSE); 616 } 617 618 FX_BOOL CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { 619 if (!LoadCrossRefV5(&xrefpos, TRUE)) { 620 return FALSE; 621 } 622 std::set<FX_FILESIZE> seen_xrefpos; 623 while (xrefpos) { 624 seen_xrefpos.insert(xrefpos); 625 if (!LoadCrossRefV5(&xrefpos, FALSE)) { 626 return FALSE; 627 } 628 // Check for circular references. 629 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) { 630 return FALSE; 631 } 632 } 633 m_ObjectStreamMap.InitHashTable(101, FALSE); 634 m_bXRefStream = TRUE; 635 return TRUE; 636 } 637 638 FX_BOOL CPDF_Parser::RebuildCrossRef() { 639 m_ObjectInfo.clear(); 640 m_V5Type.RemoveAll(); 641 m_SortedOffset.RemoveAll(); 642 m_ObjVersion.RemoveAll(); 643 if (m_pTrailer) { 644 m_pTrailer->Release(); 645 m_pTrailer = NULL; 646 } 647 int32_t status = 0; 648 int32_t inside_index = 0; 649 FX_DWORD objnum = 0; 650 FX_DWORD gennum = 0; 651 int32_t depth = 0; 652 const FX_DWORD kBufferSize = 4096; 653 std::vector<uint8_t> buffer(kBufferSize); 654 FX_FILESIZE pos = m_Syntax.m_HeaderOffset; 655 FX_FILESIZE start_pos = 0; 656 FX_FILESIZE start_pos1 = 0; 657 FX_FILESIZE last_obj = -1; 658 FX_FILESIZE last_xref = -1; 659 FX_FILESIZE last_trailer = -1; 660 while (pos < m_Syntax.m_FileLen) { 661 const FX_FILESIZE saved_pos = pos; 662 bool bOverFlow = false; 663 FX_DWORD size = std::min((FX_DWORD)(m_Syntax.m_FileLen - pos), kBufferSize); 664 if (!m_Syntax.m_pFileAccess->ReadBlock(buffer.data(), pos, size)) 665 break; 666 667 for (FX_DWORD i = 0; i < size; i++) { 668 uint8_t byte = buffer[i]; 669 switch (status) { 670 case 0: 671 if (PDFCharIsWhitespace(byte)) 672 status = 1; 673 674 if (std::isdigit(byte)) { 675 --i; 676 status = 1; 677 } 678 679 if (byte == '%') { 680 inside_index = 0; 681 status = 9; 682 } 683 684 if (byte == '(') { 685 status = 10; 686 depth = 1; 687 } 688 689 if (byte == '<') { 690 inside_index = 1; 691 status = 11; 692 } 693 694 if (byte == '\\') 695 status = 13; 696 697 if (byte == 't') { 698 status = 7; 699 inside_index = 1; 700 } 701 break; 702 case 1: 703 if (PDFCharIsWhitespace(byte)) { 704 break; 705 } else if (std::isdigit(byte)) { 706 start_pos = pos + i; 707 status = 2; 708 objnum = FXSYS_toDecimalDigit(byte); 709 } else if (byte == 't') { 710 status = 7; 711 inside_index = 1; 712 } else if (byte == 'x') { 713 status = 8; 714 inside_index = 1; 715 } else { 716 --i; 717 status = 0; 718 } 719 break; 720 case 2: 721 if (std::isdigit(byte)) { 722 objnum = objnum * 10 + FXSYS_toDecimalDigit(byte); 723 break; 724 } else if (PDFCharIsWhitespace(byte)) { 725 status = 3; 726 } else { 727 --i; 728 status = 14; 729 inside_index = 0; 730 } 731 break; 732 case 3: 733 if (std::isdigit(byte)) { 734 start_pos1 = pos + i; 735 status = 4; 736 gennum = FXSYS_toDecimalDigit(byte); 737 } else if (PDFCharIsWhitespace(byte)) { 738 break; 739 } else if (byte == 't') { 740 status = 7; 741 inside_index = 1; 742 } else { 743 --i; 744 status = 0; 745 } 746 break; 747 case 4: 748 if (std::isdigit(byte)) { 749 gennum = gennum * 10 + FXSYS_toDecimalDigit(byte); 750 break; 751 } else if (PDFCharIsWhitespace(byte)) { 752 status = 5; 753 } else { 754 --i; 755 status = 0; 756 } 757 break; 758 case 5: 759 if (byte == 'o') { 760 status = 6; 761 inside_index = 1; 762 } else if (PDFCharIsWhitespace(byte)) { 763 break; 764 } else if (std::isdigit(byte)) { 765 objnum = gennum; 766 gennum = FXSYS_toDecimalDigit(byte); 767 start_pos = start_pos1; 768 start_pos1 = pos + i; 769 status = 4; 770 } else if (byte == 't') { 771 status = 7; 772 inside_index = 1; 773 } else { 774 --i; 775 status = 0; 776 } 777 break; 778 case 6: 779 switch (inside_index) { 780 case 1: 781 if (byte != 'b') { 782 --i; 783 status = 0; 784 } else { 785 inside_index++; 786 } 787 break; 788 case 2: 789 if (byte != 'j') { 790 --i; 791 status = 0; 792 } else { 793 inside_index++; 794 } 795 break; 796 case 3: 797 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 798 if (objnum > 0x1000000) { 799 status = 0; 800 break; 801 } 802 FX_FILESIZE obj_pos = start_pos - m_Syntax.m_HeaderOffset; 803 last_obj = start_pos; 804 void* pResult = 805 FXSYS_bsearch(&obj_pos, m_SortedOffset.GetData(), 806 m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), 807 CompareFileSize); 808 if (!pResult) { 809 m_SortedOffset.Add(obj_pos); 810 } 811 FX_FILESIZE obj_end = 0; 812 CPDF_Object* pObject = ParseIndirectObjectAtByStrict( 813 m_pDocument, obj_pos, objnum, NULL, &obj_end); 814 if (CPDF_Stream* pStream = ToStream(pObject)) { 815 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 816 if ((pDict->KeyExist("Type")) && 817 (pDict->GetString("Type") == "XRef" && 818 pDict->KeyExist("Size"))) { 819 CPDF_Object* pRoot = pDict->GetElement("Root"); 820 if (pRoot && pRoot->GetDict() && 821 pRoot->GetDict()->GetElement("Pages")) { 822 if (m_pTrailer) 823 m_pTrailer->Release(); 824 m_pTrailer = ToDictionary(pDict->Clone()); 825 } 826 } 827 } 828 } 829 FX_FILESIZE offset = 0; 830 m_Syntax.RestorePos(obj_pos); 831 offset = m_Syntax.FindTag("obj", 0); 832 if (offset == -1) { 833 offset = 0; 834 } else { 835 offset += 3; 836 } 837 FX_FILESIZE nLen = obj_end - obj_pos - offset; 838 if ((FX_DWORD)nLen > size - i) { 839 pos = obj_end + m_Syntax.m_HeaderOffset; 840 bOverFlow = true; 841 } else { 842 i += (FX_DWORD)nLen; 843 } 844 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && 845 m_ObjectInfo[objnum].pos) { 846 if (pObject) { 847 FX_DWORD oldgen = m_ObjVersion.GetAt(objnum); 848 m_ObjectInfo[objnum].pos = obj_pos; 849 m_ObjVersion.SetAt(objnum, (int16_t)gennum); 850 if (oldgen != gennum) { 851 m_bVersionUpdated = TRUE; 852 } 853 } 854 } else { 855 m_ObjectInfo[objnum].pos = obj_pos; 856 m_V5Type.SetAtGrow(objnum, 1); 857 m_ObjVersion.SetAtGrow(objnum, (int16_t)gennum); 858 } 859 if (pObject) { 860 pObject->Release(); 861 } 862 } 863 --i; 864 status = 0; 865 break; 866 } 867 break; 868 case 7: 869 if (inside_index == 7) { 870 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 871 last_trailer = pos + i - 7; 872 m_Syntax.RestorePos(pos + i - m_Syntax.m_HeaderOffset); 873 CPDF_Object* pObj = 874 m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true); 875 if (pObj) { 876 if (!pObj->IsDictionary() && !pObj->AsStream()) { 877 pObj->Release(); 878 } else { 879 CPDF_Stream* pStream = pObj->AsStream(); 880 if (CPDF_Dictionary* pTrailer = 881 pStream ? pStream->GetDict() : pObj->AsDictionary()) { 882 if (m_pTrailer) { 883 CPDF_Object* pRoot = pTrailer->GetElement("Root"); 884 CPDF_Reference* pRef = ToReference(pRoot); 885 if (!pRoot || 886 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && 887 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { 888 auto it = pTrailer->begin(); 889 while (it != pTrailer->end()) { 890 const CFX_ByteString& key = it->first; 891 CPDF_Object* pElement = it->second; 892 ++it; 893 FX_DWORD dwObjNum = 894 pElement ? pElement->GetObjNum() : 0; 895 if (dwObjNum) { 896 m_pTrailer->SetAtReference(key, m_pDocument, 897 dwObjNum); 898 } else { 899 m_pTrailer->SetAt(key, pElement->Clone()); 900 } 901 } 902 pObj->Release(); 903 } else { 904 pObj->Release(); 905 } 906 } else { 907 if (pObj->IsStream()) { 908 m_pTrailer = ToDictionary(pTrailer->Clone()); 909 pObj->Release(); 910 } else { 911 m_pTrailer = pTrailer; 912 } 913 FX_FILESIZE dwSavePos = m_Syntax.SavePos(); 914 CFX_ByteString strWord = m_Syntax.GetKeyword(); 915 if (!strWord.Compare("startxref")) { 916 bool bNumber; 917 CFX_ByteString bsOffset = 918 m_Syntax.GetNextWord(&bNumber); 919 if (bNumber) { 920 m_LastXRefOffset = FXSYS_atoi(bsOffset); 921 } 922 } 923 m_Syntax.RestorePos(dwSavePos); 924 } 925 } else { 926 pObj->Release(); 927 } 928 } 929 } 930 } 931 --i; 932 status = 0; 933 } else if (byte == "trailer"[inside_index]) { 934 inside_index++; 935 } else { 936 --i; 937 status = 0; 938 } 939 break; 940 case 8: 941 if (inside_index == 4) { 942 last_xref = pos + i - 4; 943 status = 1; 944 } else if (byte == "xref"[inside_index]) { 945 inside_index++; 946 } else { 947 --i; 948 status = 0; 949 } 950 break; 951 case 9: 952 if (byte == '\r' || byte == '\n') { 953 status = 0; 954 } 955 break; 956 case 10: 957 if (byte == ')') { 958 if (depth > 0) { 959 depth--; 960 } 961 } else if (byte == '(') { 962 depth++; 963 } 964 if (!depth) { 965 status = 0; 966 } 967 break; 968 case 11: 969 if (byte == '>' || (byte == '<' && inside_index == 1)) 970 status = 0; 971 inside_index = 0; 972 break; 973 case 13: 974 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { 975 --i; 976 status = 0; 977 } 978 break; 979 case 14: 980 if (PDFCharIsWhitespace(byte)) { 981 status = 0; 982 } else if (byte == '%' || byte == '(' || byte == '<' || 983 byte == '\\') { 984 status = 0; 985 --i; 986 } else if (inside_index == 6) { 987 status = 0; 988 --i; 989 } else if (byte == "endobj"[inside_index]) { 990 inside_index++; 991 } 992 break; 993 } 994 if (bOverFlow) { 995 size = 0; 996 break; 997 } 998 } 999 pos += size; 1000 1001 // If the position has not changed at all in a loop iteration, then break 1002 // out to prevent infinite looping. 1003 if (pos == saved_pos) 1004 break; 1005 } 1006 if (last_xref != -1 && last_xref > last_obj) { 1007 last_trailer = last_xref; 1008 } else if (last_trailer == -1 || last_xref < last_obj) { 1009 last_trailer = m_Syntax.m_FileLen; 1010 } 1011 FX_FILESIZE offset = last_trailer - m_Syntax.m_HeaderOffset; 1012 void* pResult = 1013 FXSYS_bsearch(&offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1014 sizeof(FX_FILESIZE), CompareFileSize); 1015 if (!pResult) { 1016 m_SortedOffset.Add(offset); 1017 } 1018 return m_pTrailer && !m_ObjectInfo.empty(); 1019 } 1020 1021 FX_BOOL CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, FX_BOOL bMainXRef) { 1022 CPDF_Object* pObject = ParseIndirectObjectAt(m_pDocument, *pos, 0, nullptr); 1023 if (!pObject) 1024 return FALSE; 1025 if (m_pDocument) { 1026 FX_BOOL bInserted = FALSE; 1027 CPDF_Dictionary* pDict = m_pDocument->GetRoot(); 1028 if (!pDict || pDict->GetObjNum() != pObject->m_ObjNum) { 1029 bInserted = m_pDocument->InsertIndirectObject(pObject->m_ObjNum, pObject); 1030 } else { 1031 if (pObject->IsStream()) 1032 pObject->Release(); 1033 } 1034 if (!bInserted) 1035 return FALSE; 1036 } 1037 1038 CPDF_Stream* pStream = pObject->AsStream(); 1039 if (!pStream) 1040 return FALSE; 1041 1042 *pos = pStream->GetDict()->GetInteger("Prev"); 1043 int32_t size = pStream->GetDict()->GetInteger("Size"); 1044 if (size < 0) { 1045 pStream->Release(); 1046 return FALSE; 1047 } 1048 if (bMainXRef) { 1049 m_pTrailer = ToDictionary(pStream->GetDict()->Clone()); 1050 ShrinkObjectMap(size); 1051 if (m_V5Type.SetSize(size)) { 1052 FXSYS_memset(m_V5Type.GetData(), 0, size); 1053 } 1054 } else { 1055 m_Trailers.Add(ToDictionary(pStream->GetDict()->Clone())); 1056 } 1057 std::vector<std::pair<int32_t, int32_t> > arrIndex; 1058 CPDF_Array* pArray = pStream->GetDict()->GetArray("Index"); 1059 if (pArray) { 1060 FX_DWORD nPairSize = pArray->GetCount() / 2; 1061 for (FX_DWORD i = 0; i < nPairSize; i++) { 1062 CPDF_Object* pStartNumObj = pArray->GetElement(i * 2); 1063 CPDF_Object* pCountObj = pArray->GetElement(i * 2 + 1); 1064 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { 1065 int nStartNum = pStartNumObj->GetInteger(); 1066 int nCount = pCountObj->GetInteger(); 1067 if (nStartNum >= 0 && nCount > 0) { 1068 arrIndex.push_back(std::make_pair(nStartNum, nCount)); 1069 } 1070 } 1071 } 1072 } 1073 if (arrIndex.size() == 0) { 1074 arrIndex.push_back(std::make_pair(0, size)); 1075 } 1076 pArray = pStream->GetDict()->GetArray("W"); 1077 if (!pArray) { 1078 pStream->Release(); 1079 return FALSE; 1080 } 1081 CFX_DWordArray WidthArray; 1082 FX_SAFE_DWORD dwAccWidth = 0; 1083 for (FX_DWORD i = 0; i < pArray->GetCount(); i++) { 1084 WidthArray.Add(pArray->GetInteger(i)); 1085 dwAccWidth += WidthArray[i]; 1086 } 1087 if (!dwAccWidth.IsValid() || WidthArray.GetSize() < 3) { 1088 pStream->Release(); 1089 return FALSE; 1090 } 1091 FX_DWORD totalWidth = dwAccWidth.ValueOrDie(); 1092 CPDF_StreamAcc acc; 1093 acc.LoadAllData(pStream); 1094 const uint8_t* pData = acc.GetData(); 1095 FX_DWORD dwTotalSize = acc.GetSize(); 1096 FX_DWORD segindex = 0; 1097 for (FX_DWORD i = 0; i < arrIndex.size(); i++) { 1098 int32_t startnum = arrIndex[i].first; 1099 if (startnum < 0) { 1100 continue; 1101 } 1102 m_dwXrefStartObjNum = 1103 pdfium::base::checked_cast<FX_DWORD, int32_t>(startnum); 1104 FX_DWORD count = 1105 pdfium::base::checked_cast<FX_DWORD, int32_t>(arrIndex[i].second); 1106 FX_SAFE_DWORD dwCaculatedSize = segindex; 1107 dwCaculatedSize += count; 1108 dwCaculatedSize *= totalWidth; 1109 if (!dwCaculatedSize.IsValid() || 1110 dwCaculatedSize.ValueOrDie() > dwTotalSize) { 1111 continue; 1112 } 1113 const uint8_t* segstart = pData + segindex * totalWidth; 1114 FX_SAFE_DWORD dwMaxObjNum = startnum; 1115 dwMaxObjNum += count; 1116 FX_DWORD dwV5Size = 1117 pdfium::base::checked_cast<FX_DWORD, int32_t>(m_V5Type.GetSize()); 1118 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) { 1119 continue; 1120 } 1121 for (FX_DWORD j = 0; j < count; j++) { 1122 int32_t type = 1; 1123 const uint8_t* entrystart = segstart + j * totalWidth; 1124 if (WidthArray[0]) { 1125 type = GetVarInt(entrystart, WidthArray[0]); 1126 } 1127 if (m_V5Type[startnum + j] == 255) { 1128 FX_FILESIZE offset = 1129 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1130 m_ObjectInfo[startnum + j].pos = offset; 1131 void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), 1132 m_SortedOffset.GetSize(), 1133 sizeof(FX_FILESIZE), CompareFileSize); 1134 if (!pResult) { 1135 m_SortedOffset.Add(offset); 1136 } 1137 continue; 1138 } 1139 if (m_V5Type[startnum + j]) { 1140 continue; 1141 } 1142 m_V5Type[startnum + j] = type; 1143 if (type == 0) { 1144 m_ObjectInfo[startnum + j].pos = 0; 1145 } else { 1146 FX_FILESIZE offset = 1147 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1148 m_ObjectInfo[startnum + j].pos = offset; 1149 if (type == 1) { 1150 void* pResult = FXSYS_bsearch(&offset, m_SortedOffset.GetData(), 1151 m_SortedOffset.GetSize(), 1152 sizeof(FX_FILESIZE), CompareFileSize); 1153 if (!pResult) { 1154 m_SortedOffset.Add(offset); 1155 } 1156 } else { 1157 if (offset < 0 || offset >= m_V5Type.GetSize()) { 1158 pStream->Release(); 1159 return FALSE; 1160 } 1161 m_V5Type[offset] = 255; 1162 } 1163 } 1164 } 1165 segindex += count; 1166 } 1167 pStream->Release(); 1168 return TRUE; 1169 } 1170 CPDF_Array* CPDF_Parser::GetIDArray() { 1171 CPDF_Object* pID = m_pTrailer ? m_pTrailer->GetElement("ID") : NULL; 1172 if (!pID) 1173 return nullptr; 1174 1175 if (CPDF_Reference* pRef = pID->AsReference()) { 1176 pID = ParseIndirectObject(nullptr, pRef->GetRefObjNum()); 1177 m_pTrailer->SetAt("ID", pID); 1178 } 1179 return ToArray(pID); 1180 } 1181 FX_DWORD CPDF_Parser::GetRootObjNum() { 1182 CPDF_Reference* pRef = 1183 ToReference(m_pTrailer ? m_pTrailer->GetElement("Root") : nullptr); 1184 return pRef ? pRef->GetRefObjNum() : 0; 1185 } 1186 FX_DWORD CPDF_Parser::GetInfoObjNum() { 1187 CPDF_Reference* pRef = 1188 ToReference(m_pTrailer ? m_pTrailer->GetElement("Info") : nullptr); 1189 return pRef ? pRef->GetRefObjNum() : 0; 1190 } 1191 FX_BOOL CPDF_Parser::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) { 1192 bForm = FALSE; 1193 if (!IsValidObjectNumber(objnum)) 1194 return TRUE; 1195 if (m_V5Type[objnum] == 0) 1196 return TRUE; 1197 if (m_V5Type[objnum] == 2) 1198 return TRUE; 1199 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1200 void* pResult = 1201 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1202 sizeof(FX_FILESIZE), CompareFileSize); 1203 if (!pResult) { 1204 return TRUE; 1205 } 1206 if ((FX_FILESIZE*)pResult - (FX_FILESIZE*)m_SortedOffset.GetData() == 1207 m_SortedOffset.GetSize() - 1) { 1208 return FALSE; 1209 } 1210 FX_FILESIZE size = ((FX_FILESIZE*)pResult)[1] - pos; 1211 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1212 m_Syntax.RestorePos(pos); 1213 const char kFormStream[] = "/Form\0stream"; 1214 const CFX_ByteStringC kFormStreamStr(kFormStream, sizeof(kFormStream) - 1); 1215 bForm = m_Syntax.SearchMultiWord(kFormStreamStr, TRUE, size) == 0; 1216 m_Syntax.RestorePos(SavedPos); 1217 return TRUE; 1218 } 1219 1220 CPDF_Object* CPDF_Parser::ParseIndirectObject( 1221 CPDF_IndirectObjectHolder* pObjList, 1222 FX_DWORD objnum, 1223 PARSE_CONTEXT* pContext) { 1224 if (!IsValidObjectNumber(objnum)) 1225 return nullptr; 1226 1227 // Prevent circular parsing the same object. 1228 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) 1229 return nullptr; 1230 ScopedSetInsertion<FX_DWORD> local_insert(&m_ParsingObjNums, objnum); 1231 1232 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) { 1233 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1234 if (pos <= 0) 1235 return nullptr; 1236 return ParseIndirectObjectAt(pObjList, pos, objnum, pContext); 1237 } 1238 if (m_V5Type[objnum] != 2) 1239 return nullptr; 1240 1241 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); 1242 if (!pObjStream) 1243 return nullptr; 1244 1245 ScopedFileStream file(FX_CreateMemoryStream( 1246 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), FALSE)); 1247 CPDF_SyntaxParser syntax; 1248 syntax.InitParser(file.get(), 0); 1249 const int32_t offset = GetStreamFirst(pObjStream); 1250 1251 // Read object numbers from |pObjStream| into a cache. 1252 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { 1253 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { 1254 FX_DWORD thisnum = syntax.GetDirectNum(); 1255 FX_DWORD thisoff = syntax.GetDirectNum(); 1256 m_ObjCache[pObjStream][thisnum] = thisoff; 1257 } 1258 } 1259 1260 const auto it = m_ObjCache[pObjStream].find(objnum); 1261 if (it == m_ObjCache[pObjStream].end()) 1262 return nullptr; 1263 1264 syntax.RestorePos(offset + it->second); 1265 return syntax.GetObject(pObjList, 0, 0, pContext, true); 1266 } 1267 1268 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(FX_DWORD objnum) { 1269 CPDF_StreamAcc* pStreamAcc = nullptr; 1270 if (m_ObjectStreamMap.Lookup((void*)(uintptr_t)objnum, (void*&)pStreamAcc)) 1271 return pStreamAcc; 1272 1273 const CPDF_Stream* pStream = ToStream( 1274 m_pDocument ? m_pDocument->GetIndirectObject(objnum, nullptr) : nullptr); 1275 if (!pStream) 1276 return nullptr; 1277 1278 pStreamAcc = new CPDF_StreamAcc; 1279 pStreamAcc->LoadAllData(pStream); 1280 m_ObjectStreamMap.SetAt((void*)(uintptr_t)objnum, pStreamAcc); 1281 return pStreamAcc; 1282 } 1283 1284 FX_FILESIZE CPDF_Parser::GetObjectSize(FX_DWORD objnum) const { 1285 if (!IsValidObjectNumber(objnum)) 1286 return 0; 1287 1288 if (m_V5Type[objnum] == 2) 1289 objnum = GetObjectPositionOrZero(objnum); 1290 1291 if (m_V5Type[objnum] == 1 || m_V5Type[objnum] == 255) { 1292 FX_FILESIZE offset = GetObjectPositionOrZero(objnum); 1293 if (offset == 0) 1294 return 0; 1295 1296 FX_FILESIZE* pResult = static_cast<FX_FILESIZE*>(FXSYS_bsearch( 1297 &offset, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1298 sizeof(FX_FILESIZE), CompareFileSize)); 1299 if (!pResult) 1300 return 0; 1301 1302 if (pResult - m_SortedOffset.GetData() == m_SortedOffset.GetSize() - 1) 1303 return 0; 1304 1305 return pResult[1] - offset; 1306 } 1307 return 0; 1308 } 1309 1310 void CPDF_Parser::GetIndirectBinary(FX_DWORD objnum, 1311 uint8_t*& pBuffer, 1312 FX_DWORD& size) { 1313 pBuffer = NULL; 1314 size = 0; 1315 if (!IsValidObjectNumber(objnum)) 1316 return; 1317 1318 if (m_V5Type[objnum] == 2) { 1319 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); 1320 if (!pObjStream) 1321 return; 1322 1323 int32_t offset = GetStreamFirst(pObjStream); 1324 const uint8_t* pData = pObjStream->GetData(); 1325 FX_DWORD totalsize = pObjStream->GetSize(); 1326 ScopedFileStream file( 1327 FX_CreateMemoryStream((uint8_t*)pData, (size_t)totalsize, FALSE)); 1328 CPDF_SyntaxParser syntax; 1329 syntax.InitParser(file.get(), 0); 1330 for (int i = GetStreamNCount(pObjStream); i > 0; --i) { 1331 FX_DWORD thisnum = syntax.GetDirectNum(); 1332 FX_DWORD thisoff = syntax.GetDirectNum(); 1333 if (thisnum != objnum) 1334 continue; 1335 1336 if (i == 1) { 1337 size = totalsize - (thisoff + offset); 1338 } else { 1339 syntax.GetDirectNum(); // Skip nextnum. 1340 FX_DWORD nextoff = syntax.GetDirectNum(); 1341 size = nextoff - thisoff; 1342 } 1343 pBuffer = FX_Alloc(uint8_t, size); 1344 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); 1345 return; 1346 } 1347 return; 1348 } 1349 1350 if (m_V5Type[objnum] != 1) 1351 return; 1352 1353 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1354 if (pos == 0) { 1355 return; 1356 } 1357 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1358 m_Syntax.RestorePos(pos); 1359 bool bIsNumber; 1360 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1361 if (!bIsNumber) { 1362 m_Syntax.RestorePos(SavedPos); 1363 return; 1364 } 1365 FX_DWORD parser_objnum = FXSYS_atoi(word); 1366 if (parser_objnum && parser_objnum != objnum) { 1367 m_Syntax.RestorePos(SavedPos); 1368 return; 1369 } 1370 word = m_Syntax.GetNextWord(&bIsNumber); 1371 if (!bIsNumber) { 1372 m_Syntax.RestorePos(SavedPos); 1373 return; 1374 } 1375 if (m_Syntax.GetKeyword() != "obj") { 1376 m_Syntax.RestorePos(SavedPos); 1377 return; 1378 } 1379 void* pResult = 1380 FXSYS_bsearch(&pos, m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1381 sizeof(FX_FILESIZE), CompareFileSize); 1382 if (!pResult) { 1383 m_Syntax.RestorePos(SavedPos); 1384 return; 1385 } 1386 FX_FILESIZE nextoff = ((FX_FILESIZE*)pResult)[1]; 1387 FX_BOOL bNextOffValid = FALSE; 1388 if (nextoff != pos) { 1389 m_Syntax.RestorePos(nextoff); 1390 word = m_Syntax.GetNextWord(&bIsNumber); 1391 if (word == "xref") { 1392 bNextOffValid = TRUE; 1393 } else if (bIsNumber) { 1394 word = m_Syntax.GetNextWord(&bIsNumber); 1395 if (bIsNumber && m_Syntax.GetKeyword() == "obj") { 1396 bNextOffValid = TRUE; 1397 } 1398 } 1399 } 1400 if (!bNextOffValid) { 1401 m_Syntax.RestorePos(pos); 1402 while (1) { 1403 if (m_Syntax.GetKeyword() == "endobj") { 1404 break; 1405 } 1406 if (m_Syntax.SavePos() == m_Syntax.m_FileLen) { 1407 break; 1408 } 1409 } 1410 nextoff = m_Syntax.SavePos(); 1411 } 1412 size = (FX_DWORD)(nextoff - pos); 1413 pBuffer = FX_Alloc(uint8_t, size); 1414 m_Syntax.RestorePos(pos); 1415 m_Syntax.ReadBlock(pBuffer, size); 1416 m_Syntax.RestorePos(SavedPos); 1417 } 1418 1419 CPDF_Object* CPDF_Parser::ParseIndirectObjectAt( 1420 CPDF_IndirectObjectHolder* pObjList, 1421 FX_FILESIZE pos, 1422 FX_DWORD objnum, 1423 PARSE_CONTEXT* pContext) { 1424 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1425 m_Syntax.RestorePos(pos); 1426 bool bIsNumber; 1427 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1428 if (!bIsNumber) { 1429 m_Syntax.RestorePos(SavedPos); 1430 return NULL; 1431 } 1432 FX_FILESIZE objOffset = m_Syntax.SavePos(); 1433 objOffset -= word.GetLength(); 1434 FX_DWORD parser_objnum = FXSYS_atoi(word); 1435 if (objnum && parser_objnum != objnum) { 1436 m_Syntax.RestorePos(SavedPos); 1437 return NULL; 1438 } 1439 word = m_Syntax.GetNextWord(&bIsNumber); 1440 if (!bIsNumber) { 1441 m_Syntax.RestorePos(SavedPos); 1442 return NULL; 1443 } 1444 FX_DWORD parser_gennum = FXSYS_atoi(word); 1445 if (m_Syntax.GetKeyword() != "obj") { 1446 m_Syntax.RestorePos(SavedPos); 1447 return NULL; 1448 } 1449 CPDF_Object* pObj = 1450 m_Syntax.GetObject(pObjList, objnum, parser_gennum, pContext, true); 1451 m_Syntax.SavePos(); 1452 CFX_ByteString bsWord = m_Syntax.GetKeyword(); 1453 if (bsWord == "endobj") { 1454 m_Syntax.SavePos(); 1455 } 1456 m_Syntax.RestorePos(SavedPos); 1457 if (pObj) { 1458 if (!objnum) 1459 pObj->m_ObjNum = parser_objnum; 1460 pObj->m_GenNum = parser_gennum; 1461 } 1462 return pObj; 1463 } 1464 CPDF_Object* CPDF_Parser::ParseIndirectObjectAtByStrict( 1465 CPDF_IndirectObjectHolder* pObjList, 1466 FX_FILESIZE pos, 1467 FX_DWORD objnum, 1468 PARSE_CONTEXT* pContext, 1469 FX_FILESIZE* pResultPos) { 1470 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1471 m_Syntax.RestorePos(pos); 1472 bool bIsNumber; 1473 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1474 if (!bIsNumber) { 1475 m_Syntax.RestorePos(SavedPos); 1476 return NULL; 1477 } 1478 FX_DWORD parser_objnum = FXSYS_atoi(word); 1479 if (objnum && parser_objnum != objnum) { 1480 m_Syntax.RestorePos(SavedPos); 1481 return NULL; 1482 } 1483 word = m_Syntax.GetNextWord(&bIsNumber); 1484 if (!bIsNumber) { 1485 m_Syntax.RestorePos(SavedPos); 1486 return NULL; 1487 } 1488 FX_DWORD gennum = FXSYS_atoi(word); 1489 if (m_Syntax.GetKeyword() != "obj") { 1490 m_Syntax.RestorePos(SavedPos); 1491 return NULL; 1492 } 1493 CPDF_Object* pObj = 1494 m_Syntax.GetObjectByStrict(pObjList, objnum, gennum, pContext); 1495 if (pResultPos) { 1496 *pResultPos = m_Syntax.m_Pos; 1497 } 1498 m_Syntax.RestorePos(SavedPos); 1499 return pObj; 1500 } 1501 1502 CPDF_Dictionary* CPDF_Parser::LoadTrailerV4() { 1503 if (m_Syntax.GetKeyword() != "trailer") 1504 return nullptr; 1505 1506 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( 1507 m_Syntax.GetObject(m_pDocument, 0, 0, nullptr, true)); 1508 if (!ToDictionary(pObj.get())) 1509 return nullptr; 1510 return pObj.release()->AsDictionary(); 1511 } 1512 1513 FX_DWORD CPDF_Parser::GetPermissions(FX_BOOL bCheckRevision) { 1514 if (!m_pSecurityHandler) { 1515 return (FX_DWORD)-1; 1516 } 1517 FX_DWORD dwPermission = m_pSecurityHandler->GetPermissions(); 1518 if (m_pEncryptDict && m_pEncryptDict->GetString("Filter") == "Standard") { 1519 dwPermission &= 0xFFFFFFFC; 1520 dwPermission |= 0xFFFFF0C0; 1521 if (bCheckRevision && m_pEncryptDict->GetInteger("R") == 2) { 1522 dwPermission &= 0xFFFFF0FF; 1523 } 1524 } 1525 return dwPermission; 1526 } 1527 FX_BOOL CPDF_Parser::IsOwner() { 1528 return !m_pSecurityHandler || m_pSecurityHandler->IsOwner(); 1529 } 1530 void CPDF_Parser::SetSecurityHandler(CPDF_SecurityHandler* pSecurityHandler, 1531 FX_BOOL bForced) { 1532 m_bForceUseSecurityHandler = bForced; 1533 m_pSecurityHandler.reset(pSecurityHandler); 1534 if (m_bForceUseSecurityHandler) { 1535 return; 1536 } 1537 m_Syntax.m_pCryptoHandler.reset(pSecurityHandler->CreateCryptoHandler()); 1538 m_Syntax.m_pCryptoHandler->Init(NULL, pSecurityHandler); 1539 } 1540 FX_BOOL CPDF_Parser::IsLinearizedFile(IFX_FileRead* pFileAccess, 1541 FX_DWORD offset) { 1542 m_Syntax.InitParser(pFileAccess, offset); 1543 m_Syntax.RestorePos(m_Syntax.m_HeaderOffset + 9); 1544 FX_FILESIZE SavedPos = m_Syntax.SavePos(); 1545 bool bIsNumber; 1546 CFX_ByteString word = m_Syntax.GetNextWord(&bIsNumber); 1547 if (!bIsNumber) { 1548 return FALSE; 1549 } 1550 FX_DWORD objnum = FXSYS_atoi(word); 1551 word = m_Syntax.GetNextWord(&bIsNumber); 1552 if (!bIsNumber) { 1553 return FALSE; 1554 } 1555 FX_DWORD gennum = FXSYS_atoi(word); 1556 if (m_Syntax.GetKeyword() != "obj") { 1557 m_Syntax.RestorePos(SavedPos); 1558 return FALSE; 1559 } 1560 m_pLinearized = m_Syntax.GetObject(nullptr, objnum, gennum, nullptr, true); 1561 if (!m_pLinearized) { 1562 return FALSE; 1563 } 1564 1565 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 1566 if (pDict && pDict->GetElement("Linearized")) { 1567 m_Syntax.GetNextWord(nullptr); 1568 1569 CPDF_Object* pLen = pDict->GetElement("L"); 1570 if (!pLen) { 1571 m_pLinearized->Release(); 1572 m_pLinearized = NULL; 1573 return FALSE; 1574 } 1575 if (pLen->GetInteger() != (int)pFileAccess->GetSize()) { 1576 return FALSE; 1577 } 1578 1579 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) 1580 m_dwFirstPageNo = pNo->GetInteger(); 1581 1582 if (CPDF_Number* pTable = ToNumber(pDict->GetElement("T"))) 1583 m_LastXRefOffset = pTable->GetInteger(); 1584 1585 return TRUE; 1586 } 1587 m_pLinearized->Release(); 1588 m_pLinearized = NULL; 1589 return FALSE; 1590 } 1591 FX_DWORD CPDF_Parser::StartAsynParse(IFX_FileRead* pFileAccess, 1592 FX_BOOL bReParse, 1593 FX_BOOL bOwnFileRead) { 1594 CloseParser(bReParse); 1595 m_bXRefStream = FALSE; 1596 m_LastXRefOffset = 0; 1597 m_bOwnFileRead = bOwnFileRead; 1598 int32_t offset = GetHeaderOffset(pFileAccess); 1599 if (offset == -1) { 1600 return PDFPARSE_ERROR_FORMAT; 1601 } 1602 if (!IsLinearizedFile(pFileAccess, offset)) { 1603 m_Syntax.m_pFileAccess = NULL; 1604 return StartParse(pFileAccess, bReParse, bOwnFileRead); 1605 } 1606 if (!bReParse) { 1607 m_pDocument = new CPDF_Document(this); 1608 } 1609 FX_FILESIZE dwFirstXRefOffset = m_Syntax.SavePos(); 1610 FX_BOOL bXRefRebuilt = FALSE; 1611 FX_BOOL bLoadV4 = FALSE; 1612 if (!(bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, FALSE)) && 1613 !LoadCrossRefV5(&dwFirstXRefOffset, TRUE)) { 1614 if (!RebuildCrossRef()) { 1615 return PDFPARSE_ERROR_FORMAT; 1616 } 1617 bXRefRebuilt = TRUE; 1618 m_LastXRefOffset = 0; 1619 } 1620 if (bLoadV4) { 1621 m_pTrailer = LoadTrailerV4(); 1622 if (!m_pTrailer) { 1623 return PDFPARSE_ERROR_SUCCESS; 1624 } 1625 1626 int32_t xrefsize = GetDirectInteger(m_pTrailer, "Size"); 1627 if (xrefsize > 0) { 1628 ShrinkObjectMap(xrefsize); 1629 m_V5Type.SetSize(xrefsize); 1630 } 1631 } 1632 FX_DWORD dwRet = SetEncryptHandler(); 1633 if (dwRet != PDFPARSE_ERROR_SUCCESS) { 1634 return dwRet; 1635 } 1636 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); 1637 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 1638 if (bXRefRebuilt) { 1639 return PDFPARSE_ERROR_FORMAT; 1640 } 1641 ReleaseEncryptHandler(); 1642 if (!RebuildCrossRef()) { 1643 return PDFPARSE_ERROR_FORMAT; 1644 } 1645 dwRet = SetEncryptHandler(); 1646 if (dwRet != PDFPARSE_ERROR_SUCCESS) { 1647 return dwRet; 1648 } 1649 m_pDocument->LoadAsynDoc(m_pLinearized->GetDict()); 1650 if (!m_pDocument->GetRoot()) { 1651 return PDFPARSE_ERROR_FORMAT; 1652 } 1653 } 1654 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1655 sizeof(FX_FILESIZE), CompareFileSize); 1656 if (GetRootObjNum() == 0) { 1657 ReleaseEncryptHandler(); 1658 if (!RebuildCrossRef() || GetRootObjNum() == 0) 1659 return PDFPARSE_ERROR_FORMAT; 1660 1661 dwRet = SetEncryptHandler(); 1662 if (dwRet != PDFPARSE_ERROR_SUCCESS) { 1663 return dwRet; 1664 } 1665 } 1666 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { 1667 if (CPDF_Reference* pMetadata = 1668 ToReference(m_pDocument->GetRoot()->GetElement("Metadata"))) 1669 m_Syntax.m_MetadataObjnum = pMetadata->GetRefObjNum(); 1670 } 1671 return PDFPARSE_ERROR_SUCCESS; 1672 } 1673 FX_BOOL CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { 1674 if (!LoadCrossRefV5(&xrefpos, FALSE)) { 1675 return FALSE; 1676 } 1677 std::set<FX_FILESIZE> seen_xrefpos; 1678 while (xrefpos) { 1679 seen_xrefpos.insert(xrefpos); 1680 if (!LoadCrossRefV5(&xrefpos, FALSE)) { 1681 return FALSE; 1682 } 1683 // Check for circular references. 1684 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) { 1685 return FALSE; 1686 } 1687 } 1688 m_ObjectStreamMap.InitHashTable(101, FALSE); 1689 m_bXRefStream = TRUE; 1690 return TRUE; 1691 } 1692 FX_DWORD CPDF_Parser::LoadLinearizedMainXRefTable() { 1693 FX_DWORD dwSaveMetadataObjnum = m_Syntax.m_MetadataObjnum; 1694 m_Syntax.m_MetadataObjnum = 0; 1695 if (m_pTrailer) { 1696 m_pTrailer->Release(); 1697 m_pTrailer = NULL; 1698 } 1699 m_Syntax.RestorePos(m_LastXRefOffset - m_Syntax.m_HeaderOffset); 1700 uint8_t ch = 0; 1701 FX_DWORD dwCount = 0; 1702 m_Syntax.GetNextChar(ch); 1703 while (PDFCharIsWhitespace(ch)) { 1704 ++dwCount; 1705 if (m_Syntax.m_FileLen >= 1706 (FX_FILESIZE)(m_Syntax.SavePos() + m_Syntax.m_HeaderOffset)) { 1707 break; 1708 } 1709 m_Syntax.GetNextChar(ch); 1710 } 1711 m_LastXRefOffset += dwCount; 1712 FX_POSITION pos = m_ObjectStreamMap.GetStartPosition(); 1713 while (pos) { 1714 void* objnum; 1715 CPDF_StreamAcc* pStream; 1716 m_ObjectStreamMap.GetNextAssoc(pos, objnum, (void*&)pStream); 1717 delete pStream; 1718 } 1719 m_ObjectStreamMap.RemoveAll(); 1720 m_ObjCache.clear(); 1721 1722 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && 1723 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { 1724 m_LastXRefOffset = 0; 1725 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum; 1726 return PDFPARSE_ERROR_FORMAT; 1727 } 1728 FXSYS_qsort(m_SortedOffset.GetData(), m_SortedOffset.GetSize(), 1729 sizeof(FX_FILESIZE), CompareFileSize); 1730 m_Syntax.m_MetadataObjnum = dwSaveMetadataObjnum; 1731 return PDFPARSE_ERROR_SUCCESS; 1732 } 1733 1734 // static 1735 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; 1736 1737 CPDF_SyntaxParser::CPDF_SyntaxParser() { 1738 m_pFileAccess = NULL; 1739 m_pFileBuf = NULL; 1740 m_BufSize = CPDF_ModuleMgr::kFileBufSize; 1741 m_pFileBuf = NULL; 1742 m_MetadataObjnum = 0; 1743 m_dwWordPos = 0; 1744 m_bFileStream = FALSE; 1745 } 1746 1747 CPDF_SyntaxParser::~CPDF_SyntaxParser() { 1748 FX_Free(m_pFileBuf); 1749 } 1750 1751 FX_BOOL CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { 1752 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 1753 m_Pos = pos; 1754 return GetNextChar(ch); 1755 } 1756 1757 FX_BOOL CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { 1758 FX_FILESIZE pos = m_Pos + m_HeaderOffset; 1759 if (pos >= m_FileLen) { 1760 return FALSE; 1761 } 1762 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { 1763 FX_FILESIZE read_pos = pos; 1764 FX_DWORD read_size = m_BufSize; 1765 if ((FX_FILESIZE)read_size > m_FileLen) { 1766 read_size = (FX_DWORD)m_FileLen; 1767 } 1768 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { 1769 if (m_FileLen < (FX_FILESIZE)read_size) { 1770 read_pos = 0; 1771 read_size = (FX_DWORD)m_FileLen; 1772 } else { 1773 read_pos = m_FileLen - read_size; 1774 } 1775 } 1776 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) { 1777 return FALSE; 1778 } 1779 m_BufOffset = read_pos; 1780 } 1781 ch = m_pFileBuf[pos - m_BufOffset]; 1782 m_Pos++; 1783 return TRUE; 1784 } 1785 FX_BOOL CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { 1786 pos += m_HeaderOffset; 1787 if (pos >= m_FileLen) { 1788 return FALSE; 1789 } 1790 if (m_BufOffset >= pos || (FX_FILESIZE)(m_BufOffset + m_BufSize) <= pos) { 1791 FX_FILESIZE read_pos; 1792 if (pos < (FX_FILESIZE)m_BufSize) { 1793 read_pos = 0; 1794 } else { 1795 read_pos = pos - m_BufSize + 1; 1796 } 1797 FX_DWORD read_size = m_BufSize; 1798 if ((FX_FILESIZE)(read_pos + read_size) > m_FileLen) { 1799 if (m_FileLen < (FX_FILESIZE)read_size) { 1800 read_pos = 0; 1801 read_size = (FX_DWORD)m_FileLen; 1802 } else { 1803 read_pos = m_FileLen - read_size; 1804 } 1805 } 1806 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) { 1807 return FALSE; 1808 } 1809 m_BufOffset = read_pos; 1810 } 1811 ch = m_pFileBuf[pos - m_BufOffset]; 1812 return TRUE; 1813 } 1814 FX_BOOL CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, FX_DWORD size) { 1815 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) { 1816 return FALSE; 1817 } 1818 m_Pos += size; 1819 return TRUE; 1820 } 1821 1822 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { 1823 m_WordSize = 0; 1824 if (bIsNumber) 1825 *bIsNumber = true; 1826 uint8_t ch; 1827 if (!GetNextChar(ch)) { 1828 return; 1829 } 1830 while (1) { 1831 while (PDFCharIsWhitespace(ch)) { 1832 if (!GetNextChar(ch)) 1833 return; 1834 } 1835 if (ch != '%') 1836 break; 1837 1838 while (1) { 1839 if (!GetNextChar(ch)) 1840 return; 1841 if (PDFCharIsLineEnding(ch)) 1842 break; 1843 } 1844 } 1845 1846 if (PDFCharIsDelimiter(ch)) { 1847 if (bIsNumber) 1848 *bIsNumber = false; 1849 m_WordBuffer[m_WordSize++] = ch; 1850 if (ch == '/') { 1851 while (1) { 1852 if (!GetNextChar(ch)) 1853 return; 1854 1855 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 1856 m_Pos--; 1857 return; 1858 } 1859 1860 if (m_WordSize < sizeof(m_WordBuffer) - 1) 1861 m_WordBuffer[m_WordSize++] = ch; 1862 } 1863 } else if (ch == '<') { 1864 if (!GetNextChar(ch)) 1865 return; 1866 if (ch == '<') 1867 m_WordBuffer[m_WordSize++] = ch; 1868 else 1869 m_Pos--; 1870 } else if (ch == '>') { 1871 if (!GetNextChar(ch)) 1872 return; 1873 if (ch == '>') 1874 m_WordBuffer[m_WordSize++] = ch; 1875 else 1876 m_Pos--; 1877 } 1878 return; 1879 } 1880 1881 while (1) { 1882 if (m_WordSize < sizeof(m_WordBuffer) - 1) 1883 m_WordBuffer[m_WordSize++] = ch; 1884 1885 if (!PDFCharIsNumeric(ch)) 1886 if (bIsNumber) 1887 *bIsNumber = false; 1888 if (!GetNextChar(ch)) 1889 return; 1890 1891 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 1892 m_Pos--; 1893 break; 1894 } 1895 } 1896 } 1897 1898 CFX_ByteString CPDF_SyntaxParser::ReadString() { 1899 uint8_t ch; 1900 if (!GetNextChar(ch)) { 1901 return CFX_ByteString(); 1902 } 1903 CFX_ByteTextBuf buf; 1904 int32_t parlevel = 0; 1905 int32_t status = 0, iEscCode = 0; 1906 while (1) { 1907 switch (status) { 1908 case 0: 1909 if (ch == ')') { 1910 if (parlevel == 0) { 1911 return buf.GetByteString(); 1912 } 1913 parlevel--; 1914 buf.AppendChar(')'); 1915 } else if (ch == '(') { 1916 parlevel++; 1917 buf.AppendChar('('); 1918 } else if (ch == '\\') { 1919 status = 1; 1920 } else { 1921 buf.AppendChar(ch); 1922 } 1923 break; 1924 case 1: 1925 if (ch >= '0' && ch <= '7') { 1926 iEscCode = FXSYS_toDecimalDigit(ch); 1927 status = 2; 1928 break; 1929 } 1930 if (ch == 'n') { 1931 buf.AppendChar('\n'); 1932 } else if (ch == 'r') { 1933 buf.AppendChar('\r'); 1934 } else if (ch == 't') { 1935 buf.AppendChar('\t'); 1936 } else if (ch == 'b') { 1937 buf.AppendChar('\b'); 1938 } else if (ch == 'f') { 1939 buf.AppendChar('\f'); 1940 } else if (ch == '\r') { 1941 status = 4; 1942 break; 1943 } else if (ch == '\n') { 1944 } else { 1945 buf.AppendChar(ch); 1946 } 1947 status = 0; 1948 break; 1949 case 2: 1950 if (ch >= '0' && ch <= '7') { 1951 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch); 1952 status = 3; 1953 } else { 1954 buf.AppendChar(iEscCode); 1955 status = 0; 1956 continue; 1957 } 1958 break; 1959 case 3: 1960 if (ch >= '0' && ch <= '7') { 1961 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch); 1962 buf.AppendChar(iEscCode); 1963 status = 0; 1964 } else { 1965 buf.AppendChar(iEscCode); 1966 status = 0; 1967 continue; 1968 } 1969 break; 1970 case 4: 1971 status = 0; 1972 if (ch != '\n') { 1973 continue; 1974 } 1975 break; 1976 } 1977 if (!GetNextChar(ch)) { 1978 break; 1979 } 1980 } 1981 GetNextChar(ch); 1982 return buf.GetByteString(); 1983 } 1984 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { 1985 uint8_t ch; 1986 if (!GetNextChar(ch)) 1987 return CFX_ByteString(); 1988 1989 CFX_BinaryBuf buf; 1990 bool bFirst = true; 1991 uint8_t code = 0; 1992 while (1) { 1993 if (ch == '>') 1994 break; 1995 1996 if (std::isxdigit(ch)) { 1997 int val = FXSYS_toHexDigit(ch); 1998 if (bFirst) { 1999 code = val * 16; 2000 } else { 2001 code += val; 2002 buf.AppendByte((uint8_t)code); 2003 } 2004 bFirst = !bFirst; 2005 } 2006 2007 if (!GetNextChar(ch)) 2008 break; 2009 } 2010 if (!bFirst) 2011 buf.AppendByte((uint8_t)code); 2012 2013 return buf.GetByteString(); 2014 } 2015 void CPDF_SyntaxParser::ToNextLine() { 2016 uint8_t ch; 2017 while (GetNextChar(ch)) { 2018 if (ch == '\n') { 2019 break; 2020 } 2021 if (ch == '\r') { 2022 GetNextChar(ch); 2023 if (ch != '\n') { 2024 --m_Pos; 2025 } 2026 break; 2027 } 2028 } 2029 } 2030 void CPDF_SyntaxParser::ToNextWord() { 2031 uint8_t ch; 2032 if (!GetNextChar(ch)) 2033 return; 2034 2035 while (1) { 2036 while (PDFCharIsWhitespace(ch)) { 2037 m_dwWordPos = m_Pos; 2038 if (!GetNextChar(ch)) 2039 return; 2040 } 2041 2042 if (ch != '%') 2043 break; 2044 2045 while (1) { 2046 if (!GetNextChar(ch)) 2047 return; 2048 if (PDFCharIsLineEnding(ch)) 2049 break; 2050 } 2051 } 2052 m_Pos--; 2053 } 2054 2055 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { 2056 GetNextWordInternal(bIsNumber); 2057 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); 2058 } 2059 2060 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { 2061 return GetNextWord(nullptr); 2062 } 2063 2064 CPDF_Object* CPDF_SyntaxParser::GetObject(CPDF_IndirectObjectHolder* pObjList, 2065 FX_DWORD objnum, 2066 FX_DWORD gennum, 2067 PARSE_CONTEXT* pContext, 2068 FX_BOOL bDecrypt) { 2069 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); 2070 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) { 2071 return NULL; 2072 } 2073 FX_FILESIZE SavedPos = m_Pos; 2074 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY); 2075 bool bIsNumber; 2076 CFX_ByteString word = GetNextWord(&bIsNumber); 2077 if (word.GetLength() == 0) { 2078 if (bTypeOnly) 2079 return (CPDF_Object*)PDFOBJ_INVALID; 2080 return NULL; 2081 } 2082 if (bIsNumber) { 2083 FX_FILESIZE SavedPos = m_Pos; 2084 CFX_ByteString nextword = GetNextWord(&bIsNumber); 2085 if (bIsNumber) { 2086 CFX_ByteString nextword2 = GetNextWord(nullptr); 2087 if (nextword2 == "R") { 2088 FX_DWORD objnum = FXSYS_atoi(word); 2089 if (bTypeOnly) 2090 return (CPDF_Object*)PDFOBJ_REFERENCE; 2091 return new CPDF_Reference(pObjList, objnum); 2092 } 2093 } 2094 m_Pos = SavedPos; 2095 if (bTypeOnly) 2096 return (CPDF_Object*)PDFOBJ_NUMBER; 2097 return new CPDF_Number(word); 2098 } 2099 if (word == "true" || word == "false") { 2100 if (bTypeOnly) 2101 return (CPDF_Object*)PDFOBJ_BOOLEAN; 2102 return new CPDF_Boolean(word == "true"); 2103 } 2104 if (word == "null") { 2105 if (bTypeOnly) 2106 return (CPDF_Object*)PDFOBJ_NULL; 2107 return new CPDF_Null; 2108 } 2109 if (word == "(") { 2110 if (bTypeOnly) 2111 return (CPDF_Object*)PDFOBJ_STRING; 2112 CFX_ByteString str = ReadString(); 2113 if (m_pCryptoHandler && bDecrypt) { 2114 m_pCryptoHandler->Decrypt(objnum, gennum, str); 2115 } 2116 return new CPDF_String(str, FALSE); 2117 } 2118 if (word == "<") { 2119 if (bTypeOnly) 2120 return (CPDF_Object*)PDFOBJ_STRING; 2121 CFX_ByteString str = ReadHexString(); 2122 if (m_pCryptoHandler && bDecrypt) { 2123 m_pCryptoHandler->Decrypt(objnum, gennum, str); 2124 } 2125 return new CPDF_String(str, TRUE); 2126 } 2127 if (word == "[") { 2128 if (bTypeOnly) 2129 return (CPDF_Object*)PDFOBJ_ARRAY; 2130 CPDF_Array* pArray = new CPDF_Array; 2131 while (CPDF_Object* pObj = 2132 GetObject(pObjList, objnum, gennum, nullptr, true)) { 2133 pArray->Add(pObj); 2134 } 2135 return pArray; 2136 } 2137 if (word[0] == '/') { 2138 if (bTypeOnly) 2139 return (CPDF_Object*)PDFOBJ_NAME; 2140 return new CPDF_Name( 2141 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 2142 } 2143 if (word == "<<") { 2144 if (bTypeOnly) 2145 return (CPDF_Object*)PDFOBJ_DICTIONARY; 2146 2147 if (pContext) 2148 pContext->m_DictStart = SavedPos; 2149 2150 int32_t nKeys = 0; 2151 FX_FILESIZE dwSignValuePos = 0; 2152 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( 2153 new CPDF_Dictionary); 2154 while (1) { 2155 CFX_ByteString key = GetNextWord(nullptr); 2156 if (key.IsEmpty()) 2157 return nullptr; 2158 2159 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); 2160 if (key == ">>") 2161 break; 2162 2163 if (key == "endobj") { 2164 m_Pos = SavedPos; 2165 break; 2166 } 2167 if (key[0] != '/') 2168 continue; 2169 2170 ++nKeys; 2171 key = PDF_NameDecode(key); 2172 if (key.IsEmpty()) 2173 continue; 2174 2175 if (key == "/Contents") 2176 dwSignValuePos = m_Pos; 2177 2178 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, true); 2179 if (!pObj) 2180 continue; 2181 2182 CFX_ByteStringC keyNoSlash(key.c_str() + 1, key.GetLength() - 1); 2183 pDict->SetAt(keyNoSlash, pObj); 2184 } 2185 2186 // Only when this is a signature dictionary and has contents, we reset the 2187 // contents to the un-decrypted form. 2188 if (IsSignatureDict(pDict.get()) && dwSignValuePos) { 2189 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 2190 m_Pos = dwSignValuePos; 2191 CPDF_Object* pObj = GetObject(pObjList, objnum, gennum, nullptr, FALSE); 2192 pDict->SetAt("Contents", pObj); 2193 } 2194 if (pContext) { 2195 pContext->m_DictEnd = m_Pos; 2196 if (pContext->m_Flags & PDFPARSE_NOSTREAM) { 2197 return pDict.release(); 2198 } 2199 } 2200 FX_FILESIZE SavedPos = m_Pos; 2201 CFX_ByteString nextword = GetNextWord(nullptr); 2202 if (nextword != "stream") { 2203 m_Pos = SavedPos; 2204 return pDict.release(); 2205 } 2206 2207 return ReadStream(pDict.release(), pContext, objnum, gennum); 2208 } 2209 if (word == ">>") { 2210 m_Pos = SavedPos; 2211 return nullptr; 2212 } 2213 if (bTypeOnly) 2214 return (CPDF_Object*)PDFOBJ_INVALID; 2215 2216 return nullptr; 2217 } 2218 2219 CPDF_Object* CPDF_SyntaxParser::GetObjectByStrict( 2220 CPDF_IndirectObjectHolder* pObjList, 2221 FX_DWORD objnum, 2222 FX_DWORD gennum, 2223 PARSE_CONTEXT* pContext) { 2224 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); 2225 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) { 2226 return NULL; 2227 } 2228 FX_FILESIZE SavedPos = m_Pos; 2229 FX_BOOL bTypeOnly = pContext && (pContext->m_Flags & PDFPARSE_TYPEONLY); 2230 bool bIsNumber; 2231 CFX_ByteString word = GetNextWord(&bIsNumber); 2232 if (word.GetLength() == 0) { 2233 if (bTypeOnly) 2234 return (CPDF_Object*)PDFOBJ_INVALID; 2235 return nullptr; 2236 } 2237 if (bIsNumber) { 2238 FX_FILESIZE SavedPos = m_Pos; 2239 CFX_ByteString nextword = GetNextWord(&bIsNumber); 2240 if (bIsNumber) { 2241 CFX_ByteString nextword2 = GetNextWord(nullptr); 2242 if (nextword2 == "R") { 2243 if (bTypeOnly) 2244 return (CPDF_Object*)PDFOBJ_REFERENCE; 2245 FX_DWORD objnum = FXSYS_atoi(word); 2246 return new CPDF_Reference(pObjList, objnum); 2247 } 2248 } 2249 m_Pos = SavedPos; 2250 if (bTypeOnly) 2251 return (CPDF_Object*)PDFOBJ_NUMBER; 2252 return new CPDF_Number(word); 2253 } 2254 if (word == "true" || word == "false") { 2255 if (bTypeOnly) 2256 return (CPDF_Object*)PDFOBJ_BOOLEAN; 2257 return new CPDF_Boolean(word == "true"); 2258 } 2259 if (word == "null") { 2260 if (bTypeOnly) 2261 return (CPDF_Object*)PDFOBJ_NULL; 2262 return new CPDF_Null; 2263 } 2264 if (word == "(") { 2265 if (bTypeOnly) 2266 return (CPDF_Object*)PDFOBJ_STRING; 2267 CFX_ByteString str = ReadString(); 2268 if (m_pCryptoHandler) 2269 m_pCryptoHandler->Decrypt(objnum, gennum, str); 2270 return new CPDF_String(str, FALSE); 2271 } 2272 if (word == "<") { 2273 if (bTypeOnly) 2274 return (CPDF_Object*)PDFOBJ_STRING; 2275 CFX_ByteString str = ReadHexString(); 2276 if (m_pCryptoHandler) 2277 m_pCryptoHandler->Decrypt(objnum, gennum, str); 2278 return new CPDF_String(str, TRUE); 2279 } 2280 if (word == "[") { 2281 if (bTypeOnly) 2282 return (CPDF_Object*)PDFOBJ_ARRAY; 2283 std::unique_ptr<CPDF_Array, ReleaseDeleter<CPDF_Array>> pArray( 2284 new CPDF_Array); 2285 while (CPDF_Object* pObj = 2286 GetObject(pObjList, objnum, gennum, nullptr, true)) { 2287 pArray->Add(pObj); 2288 } 2289 return m_WordBuffer[0] == ']' ? pArray.release() : nullptr; 2290 } 2291 if (word[0] == '/') { 2292 if (bTypeOnly) 2293 return (CPDF_Object*)PDFOBJ_NAME; 2294 return new CPDF_Name( 2295 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 2296 } 2297 if (word == "<<") { 2298 if (bTypeOnly) 2299 return (CPDF_Object*)PDFOBJ_DICTIONARY; 2300 if (pContext) 2301 pContext->m_DictStart = SavedPos; 2302 2303 std::unique_ptr<CPDF_Dictionary, ReleaseDeleter<CPDF_Dictionary>> pDict( 2304 new CPDF_Dictionary); 2305 while (1) { 2306 FX_FILESIZE SavedPos = m_Pos; 2307 CFX_ByteString key = GetNextWord(nullptr); 2308 if (key.IsEmpty()) 2309 return nullptr; 2310 2311 if (key == ">>") 2312 break; 2313 2314 if (key == "endobj") { 2315 m_Pos = SavedPos; 2316 break; 2317 } 2318 if (key[0] != '/') 2319 continue; 2320 2321 key = PDF_NameDecode(key); 2322 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> obj( 2323 GetObject(pObjList, objnum, gennum, nullptr, true)); 2324 if (!obj) { 2325 uint8_t ch; 2326 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { 2327 } 2328 return nullptr; 2329 } 2330 if (key.GetLength() > 1) { 2331 pDict->SetAt(CFX_ByteStringC(key.c_str() + 1, key.GetLength() - 1), 2332 obj.release()); 2333 } 2334 } 2335 if (pContext) { 2336 pContext->m_DictEnd = m_Pos; 2337 if (pContext->m_Flags & PDFPARSE_NOSTREAM) { 2338 return pDict.release(); 2339 } 2340 } 2341 FX_FILESIZE SavedPos = m_Pos; 2342 CFX_ByteString nextword = GetNextWord(nullptr); 2343 if (nextword != "stream") { 2344 m_Pos = SavedPos; 2345 return pDict.release(); 2346 } 2347 2348 return ReadStream(pDict.release(), pContext, objnum, gennum); 2349 } 2350 if (word == ">>") { 2351 m_Pos = SavedPos; 2352 return nullptr; 2353 } 2354 if (bTypeOnly) 2355 return (CPDF_Object*)PDFOBJ_INVALID; 2356 return nullptr; 2357 } 2358 2359 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { 2360 unsigned char byte1 = 0; 2361 unsigned char byte2 = 0; 2362 GetCharAt(pos, byte1); 2363 GetCharAt(pos + 1, byte2); 2364 unsigned int markers = 0; 2365 if (byte1 == '\r' && byte2 == '\n') { 2366 markers = 2; 2367 } else if (byte1 == '\r' || byte1 == '\n') { 2368 markers = 1; 2369 } 2370 return markers; 2371 } 2372 CPDF_Stream* CPDF_SyntaxParser::ReadStream(CPDF_Dictionary* pDict, 2373 PARSE_CONTEXT* pContext, 2374 FX_DWORD objnum, 2375 FX_DWORD gennum) { 2376 CPDF_Object* pLenObj = pDict->GetElement("Length"); 2377 FX_FILESIZE len = -1; 2378 CPDF_Reference* pLenObjRef = ToReference(pLenObj); 2379 2380 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && 2381 pLenObjRef->GetRefObjNum() != objnum); 2382 if (pLenObj && differingObjNum) 2383 len = pLenObj->GetInteger(); 2384 2385 // Locate the start of stream. 2386 ToNextLine(); 2387 FX_FILESIZE streamStartPos = m_Pos; 2388 if (pContext) { 2389 pContext->m_DataStart = streamStartPos; 2390 } 2391 2392 const CFX_ByteStringC kEndStreamStr("endstream"); 2393 const CFX_ByteStringC kEndObjStr("endobj"); 2394 CPDF_CryptoHandler* pCryptoHandler = 2395 objnum == (FX_DWORD)m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); 2396 if (!pCryptoHandler) { 2397 FX_BOOL bSearchForKeyword = TRUE; 2398 if (len >= 0) { 2399 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; 2400 pos += len; 2401 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) { 2402 m_Pos = pos.ValueOrDie(); 2403 } 2404 m_Pos += ReadEOLMarkers(m_Pos); 2405 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); 2406 GetNextWordInternal(nullptr); 2407 // Earlier version of PDF specification doesn't require EOL marker before 2408 // 'endstream' keyword. If keyword 'endstream' follows the bytes in 2409 // specified length, it signals the end of stream. 2410 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.GetPtr(), 2411 kEndStreamStr.GetLength()) == 0) { 2412 bSearchForKeyword = FALSE; 2413 } 2414 } 2415 if (bSearchForKeyword) { 2416 // If len is not available, len needs to be calculated 2417 // by searching the keywords "endstream" or "endobj". 2418 m_Pos = streamStartPos; 2419 FX_FILESIZE endStreamOffset = 0; 2420 while (endStreamOffset >= 0) { 2421 endStreamOffset = FindTag(kEndStreamStr, 0); 2422 if (endStreamOffset < 0) { 2423 // Can't find any "endstream". 2424 break; 2425 } 2426 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, 2427 kEndStreamStr, TRUE)) { 2428 // Stop searching when the keyword "endstream" is found. 2429 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); 2430 break; 2431 } 2432 } 2433 m_Pos = streamStartPos; 2434 FX_FILESIZE endObjOffset = 0; 2435 while (endObjOffset >= 0) { 2436 endObjOffset = FindTag(kEndObjStr, 0); 2437 if (endObjOffset < 0) { 2438 // Can't find any "endobj". 2439 break; 2440 } 2441 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, 2442 TRUE)) { 2443 // Stop searching when the keyword "endobj" is found. 2444 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); 2445 break; 2446 } 2447 } 2448 if (endStreamOffset < 0 && endObjOffset < 0) { 2449 // Can't find "endstream" or "endobj". 2450 pDict->Release(); 2451 return nullptr; 2452 } 2453 if (endStreamOffset < 0 && endObjOffset >= 0) { 2454 // Correct the position of end stream. 2455 endStreamOffset = endObjOffset; 2456 } else if (endStreamOffset >= 0 && endObjOffset < 0) { 2457 // Correct the position of end obj. 2458 endObjOffset = endStreamOffset; 2459 } else if (endStreamOffset > endObjOffset) { 2460 endStreamOffset = endObjOffset; 2461 } 2462 len = endStreamOffset; 2463 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); 2464 if (numMarkers == 2) { 2465 len -= 2; 2466 } else { 2467 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); 2468 if (numMarkers == 1) { 2469 len -= 1; 2470 } 2471 } 2472 if (len < 0) { 2473 pDict->Release(); 2474 return nullptr; 2475 } 2476 pDict->SetAtInteger("Length", len); 2477 } 2478 m_Pos = streamStartPos; 2479 } 2480 if (len < 0) { 2481 pDict->Release(); 2482 return nullptr; 2483 } 2484 uint8_t* pData = nullptr; 2485 if (len > 0) { 2486 pData = FX_Alloc(uint8_t, len); 2487 ReadBlock(pData, len); 2488 if (pCryptoHandler) { 2489 CFX_BinaryBuf dest_buf; 2490 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); 2491 void* context = pCryptoHandler->DecryptStart(objnum, gennum); 2492 pCryptoHandler->DecryptStream(context, pData, len, dest_buf); 2493 pCryptoHandler->DecryptFinish(context, dest_buf); 2494 FX_Free(pData); 2495 pData = dest_buf.GetBuffer(); 2496 len = dest_buf.GetSize(); 2497 dest_buf.DetachBuffer(); 2498 } 2499 } 2500 CPDF_Stream* pStream = new CPDF_Stream(pData, len, pDict); 2501 if (pContext) { 2502 pContext->m_DataEnd = pContext->m_DataStart + len; 2503 } 2504 streamStartPos = m_Pos; 2505 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); 2506 GetNextWordInternal(nullptr); 2507 int numMarkers = ReadEOLMarkers(m_Pos); 2508 if (m_WordSize == kEndObjStr.GetLength() && numMarkers != 0 && 2509 FXSYS_memcmp(m_WordBuffer, kEndObjStr.GetPtr(), kEndObjStr.GetLength()) == 2510 0) { 2511 m_Pos = streamStartPos; 2512 } 2513 return pStream; 2514 } 2515 void CPDF_SyntaxParser::InitParser(IFX_FileRead* pFileAccess, 2516 FX_DWORD HeaderOffset) { 2517 FX_Free(m_pFileBuf); 2518 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); 2519 m_HeaderOffset = HeaderOffset; 2520 m_FileLen = pFileAccess->GetSize(); 2521 m_Pos = 0; 2522 m_pFileAccess = pFileAccess; 2523 m_BufOffset = 0; 2524 pFileAccess->ReadBlock( 2525 m_pFileBuf, 0, 2526 (size_t)((FX_FILESIZE)m_BufSize > m_FileLen ? m_FileLen : m_BufSize)); 2527 } 2528 int32_t CPDF_SyntaxParser::GetDirectNum() { 2529 bool bIsNumber; 2530 GetNextWordInternal(&bIsNumber); 2531 if (!bIsNumber) 2532 return 0; 2533 2534 m_WordBuffer[m_WordSize] = 0; 2535 return FXSYS_atoi(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); 2536 } 2537 2538 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, 2539 FX_FILESIZE limit, 2540 const CFX_ByteStringC& tag, 2541 FX_BOOL checkKeyword) { 2542 const FX_DWORD taglen = tag.GetLength(); 2543 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); 2544 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && 2545 !PDFCharIsWhitespace(tag[taglen - 1]); 2546 uint8_t ch; 2547 if (bCheckRight && startpos + (int32_t)taglen <= limit && 2548 GetCharAt(startpos + (int32_t)taglen, ch)) { 2549 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 2550 (checkKeyword && PDFCharIsDelimiter(ch))) { 2551 return false; 2552 } 2553 } 2554 2555 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { 2556 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 2557 (checkKeyword && PDFCharIsDelimiter(ch))) { 2558 return false; 2559 } 2560 } 2561 return true; 2562 } 2563 2564 FX_BOOL CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, 2565 FX_BOOL bWholeWord, 2566 FX_BOOL bForward, 2567 FX_FILESIZE limit) { 2568 int32_t taglen = tag.GetLength(); 2569 if (taglen == 0) { 2570 return FALSE; 2571 } 2572 FX_FILESIZE pos = m_Pos; 2573 int32_t offset = 0; 2574 if (!bForward) { 2575 offset = taglen - 1; 2576 } 2577 const uint8_t* tag_data = tag.GetPtr(); 2578 uint8_t byte; 2579 while (1) { 2580 if (bForward) { 2581 if (limit) { 2582 if (pos >= m_Pos + limit) { 2583 return FALSE; 2584 } 2585 } 2586 if (!GetCharAt(pos, byte)) { 2587 return FALSE; 2588 } 2589 } else { 2590 if (limit) { 2591 if (pos <= m_Pos - limit) { 2592 return FALSE; 2593 } 2594 } 2595 if (!GetCharAtBackward(pos, byte)) { 2596 return FALSE; 2597 } 2598 } 2599 if (byte == tag_data[offset]) { 2600 if (bForward) { 2601 offset++; 2602 if (offset < taglen) { 2603 pos++; 2604 continue; 2605 } 2606 } else { 2607 offset--; 2608 if (offset >= 0) { 2609 pos--; 2610 continue; 2611 } 2612 } 2613 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; 2614 if (!bWholeWord || IsWholeWord(startpos, limit, tag, FALSE)) { 2615 m_Pos = startpos; 2616 return TRUE; 2617 } 2618 } 2619 if (bForward) { 2620 offset = byte == tag_data[0] ? 1 : 0; 2621 pos++; 2622 } else { 2623 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; 2624 pos--; 2625 } 2626 if (pos < 0) { 2627 return FALSE; 2628 } 2629 } 2630 return FALSE; 2631 } 2632 2633 int32_t CPDF_SyntaxParser::SearchMultiWord(const CFX_ByteStringC& tags, 2634 FX_BOOL bWholeWord, 2635 FX_FILESIZE limit) { 2636 int32_t ntags = 1; 2637 for (int i = 0; i < tags.GetLength(); ++i) { 2638 if (tags[i] == 0) { 2639 ++ntags; 2640 } 2641 } 2642 2643 std::vector<SearchTagRecord> patterns(ntags); 2644 FX_DWORD start = 0; 2645 FX_DWORD itag = 0; 2646 FX_DWORD max_len = 0; 2647 for (int i = 0; i <= tags.GetLength(); ++i) { 2648 if (tags[i] == 0) { 2649 FX_DWORD len = i - start; 2650 max_len = std::max(len, max_len); 2651 patterns[itag].m_pTag = tags.GetCStr() + start; 2652 patterns[itag].m_Len = len; 2653 patterns[itag].m_Offset = 0; 2654 start = i + 1; 2655 ++itag; 2656 } 2657 } 2658 2659 const FX_FILESIZE pos_limit = m_Pos + limit; 2660 for (FX_FILESIZE pos = m_Pos; !limit || pos < pos_limit; ++pos) { 2661 uint8_t byte; 2662 if (!GetCharAt(pos, byte)) 2663 break; 2664 2665 for (int i = 0; i < ntags; ++i) { 2666 SearchTagRecord& pat = patterns[i]; 2667 if (pat.m_pTag[pat.m_Offset] != byte) { 2668 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; 2669 continue; 2670 } 2671 2672 ++pat.m_Offset; 2673 if (pat.m_Offset != pat.m_Len) 2674 continue; 2675 2676 if (!bWholeWord || 2677 IsWholeWord(pos - pat.m_Len, limit, 2678 CFX_ByteStringC(pat.m_pTag, pat.m_Len), FALSE)) { 2679 return i; 2680 } 2681 2682 pat.m_Offset = (pat.m_pTag[0] == byte) ? 1 : 0; 2683 } 2684 } 2685 return -1; 2686 } 2687 2688 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, 2689 FX_FILESIZE limit) { 2690 int32_t taglen = tag.GetLength(); 2691 int32_t match = 0; 2692 limit += m_Pos; 2693 FX_FILESIZE startpos = m_Pos; 2694 while (1) { 2695 uint8_t ch; 2696 if (!GetNextChar(ch)) { 2697 return -1; 2698 } 2699 if (ch == tag[match]) { 2700 match++; 2701 if (match == taglen) { 2702 return m_Pos - startpos - taglen; 2703 } 2704 } else { 2705 match = ch == tag[0] ? 1 : 0; 2706 } 2707 if (limit && m_Pos == limit) { 2708 return -1; 2709 } 2710 } 2711 return -1; 2712 } 2713 void CPDF_SyntaxParser::GetBinary(uint8_t* buffer, FX_DWORD size) { 2714 FX_DWORD offset = 0; 2715 uint8_t ch; 2716 while (1) { 2717 if (!GetNextChar(ch)) { 2718 return; 2719 } 2720 buffer[offset++] = ch; 2721 if (offset == size) { 2722 break; 2723 } 2724 } 2725 } 2726 2727 class CPDF_DataAvail final : public IPDF_DataAvail { 2728 public: 2729 CPDF_DataAvail(IFX_FileAvail* pFileAvail, 2730 IFX_FileRead* pFileRead, 2731 FX_BOOL bSupportHintTable); 2732 ~CPDF_DataAvail() override; 2733 2734 // IPDF_DataAvail: 2735 DocAvailStatus IsDocAvail(IFX_DownloadHints* pHints) override; 2736 void SetDocument(CPDF_Document* pDoc) override; 2737 DocAvailStatus IsPageAvail(int iPage, IFX_DownloadHints* pHints) override; 2738 DocFormStatus IsFormAvail(IFX_DownloadHints* pHints) override; 2739 DocLinearizationStatus IsLinearizedPDF() override; 2740 FX_BOOL IsLinearized() override { return m_bLinearized; } 2741 void GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, FX_DWORD* pSize) override; 2742 2743 int GetPageCount() const; 2744 CPDF_Dictionary* GetPage(int index); 2745 2746 friend class CPDF_HintTables; 2747 2748 protected: 2749 static const int kMaxDataAvailRecursionDepth = 64; 2750 static int s_CurrentDataAvailRecursionDepth; 2751 static const int kMaxPageRecursionDepth = 1024; 2752 2753 FX_DWORD GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset); 2754 FX_BOOL IsObjectsAvail(CFX_ArrayTemplate<CPDF_Object*>& obj_array, 2755 FX_BOOL bParsePage, 2756 IFX_DownloadHints* pHints, 2757 CFX_ArrayTemplate<CPDF_Object*>& ret_array); 2758 FX_BOOL CheckDocStatus(IFX_DownloadHints* pHints); 2759 FX_BOOL CheckHeader(IFX_DownloadHints* pHints); 2760 FX_BOOL CheckFirstPage(IFX_DownloadHints* pHints); 2761 FX_BOOL CheckHintTables(IFX_DownloadHints* pHints); 2762 FX_BOOL CheckEnd(IFX_DownloadHints* pHints); 2763 FX_BOOL CheckCrossRef(IFX_DownloadHints* pHints); 2764 FX_BOOL CheckCrossRefItem(IFX_DownloadHints* pHints); 2765 FX_BOOL CheckTrailer(IFX_DownloadHints* pHints); 2766 FX_BOOL CheckRoot(IFX_DownloadHints* pHints); 2767 FX_BOOL CheckInfo(IFX_DownloadHints* pHints); 2768 FX_BOOL CheckPages(IFX_DownloadHints* pHints); 2769 FX_BOOL CheckPage(IFX_DownloadHints* pHints); 2770 FX_BOOL CheckResources(IFX_DownloadHints* pHints); 2771 FX_BOOL CheckAnnots(IFX_DownloadHints* pHints); 2772 FX_BOOL CheckAcroForm(IFX_DownloadHints* pHints); 2773 FX_BOOL CheckAcroFormSubObject(IFX_DownloadHints* pHints); 2774 FX_BOOL CheckTrailerAppend(IFX_DownloadHints* pHints); 2775 FX_BOOL CheckPageStatus(IFX_DownloadHints* pHints); 2776 FX_BOOL CheckAllCrossRefStream(IFX_DownloadHints* pHints); 2777 2778 int32_t CheckCrossRefStream(IFX_DownloadHints* pHints, 2779 FX_FILESIZE& xref_offset); 2780 FX_BOOL IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen); 2781 void SetStartOffset(FX_FILESIZE dwOffset); 2782 FX_BOOL GetNextToken(CFX_ByteString& token); 2783 FX_BOOL GetNextChar(uint8_t& ch); 2784 CPDF_Object* ParseIndirectObjectAt( 2785 FX_FILESIZE pos, 2786 FX_DWORD objnum, 2787 CPDF_IndirectObjectHolder* pObjList = NULL); 2788 CPDF_Object* GetObject(FX_DWORD objnum, 2789 IFX_DownloadHints* pHints, 2790 FX_BOOL* pExistInFile); 2791 FX_BOOL GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages); 2792 FX_BOOL PreparePageItem(); 2793 FX_BOOL LoadPages(IFX_DownloadHints* pHints); 2794 FX_BOOL LoadAllXref(IFX_DownloadHints* pHints); 2795 FX_BOOL LoadAllFile(IFX_DownloadHints* pHints); 2796 DocAvailStatus CheckLinearizedData(IFX_DownloadHints* pHints); 2797 FX_BOOL CheckPageAnnots(int iPage, IFX_DownloadHints* pHints); 2798 2799 DocAvailStatus CheckLinearizedFirstPage(int iPage, IFX_DownloadHints* pHints); 2800 FX_BOOL HaveResourceAncestor(CPDF_Dictionary* pDict); 2801 FX_BOOL CheckPage(int32_t iPage, IFX_DownloadHints* pHints); 2802 FX_BOOL LoadDocPages(IFX_DownloadHints* pHints); 2803 FX_BOOL LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints); 2804 FX_BOOL CheckPageNode(CPDF_PageNode& pageNodes, 2805 int32_t iPage, 2806 int32_t& iCount, 2807 IFX_DownloadHints* pHints, 2808 int level); 2809 FX_BOOL CheckUnkownPageNode(FX_DWORD dwPageNo, 2810 CPDF_PageNode* pPageNode, 2811 IFX_DownloadHints* pHints); 2812 FX_BOOL CheckArrayPageNode(FX_DWORD dwPageNo, 2813 CPDF_PageNode* pPageNode, 2814 IFX_DownloadHints* pHints); 2815 FX_BOOL CheckPageCount(IFX_DownloadHints* pHints); 2816 bool IsFirstCheck(int iPage); 2817 void ResetFirstCheck(int iPage); 2818 FX_BOOL IsDataAvail(FX_FILESIZE offset, 2819 FX_DWORD size, 2820 IFX_DownloadHints* pHints); 2821 2822 CPDF_Parser m_parser; 2823 2824 CPDF_SyntaxParser m_syntaxParser; 2825 2826 CPDF_Object* m_pRoot; 2827 2828 FX_DWORD m_dwRootObjNum; 2829 2830 FX_DWORD m_dwInfoObjNum; 2831 2832 CPDF_Object* m_pLinearized; 2833 2834 CPDF_Object* m_pTrailer; 2835 2836 FX_BOOL m_bDocAvail; 2837 2838 FX_FILESIZE m_dwHeaderOffset; 2839 2840 FX_FILESIZE m_dwLastXRefOffset; 2841 2842 FX_FILESIZE m_dwXRefOffset; 2843 2844 FX_FILESIZE m_dwTrailerOffset; 2845 2846 FX_FILESIZE m_dwCurrentOffset; 2847 2848 PDF_DATAAVAIL_STATUS m_docStatus; 2849 2850 FX_FILESIZE m_dwFileLen; 2851 2852 CPDF_Document* m_pDocument; 2853 2854 std::set<FX_DWORD> m_ObjectSet; 2855 2856 CFX_ArrayTemplate<CPDF_Object*> m_objs_array; 2857 2858 FX_FILESIZE m_Pos; 2859 2860 FX_FILESIZE m_bufferOffset; 2861 2862 FX_DWORD m_bufferSize; 2863 2864 CFX_ByteString m_WordBuf; 2865 2866 uint8_t m_bufferData[512]; 2867 2868 CFX_FileSizeArray m_CrossOffset; 2869 2870 CFX_DWordArray m_XRefStreamList; 2871 2872 CFX_DWordArray m_PageObjList; 2873 2874 FX_DWORD m_PagesObjNum; 2875 2876 FX_BOOL m_bLinearized; 2877 2878 FX_DWORD m_dwFirstPageNo; 2879 2880 FX_BOOL m_bLinearedDataOK; 2881 2882 FX_BOOL m_bMainXRefLoadTried; 2883 2884 FX_BOOL m_bMainXRefLoadedOK; 2885 2886 FX_BOOL m_bPagesTreeLoad; 2887 2888 FX_BOOL m_bPagesLoad; 2889 2890 CPDF_Parser* m_pCurrentParser; 2891 2892 FX_FILESIZE m_dwCurrentXRefSteam; 2893 2894 FX_BOOL m_bAnnotsLoad; 2895 2896 FX_BOOL m_bHaveAcroForm; 2897 2898 FX_DWORD m_dwAcroFormObjNum; 2899 2900 FX_BOOL m_bAcroFormLoad; 2901 2902 CPDF_Object* m_pAcroForm; 2903 2904 CFX_ArrayTemplate<CPDF_Object*> m_arrayAcroforms; 2905 2906 CPDF_Dictionary* m_pPageDict; 2907 2908 CPDF_Object* m_pPageResource; 2909 2910 FX_BOOL m_bNeedDownLoadResource; 2911 2912 FX_BOOL m_bPageLoadedOK; 2913 2914 FX_BOOL m_bLinearizedFormParamLoad; 2915 2916 CFX_ArrayTemplate<CPDF_Object*> m_PagesArray; 2917 2918 FX_DWORD m_dwEncryptObjNum; 2919 2920 FX_FILESIZE m_dwPrevXRefOffset; 2921 2922 FX_BOOL m_bTotalLoadPageTree; 2923 2924 FX_BOOL m_bCurPageDictLoadOK; 2925 2926 CPDF_PageNode m_pageNodes; 2927 2928 std::set<FX_DWORD> m_pageMapCheckState; 2929 std::set<FX_DWORD> m_pagesLoadState; 2930 2931 std::unique_ptr<CPDF_HintTables> m_pHintTables; 2932 FX_BOOL m_bSupportHintTable; 2933 }; 2934 2935 IPDF_DataAvail::IPDF_DataAvail(IFX_FileAvail* pFileAvail, 2936 IFX_FileRead* pFileRead) 2937 : m_pFileAvail(pFileAvail), m_pFileRead(pFileRead) {} 2938 2939 // static 2940 IPDF_DataAvail* IPDF_DataAvail::Create(IFX_FileAvail* pFileAvail, 2941 IFX_FileRead* pFileRead) { 2942 return new CPDF_DataAvail(pFileAvail, pFileRead, TRUE); 2943 } 2944 2945 // static 2946 int CPDF_DataAvail::s_CurrentDataAvailRecursionDepth = 0; 2947 2948 CPDF_DataAvail::CPDF_DataAvail(IFX_FileAvail* pFileAvail, 2949 IFX_FileRead* pFileRead, 2950 FX_BOOL bSupportHintTable) 2951 : IPDF_DataAvail(pFileAvail, pFileRead) { 2952 m_Pos = 0; 2953 m_dwFileLen = 0; 2954 if (m_pFileRead) { 2955 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); 2956 } 2957 m_dwCurrentOffset = 0; 2958 m_dwXRefOffset = 0; 2959 m_bufferOffset = 0; 2960 m_dwFirstPageNo = 0; 2961 m_bufferSize = 0; 2962 m_PagesObjNum = 0; 2963 m_dwCurrentXRefSteam = 0; 2964 m_dwAcroFormObjNum = 0; 2965 m_dwInfoObjNum = 0; 2966 m_pDocument = 0; 2967 m_dwEncryptObjNum = 0; 2968 m_dwPrevXRefOffset = 0; 2969 m_dwLastXRefOffset = 0; 2970 m_bDocAvail = FALSE; 2971 m_bMainXRefLoadTried = FALSE; 2972 m_bDocAvail = FALSE; 2973 m_bLinearized = FALSE; 2974 m_bPagesLoad = FALSE; 2975 m_bPagesTreeLoad = FALSE; 2976 m_bMainXRefLoadedOK = FALSE; 2977 m_bAnnotsLoad = FALSE; 2978 m_bHaveAcroForm = FALSE; 2979 m_bAcroFormLoad = FALSE; 2980 m_bPageLoadedOK = FALSE; 2981 m_bNeedDownLoadResource = FALSE; 2982 m_bLinearizedFormParamLoad = FALSE; 2983 m_pLinearized = NULL; 2984 m_pRoot = NULL; 2985 m_pTrailer = NULL; 2986 m_pCurrentParser = NULL; 2987 m_pAcroForm = NULL; 2988 m_pPageDict = NULL; 2989 m_pPageResource = NULL; 2990 m_docStatus = PDF_DATAAVAIL_HEADER; 2991 m_parser.m_bOwnFileRead = FALSE; 2992 m_bTotalLoadPageTree = FALSE; 2993 m_bCurPageDictLoadOK = FALSE; 2994 m_bLinearedDataOK = FALSE; 2995 m_bSupportHintTable = bSupportHintTable; 2996 } 2997 CPDF_DataAvail::~CPDF_DataAvail() { 2998 if (m_pLinearized) { 2999 m_pLinearized->Release(); 3000 } 3001 if (m_pRoot) { 3002 m_pRoot->Release(); 3003 } 3004 if (m_pTrailer) { 3005 m_pTrailer->Release(); 3006 } 3007 3008 int iSize = m_arrayAcroforms.GetSize(); 3009 for (int i = 0; i < iSize; ++i) { 3010 m_arrayAcroforms.GetAt(i)->Release(); 3011 } 3012 } 3013 void CPDF_DataAvail::SetDocument(CPDF_Document* pDoc) { 3014 m_pDocument = pDoc; 3015 } 3016 FX_DWORD CPDF_DataAvail::GetObjectSize(FX_DWORD objnum, FX_FILESIZE& offset) { 3017 CPDF_Parser* pParser = (CPDF_Parser*)(m_pDocument->GetParser()); 3018 if (!pParser || !pParser->IsValidObjectNumber(objnum)) 3019 return 0; 3020 3021 if (pParser->m_V5Type[objnum] == 2) 3022 objnum = pParser->m_ObjectInfo[objnum].pos; 3023 3024 if (pParser->m_V5Type[objnum] == 1 || pParser->m_V5Type[objnum] == 255) { 3025 offset = pParser->m_ObjectInfo[objnum].pos; 3026 if (offset == 0) { 3027 return 0; 3028 } 3029 void* pResult = FXSYS_bsearch(&offset, pParser->m_SortedOffset.GetData(), 3030 pParser->m_SortedOffset.GetSize(), 3031 sizeof(FX_FILESIZE), CompareFileSize); 3032 if (!pResult) { 3033 return 0; 3034 } 3035 if ((FX_FILESIZE*)pResult - 3036 (FX_FILESIZE*)pParser->m_SortedOffset.GetData() == 3037 pParser->m_SortedOffset.GetSize() - 1) { 3038 return 0; 3039 } 3040 return (FX_DWORD)(((FX_FILESIZE*)pResult)[1] - offset); 3041 } 3042 return 0; 3043 } 3044 FX_BOOL CPDF_DataAvail::IsObjectsAvail( 3045 CFX_ArrayTemplate<CPDF_Object*>& obj_array, 3046 FX_BOOL bParsePage, 3047 IFX_DownloadHints* pHints, 3048 CFX_ArrayTemplate<CPDF_Object*>& ret_array) { 3049 if (!obj_array.GetSize()) { 3050 return TRUE; 3051 } 3052 FX_DWORD count = 0; 3053 CFX_ArrayTemplate<CPDF_Object*> new_obj_array; 3054 int32_t i = 0; 3055 for (i = 0; i < obj_array.GetSize(); i++) { 3056 CPDF_Object* pObj = obj_array[i]; 3057 if (!pObj) 3058 continue; 3059 3060 int32_t type = pObj->GetType(); 3061 switch (type) { 3062 case PDFOBJ_ARRAY: { 3063 CPDF_Array* pArray = pObj->GetArray(); 3064 for (FX_DWORD k = 0; k < pArray->GetCount(); k++) { 3065 new_obj_array.Add(pArray->GetElement(k)); 3066 } 3067 } break; 3068 case PDFOBJ_STREAM: 3069 pObj = pObj->GetDict(); 3070 case PDFOBJ_DICTIONARY: { 3071 CPDF_Dictionary* pDict = pObj->GetDict(); 3072 if (pDict && pDict->GetString("Type") == "Page" && !bParsePage) { 3073 continue; 3074 } 3075 for (const auto& it : *pDict) { 3076 const CFX_ByteString& key = it.first; 3077 CPDF_Object* value = it.second; 3078 if (key != "Parent") { 3079 new_obj_array.Add(value); 3080 } 3081 } 3082 } break; 3083 case PDFOBJ_REFERENCE: { 3084 CPDF_Reference* pRef = pObj->AsReference(); 3085 FX_DWORD dwNum = pRef->GetRefObjNum(); 3086 FX_FILESIZE offset; 3087 FX_DWORD size = GetObjectSize(dwNum, offset); 3088 if (size == 0 || offset < 0 || offset >= m_dwFileLen) { 3089 break; 3090 } 3091 if (!IsDataAvail(offset, size, pHints)) { 3092 ret_array.Add(pObj); 3093 count++; 3094 } else if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) { 3095 m_ObjectSet.insert(dwNum); 3096 CPDF_Object* pReferred = 3097 m_pDocument->GetIndirectObject(pRef->GetRefObjNum(), nullptr); 3098 if (pReferred) { 3099 new_obj_array.Add(pReferred); 3100 } 3101 } 3102 } break; 3103 } 3104 } 3105 if (count > 0) { 3106 int32_t iSize = new_obj_array.GetSize(); 3107 for (i = 0; i < iSize; ++i) { 3108 CPDF_Object* pObj = new_obj_array[i]; 3109 if (CPDF_Reference* pRef = pObj->AsReference()) { 3110 FX_DWORD dwNum = pRef->GetRefObjNum(); 3111 if (!pdfium::ContainsKey(m_ObjectSet, dwNum)) 3112 ret_array.Add(pObj); 3113 } else { 3114 ret_array.Add(pObj); 3115 } 3116 } 3117 return FALSE; 3118 } 3119 obj_array.RemoveAll(); 3120 obj_array.Append(new_obj_array); 3121 return IsObjectsAvail(obj_array, FALSE, pHints, ret_array); 3122 } 3123 3124 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsDocAvail( 3125 IFX_DownloadHints* pHints) { 3126 if (!m_dwFileLen && m_pFileRead) { 3127 m_dwFileLen = (FX_DWORD)m_pFileRead->GetSize(); 3128 if (!m_dwFileLen) { 3129 return DataError; 3130 } 3131 } 3132 while (!m_bDocAvail) { 3133 if (!CheckDocStatus(pHints)) { 3134 return DataNotAvailable; 3135 } 3136 } 3137 return DataAvailable; 3138 } 3139 3140 FX_BOOL CPDF_DataAvail::CheckAcroFormSubObject(IFX_DownloadHints* pHints) { 3141 if (!m_objs_array.GetSize()) { 3142 m_objs_array.RemoveAll(); 3143 m_ObjectSet.clear(); 3144 CFX_ArrayTemplate<CPDF_Object*> obj_array; 3145 obj_array.Append(m_arrayAcroforms); 3146 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); 3147 if (bRet) { 3148 m_objs_array.RemoveAll(); 3149 } 3150 return bRet; 3151 } 3152 CFX_ArrayTemplate<CPDF_Object*> new_objs_array; 3153 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); 3154 if (bRet) { 3155 int32_t iSize = m_arrayAcroforms.GetSize(); 3156 for (int32_t i = 0; i < iSize; ++i) { 3157 m_arrayAcroforms.GetAt(i)->Release(); 3158 } 3159 m_arrayAcroforms.RemoveAll(); 3160 } else { 3161 m_objs_array.RemoveAll(); 3162 m_objs_array.Append(new_objs_array); 3163 } 3164 return bRet; 3165 } 3166 FX_BOOL CPDF_DataAvail::CheckAcroForm(IFX_DownloadHints* pHints) { 3167 FX_BOOL bExist = FALSE; 3168 m_pAcroForm = GetObject(m_dwAcroFormObjNum, pHints, &bExist); 3169 if (!bExist) { 3170 m_docStatus = PDF_DATAAVAIL_PAGETREE; 3171 return TRUE; 3172 } 3173 if (!m_pAcroForm) { 3174 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 3175 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3176 return TRUE; 3177 } 3178 return FALSE; 3179 } 3180 m_arrayAcroforms.Add(m_pAcroForm); 3181 m_docStatus = PDF_DATAAVAIL_PAGETREE; 3182 return TRUE; 3183 } 3184 FX_BOOL CPDF_DataAvail::CheckDocStatus(IFX_DownloadHints* pHints) { 3185 switch (m_docStatus) { 3186 case PDF_DATAAVAIL_HEADER: 3187 return CheckHeader(pHints); 3188 case PDF_DATAAVAIL_FIRSTPAGE: 3189 case PDF_DATAAVAIL_FIRSTPAGE_PREPARE: 3190 return CheckFirstPage(pHints); 3191 case PDF_DATAAVAIL_HINTTABLE: 3192 return CheckHintTables(pHints); 3193 case PDF_DATAAVAIL_END: 3194 return CheckEnd(pHints); 3195 case PDF_DATAAVAIL_CROSSREF: 3196 return CheckCrossRef(pHints); 3197 case PDF_DATAAVAIL_CROSSREF_ITEM: 3198 return CheckCrossRefItem(pHints); 3199 case PDF_DATAAVAIL_CROSSREF_STREAM: 3200 return CheckAllCrossRefStream(pHints); 3201 case PDF_DATAAVAIL_TRAILER: 3202 return CheckTrailer(pHints); 3203 case PDF_DATAAVAIL_TRAILER_APPEND: 3204 return CheckTrailerAppend(pHints); 3205 case PDF_DATAAVAIL_LOADALLCROSSREF: 3206 return LoadAllXref(pHints); 3207 case PDF_DATAAVAIL_LOADALLFILE: 3208 return LoadAllFile(pHints); 3209 case PDF_DATAAVAIL_ROOT: 3210 return CheckRoot(pHints); 3211 case PDF_DATAAVAIL_INFO: 3212 return CheckInfo(pHints); 3213 case PDF_DATAAVAIL_ACROFORM: 3214 return CheckAcroForm(pHints); 3215 case PDF_DATAAVAIL_PAGETREE: 3216 if (m_bTotalLoadPageTree) { 3217 return CheckPages(pHints); 3218 } 3219 return LoadDocPages(pHints); 3220 case PDF_DATAAVAIL_PAGE: 3221 if (m_bTotalLoadPageTree) { 3222 return CheckPage(pHints); 3223 } 3224 m_docStatus = PDF_DATAAVAIL_PAGE_LATERLOAD; 3225 return TRUE; 3226 case PDF_DATAAVAIL_ERROR: 3227 return LoadAllFile(pHints); 3228 case PDF_DATAAVAIL_PAGE_LATERLOAD: 3229 m_docStatus = PDF_DATAAVAIL_PAGE; 3230 default: 3231 m_bDocAvail = TRUE; 3232 return TRUE; 3233 } 3234 } 3235 FX_BOOL CPDF_DataAvail::CheckPageStatus(IFX_DownloadHints* pHints) { 3236 switch (m_docStatus) { 3237 case PDF_DATAAVAIL_PAGETREE: 3238 return CheckPages(pHints); 3239 case PDF_DATAAVAIL_PAGE: 3240 return CheckPage(pHints); 3241 case PDF_DATAAVAIL_ERROR: 3242 return LoadAllFile(pHints); 3243 default: 3244 m_bPagesTreeLoad = TRUE; 3245 m_bPagesLoad = TRUE; 3246 return TRUE; 3247 } 3248 } 3249 FX_BOOL CPDF_DataAvail::LoadAllFile(IFX_DownloadHints* pHints) { 3250 if (m_pFileAvail->IsDataAvail(0, (FX_DWORD)m_dwFileLen)) { 3251 m_docStatus = PDF_DATAAVAIL_DONE; 3252 return TRUE; 3253 } 3254 pHints->AddSegment(0, (FX_DWORD)m_dwFileLen); 3255 return FALSE; 3256 } 3257 FX_BOOL CPDF_DataAvail::LoadAllXref(IFX_DownloadHints* pHints) { 3258 m_parser.m_Syntax.InitParser(m_pFileRead, (FX_DWORD)m_dwHeaderOffset); 3259 m_parser.m_bOwnFileRead = FALSE; 3260 if (!m_parser.LoadAllCrossRefV4(m_dwLastXRefOffset) && 3261 !m_parser.LoadAllCrossRefV5(m_dwLastXRefOffset)) { 3262 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3263 return FALSE; 3264 } 3265 FXSYS_qsort(m_parser.m_SortedOffset.GetData(), 3266 m_parser.m_SortedOffset.GetSize(), sizeof(FX_FILESIZE), 3267 CompareFileSize); 3268 m_dwRootObjNum = m_parser.GetRootObjNum(); 3269 m_dwInfoObjNum = m_parser.GetInfoObjNum(); 3270 m_pCurrentParser = &m_parser; 3271 m_docStatus = PDF_DATAAVAIL_ROOT; 3272 return TRUE; 3273 } 3274 CPDF_Object* CPDF_DataAvail::GetObject(FX_DWORD objnum, 3275 IFX_DownloadHints* pHints, 3276 FX_BOOL* pExistInFile) { 3277 CPDF_Object* pRet = nullptr; 3278 FX_DWORD size = 0; 3279 FX_FILESIZE offset = 0; 3280 CPDF_Parser* pParser = nullptr; 3281 if (pExistInFile) 3282 *pExistInFile = TRUE; 3283 3284 if (m_pDocument) { 3285 size = GetObjectSize(objnum, offset); 3286 pParser = (CPDF_Parser*)(m_pDocument->GetParser()); 3287 } else { 3288 size = (FX_DWORD)m_parser.GetObjectSize(objnum); 3289 offset = m_parser.GetObjectOffset(objnum); 3290 pParser = &m_parser; 3291 } 3292 if (!IsDataAvail(offset, size, pHints)) { 3293 return nullptr; 3294 } 3295 if (pParser) { 3296 pRet = pParser->ParseIndirectObject(NULL, objnum, NULL); 3297 } 3298 3299 if (!pRet && pExistInFile) { 3300 *pExistInFile = FALSE; 3301 } 3302 3303 return pRet; 3304 } 3305 3306 FX_BOOL CPDF_DataAvail::CheckInfo(IFX_DownloadHints* pHints) { 3307 FX_BOOL bExist = FALSE; 3308 CPDF_Object* pInfo = GetObject(m_dwInfoObjNum, pHints, &bExist); 3309 if (!bExist) { 3310 if (m_bHaveAcroForm) { 3311 m_docStatus = PDF_DATAAVAIL_ACROFORM; 3312 } else { 3313 m_docStatus = PDF_DATAAVAIL_PAGETREE; 3314 } 3315 return TRUE; 3316 } 3317 if (!pInfo) { 3318 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 3319 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3320 return TRUE; 3321 } 3322 if (m_Pos == m_dwFileLen) { 3323 m_docStatus = PDF_DATAAVAIL_ERROR; 3324 } 3325 return FALSE; 3326 } 3327 if (pInfo) { 3328 pInfo->Release(); 3329 } 3330 if (m_bHaveAcroForm) { 3331 m_docStatus = PDF_DATAAVAIL_ACROFORM; 3332 } else { 3333 m_docStatus = PDF_DATAAVAIL_PAGETREE; 3334 } 3335 return TRUE; 3336 } 3337 FX_BOOL CPDF_DataAvail::CheckRoot(IFX_DownloadHints* pHints) { 3338 FX_BOOL bExist = FALSE; 3339 m_pRoot = GetObject(m_dwRootObjNum, pHints, &bExist); 3340 if (!bExist) { 3341 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3342 return TRUE; 3343 } 3344 if (!m_pRoot) { 3345 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 3346 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3347 return TRUE; 3348 } 3349 return FALSE; 3350 } 3351 CPDF_Dictionary* pDict = m_pRoot->GetDict(); 3352 if (!pDict) { 3353 m_docStatus = PDF_DATAAVAIL_ERROR; 3354 return FALSE; 3355 } 3356 CPDF_Reference* pRef = ToReference(pDict->GetElement("Pages")); 3357 if (!pRef) { 3358 m_docStatus = PDF_DATAAVAIL_ERROR; 3359 return FALSE; 3360 } 3361 3362 m_PagesObjNum = pRef->GetRefObjNum(); 3363 CPDF_Reference* pAcroFormRef = 3364 ToReference(m_pRoot->GetDict()->GetElement("AcroForm")); 3365 if (pAcroFormRef) { 3366 m_bHaveAcroForm = TRUE; 3367 m_dwAcroFormObjNum = pAcroFormRef->GetRefObjNum(); 3368 } 3369 3370 if (m_dwInfoObjNum) { 3371 m_docStatus = PDF_DATAAVAIL_INFO; 3372 } else { 3373 m_docStatus = 3374 m_bHaveAcroForm ? PDF_DATAAVAIL_ACROFORM : PDF_DATAAVAIL_PAGETREE; 3375 } 3376 return TRUE; 3377 } 3378 FX_BOOL CPDF_DataAvail::PreparePageItem() { 3379 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 3380 CPDF_Reference* pRef = 3381 ToReference(pRoot ? pRoot->GetElement("Pages") : nullptr); 3382 if (!pRef) { 3383 m_docStatus = PDF_DATAAVAIL_ERROR; 3384 return FALSE; 3385 } 3386 3387 m_PagesObjNum = pRef->GetRefObjNum(); 3388 m_pCurrentParser = (CPDF_Parser*)m_pDocument->GetParser(); 3389 m_docStatus = PDF_DATAAVAIL_PAGETREE; 3390 return TRUE; 3391 } 3392 bool CPDF_DataAvail::IsFirstCheck(int iPage) { 3393 return m_pageMapCheckState.insert(iPage).second; 3394 } 3395 void CPDF_DataAvail::ResetFirstCheck(int iPage) { 3396 m_pageMapCheckState.erase(iPage); 3397 } 3398 FX_BOOL CPDF_DataAvail::CheckPage(IFX_DownloadHints* pHints) { 3399 FX_DWORD iPageObjs = m_PageObjList.GetSize(); 3400 CFX_DWordArray UnavailObjList; 3401 for (FX_DWORD i = 0; i < iPageObjs; ++i) { 3402 FX_DWORD dwPageObjNum = m_PageObjList.GetAt(i); 3403 FX_BOOL bExist = FALSE; 3404 CPDF_Object* pObj = GetObject(dwPageObjNum, pHints, &bExist); 3405 if (!pObj) { 3406 if (bExist) { 3407 UnavailObjList.Add(dwPageObjNum); 3408 } 3409 continue; 3410 } 3411 if (pObj->IsArray()) { 3412 CPDF_Array* pArray = pObj->GetArray(); 3413 if (pArray) { 3414 int32_t iSize = pArray->GetCount(); 3415 for (int32_t j = 0; j < iSize; ++j) { 3416 if (CPDF_Reference* pRef = ToReference(pArray->GetElement(j))) 3417 UnavailObjList.Add(pRef->GetRefObjNum()); 3418 } 3419 } 3420 } 3421 if (!pObj->IsDictionary()) { 3422 pObj->Release(); 3423 continue; 3424 } 3425 CFX_ByteString type = pObj->GetDict()->GetString("Type"); 3426 if (type == "Pages") { 3427 m_PagesArray.Add(pObj); 3428 continue; 3429 } 3430 pObj->Release(); 3431 } 3432 m_PageObjList.RemoveAll(); 3433 if (UnavailObjList.GetSize()) { 3434 m_PageObjList.Append(UnavailObjList); 3435 return FALSE; 3436 } 3437 FX_DWORD iPages = m_PagesArray.GetSize(); 3438 for (FX_DWORD i = 0; i < iPages; i++) { 3439 CPDF_Object* pPages = m_PagesArray.GetAt(i); 3440 if (!pPages) 3441 continue; 3442 3443 if (!GetPageKids(m_pCurrentParser, pPages)) { 3444 pPages->Release(); 3445 while (++i < iPages) { 3446 pPages = m_PagesArray.GetAt(i); 3447 pPages->Release(); 3448 } 3449 m_PagesArray.RemoveAll(); 3450 m_docStatus = PDF_DATAAVAIL_ERROR; 3451 return FALSE; 3452 } 3453 pPages->Release(); 3454 } 3455 m_PagesArray.RemoveAll(); 3456 if (!m_PageObjList.GetSize()) { 3457 m_docStatus = PDF_DATAAVAIL_DONE; 3458 } 3459 return TRUE; 3460 } 3461 FX_BOOL CPDF_DataAvail::GetPageKids(CPDF_Parser* pParser, CPDF_Object* pPages) { 3462 if (!pParser) { 3463 m_docStatus = PDF_DATAAVAIL_ERROR; 3464 return FALSE; 3465 } 3466 CPDF_Dictionary* pDict = pPages->GetDict(); 3467 CPDF_Object* pKids = pDict ? pDict->GetElement("Kids") : NULL; 3468 if (!pKids) { 3469 return TRUE; 3470 } 3471 switch (pKids->GetType()) { 3472 case PDFOBJ_REFERENCE: 3473 m_PageObjList.Add(pKids->AsReference()->GetRefObjNum()); 3474 break; 3475 case PDFOBJ_ARRAY: { 3476 CPDF_Array* pKidsArray = pKids->AsArray(); 3477 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { 3478 if (CPDF_Reference* pRef = ToReference(pKidsArray->GetElement(i))) 3479 m_PageObjList.Add(pRef->GetRefObjNum()); 3480 } 3481 } break; 3482 default: 3483 m_docStatus = PDF_DATAAVAIL_ERROR; 3484 return FALSE; 3485 } 3486 return TRUE; 3487 } 3488 FX_BOOL CPDF_DataAvail::CheckPages(IFX_DownloadHints* pHints) { 3489 FX_BOOL bExist = FALSE; 3490 CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); 3491 if (!bExist) { 3492 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3493 return TRUE; 3494 } 3495 if (!pPages) { 3496 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 3497 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3498 return TRUE; 3499 } 3500 return FALSE; 3501 } 3502 if (!GetPageKids(m_pCurrentParser, pPages)) { 3503 pPages->Release(); 3504 m_docStatus = PDF_DATAAVAIL_ERROR; 3505 return FALSE; 3506 } 3507 pPages->Release(); 3508 m_docStatus = PDF_DATAAVAIL_PAGE; 3509 return TRUE; 3510 } 3511 FX_BOOL CPDF_DataAvail::CheckHeader(IFX_DownloadHints* pHints) { 3512 FX_DWORD req_size = 1024; 3513 if ((FX_FILESIZE)req_size > m_dwFileLen) { 3514 req_size = (FX_DWORD)m_dwFileLen; 3515 } 3516 if (m_pFileAvail->IsDataAvail(0, req_size)) { 3517 uint8_t buffer[1024]; 3518 m_pFileRead->ReadBlock(buffer, 0, req_size); 3519 if (IsLinearizedFile(buffer, req_size)) { 3520 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE; 3521 } else { 3522 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 3523 return FALSE; 3524 } 3525 m_docStatus = PDF_DATAAVAIL_END; 3526 } 3527 return TRUE; 3528 } 3529 pHints->AddSegment(0, req_size); 3530 return FALSE; 3531 } 3532 FX_BOOL CPDF_DataAvail::CheckFirstPage(IFX_DownloadHints* pHints) { 3533 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 3534 CPDF_Object* pEndOffSet = pDict ? pDict->GetElement("E") : NULL; 3535 if (!pEndOffSet) { 3536 m_docStatus = PDF_DATAAVAIL_ERROR; 3537 return FALSE; 3538 } 3539 CPDF_Object* pXRefOffset = pDict ? pDict->GetElement("T") : NULL; 3540 if (!pXRefOffset) { 3541 m_docStatus = PDF_DATAAVAIL_ERROR; 3542 return FALSE; 3543 } 3544 CPDF_Object* pFileLen = pDict ? pDict->GetElement("L") : NULL; 3545 if (!pFileLen) { 3546 m_docStatus = PDF_DATAAVAIL_ERROR; 3547 return FALSE; 3548 } 3549 FX_BOOL bNeedDownLoad = FALSE; 3550 if (pEndOffSet->IsNumber()) { 3551 FX_DWORD dwEnd = pEndOffSet->GetInteger(); 3552 dwEnd += 512; 3553 if ((FX_FILESIZE)dwEnd > m_dwFileLen) { 3554 dwEnd = (FX_DWORD)m_dwFileLen; 3555 } 3556 int32_t iStartPos = (int32_t)(m_dwFileLen > 1024 ? 1024 : m_dwFileLen); 3557 int32_t iSize = dwEnd > 1024 ? dwEnd - 1024 : 0; 3558 if (!m_pFileAvail->IsDataAvail(iStartPos, iSize)) { 3559 pHints->AddSegment(iStartPos, iSize); 3560 bNeedDownLoad = TRUE; 3561 } 3562 } 3563 m_dwLastXRefOffset = 0; 3564 FX_FILESIZE dwFileLen = 0; 3565 if (pXRefOffset->IsNumber()) 3566 m_dwLastXRefOffset = pXRefOffset->GetInteger(); 3567 3568 if (pFileLen->IsNumber()) 3569 dwFileLen = pFileLen->GetInteger(); 3570 3571 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, 3572 (FX_DWORD)(dwFileLen - m_dwLastXRefOffset))) { 3573 if (m_docStatus == PDF_DATAAVAIL_FIRSTPAGE) { 3574 FX_DWORD dwSize = (FX_DWORD)(dwFileLen - m_dwLastXRefOffset); 3575 FX_FILESIZE offset = m_dwLastXRefOffset; 3576 if (dwSize < 512 && dwFileLen > 512) { 3577 dwSize = 512; 3578 offset = dwFileLen - 512; 3579 } 3580 pHints->AddSegment(offset, dwSize); 3581 } 3582 } else { 3583 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; 3584 } 3585 if (bNeedDownLoad || m_docStatus != PDF_DATAAVAIL_FIRSTPAGE_PREPARE) { 3586 m_docStatus = PDF_DATAAVAIL_FIRSTPAGE_PREPARE; 3587 return FALSE; 3588 } 3589 m_docStatus = 3590 m_bSupportHintTable ? PDF_DATAAVAIL_HINTTABLE : PDF_DATAAVAIL_DONE; 3591 return TRUE; 3592 } 3593 FX_BOOL CPDF_DataAvail::IsDataAvail(FX_FILESIZE offset, 3594 FX_DWORD size, 3595 IFX_DownloadHints* pHints) { 3596 if (offset > m_dwFileLen) 3597 return TRUE; 3598 FX_SAFE_DWORD safeSize = pdfium::base::checked_cast<FX_DWORD>(offset); 3599 safeSize += size; 3600 safeSize += 512; 3601 if (!safeSize.IsValid() || safeSize.ValueOrDie() > m_dwFileLen) 3602 size = m_dwFileLen - offset; 3603 else 3604 size += 512; 3605 if (!m_pFileAvail->IsDataAvail(offset, size)) { 3606 pHints->AddSegment(offset, size); 3607 return FALSE; 3608 } 3609 return TRUE; 3610 } 3611 FX_BOOL CPDF_DataAvail::CheckHintTables(IFX_DownloadHints* pHints) { 3612 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 3613 if (!pDict) { 3614 m_docStatus = PDF_DATAAVAIL_ERROR; 3615 return FALSE; 3616 } 3617 if (!pDict->KeyExist("H") || !pDict->KeyExist("O") || !pDict->KeyExist("N")) { 3618 m_docStatus = PDF_DATAAVAIL_ERROR; 3619 return FALSE; 3620 } 3621 int nPageCount = pDict->GetElementValue("N")->GetInteger(); 3622 if (nPageCount <= 1) { 3623 m_docStatus = PDF_DATAAVAIL_DONE; 3624 return TRUE; 3625 } 3626 CPDF_Array* pHintStreamRange = pDict->GetArray("H"); 3627 FX_FILESIZE szHSStart = 3628 pHintStreamRange->GetElementValue(0) 3629 ? pHintStreamRange->GetElementValue(0)->GetInteger() 3630 : 0; 3631 FX_FILESIZE szHSLength = 3632 pHintStreamRange->GetElementValue(1) 3633 ? pHintStreamRange->GetElementValue(1)->GetInteger() 3634 : 0; 3635 if (szHSStart < 0 || szHSLength <= 0) { 3636 m_docStatus = PDF_DATAAVAIL_ERROR; 3637 return FALSE; 3638 } 3639 if (!IsDataAvail(szHSStart, szHSLength, pHints)) { 3640 return FALSE; 3641 } 3642 m_syntaxParser.InitParser(m_pFileRead, m_dwHeaderOffset); 3643 std::unique_ptr<CPDF_HintTables> pHintTables( 3644 new CPDF_HintTables(this, pDict)); 3645 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pHintStream( 3646 ParseIndirectObjectAt(szHSStart, 0)); 3647 CPDF_Stream* pStream = ToStream(pHintStream.get()); 3648 if (pStream && pHintTables->LoadHintStream(pStream)) 3649 m_pHintTables = std::move(pHintTables); 3650 3651 m_docStatus = PDF_DATAAVAIL_DONE; 3652 return TRUE; 3653 } 3654 CPDF_Object* CPDF_DataAvail::ParseIndirectObjectAt( 3655 FX_FILESIZE pos, 3656 FX_DWORD objnum, 3657 CPDF_IndirectObjectHolder* pObjList) { 3658 FX_FILESIZE SavedPos = m_syntaxParser.SavePos(); 3659 m_syntaxParser.RestorePos(pos); 3660 bool bIsNumber; 3661 CFX_ByteString word = m_syntaxParser.GetNextWord(&bIsNumber); 3662 if (!bIsNumber) 3663 return nullptr; 3664 3665 FX_DWORD parser_objnum = FXSYS_atoi(word); 3666 if (objnum && parser_objnum != objnum) 3667 return nullptr; 3668 3669 word = m_syntaxParser.GetNextWord(&bIsNumber); 3670 if (!bIsNumber) 3671 return nullptr; 3672 3673 FX_DWORD gennum = FXSYS_atoi(word); 3674 if (m_syntaxParser.GetKeyword() != "obj") { 3675 m_syntaxParser.RestorePos(SavedPos); 3676 return nullptr; 3677 } 3678 CPDF_Object* pObj = 3679 m_syntaxParser.GetObject(pObjList, parser_objnum, gennum, nullptr, true); 3680 m_syntaxParser.RestorePos(SavedPos); 3681 return pObj; 3682 } 3683 IPDF_DataAvail::DocLinearizationStatus CPDF_DataAvail::IsLinearizedPDF() { 3684 FX_DWORD req_size = 1024; 3685 if (!m_pFileAvail->IsDataAvail(0, req_size)) { 3686 return LinearizationUnknown; 3687 } 3688 if (!m_pFileRead) { 3689 return NotLinearized; 3690 } 3691 FX_FILESIZE dwSize = m_pFileRead->GetSize(); 3692 if (dwSize < (FX_FILESIZE)req_size) { 3693 return LinearizationUnknown; 3694 } 3695 uint8_t buffer[1024]; 3696 m_pFileRead->ReadBlock(buffer, 0, req_size); 3697 if (IsLinearizedFile(buffer, req_size)) { 3698 return Linearized; 3699 } 3700 return NotLinearized; 3701 } 3702 FX_BOOL CPDF_DataAvail::IsLinearizedFile(uint8_t* pData, FX_DWORD dwLen) { 3703 ScopedFileStream file(FX_CreateMemoryStream(pData, (size_t)dwLen, FALSE)); 3704 int32_t offset = GetHeaderOffset(file.get()); 3705 if (offset == -1) { 3706 m_docStatus = PDF_DATAAVAIL_ERROR; 3707 return FALSE; 3708 } 3709 m_dwHeaderOffset = offset; 3710 m_syntaxParser.InitParser(file.get(), offset); 3711 m_syntaxParser.RestorePos(m_syntaxParser.m_HeaderOffset + 9); 3712 bool bNumber; 3713 CFX_ByteString wordObjNum = m_syntaxParser.GetNextWord(&bNumber); 3714 if (!bNumber) 3715 return FALSE; 3716 3717 FX_DWORD objnum = FXSYS_atoi(wordObjNum); 3718 if (m_pLinearized) { 3719 m_pLinearized->Release(); 3720 m_pLinearized = NULL; 3721 } 3722 m_pLinearized = 3723 ParseIndirectObjectAt(m_syntaxParser.m_HeaderOffset + 9, objnum); 3724 if (!m_pLinearized) { 3725 return FALSE; 3726 } 3727 3728 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 3729 if (pDict && pDict->GetElement("Linearized")) { 3730 CPDF_Object* pLen = pDict->GetElement("L"); 3731 if (!pLen) { 3732 return FALSE; 3733 } 3734 if ((FX_FILESIZE)pLen->GetInteger() != m_pFileRead->GetSize()) { 3735 return FALSE; 3736 } 3737 m_bLinearized = TRUE; 3738 3739 if (CPDF_Number* pNo = ToNumber(pDict->GetElement("P"))) 3740 m_dwFirstPageNo = pNo->GetInteger(); 3741 3742 return TRUE; 3743 } 3744 return FALSE; 3745 } 3746 FX_BOOL CPDF_DataAvail::CheckEnd(IFX_DownloadHints* pHints) { 3747 FX_DWORD req_pos = (FX_DWORD)(m_dwFileLen > 1024 ? m_dwFileLen - 1024 : 0); 3748 FX_DWORD dwSize = (FX_DWORD)(m_dwFileLen - req_pos); 3749 if (m_pFileAvail->IsDataAvail(req_pos, dwSize)) { 3750 uint8_t buffer[1024]; 3751 m_pFileRead->ReadBlock(buffer, req_pos, dwSize); 3752 ScopedFileStream file(FX_CreateMemoryStream(buffer, (size_t)dwSize, FALSE)); 3753 m_syntaxParser.InitParser(file.get(), 0); 3754 m_syntaxParser.RestorePos(dwSize - 1); 3755 if (m_syntaxParser.SearchWord("startxref", TRUE, FALSE, dwSize)) { 3756 m_syntaxParser.GetNextWord(nullptr); 3757 bool bNumber; 3758 CFX_ByteString xrefpos_str = m_syntaxParser.GetNextWord(&bNumber); 3759 if (!bNumber) { 3760 m_docStatus = PDF_DATAAVAIL_ERROR; 3761 return FALSE; 3762 } 3763 m_dwXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str); 3764 if (!m_dwXRefOffset || m_dwXRefOffset > m_dwFileLen) { 3765 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3766 return TRUE; 3767 } 3768 m_dwLastXRefOffset = m_dwXRefOffset; 3769 SetStartOffset(m_dwXRefOffset); 3770 m_docStatus = PDF_DATAAVAIL_CROSSREF; 3771 return TRUE; 3772 } 3773 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3774 return TRUE; 3775 } 3776 pHints->AddSegment(req_pos, dwSize); 3777 return FALSE; 3778 } 3779 int32_t CPDF_DataAvail::CheckCrossRefStream(IFX_DownloadHints* pHints, 3780 FX_FILESIZE& xref_offset) { 3781 xref_offset = 0; 3782 FX_DWORD req_size = 3783 (FX_DWORD)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 3784 if (m_pFileAvail->IsDataAvail(m_Pos, req_size)) { 3785 int32_t iSize = (int32_t)(m_Pos + req_size - m_dwCurrentXRefSteam); 3786 CFX_BinaryBuf buf(iSize); 3787 uint8_t* pBuf = buf.GetBuffer(); 3788 m_pFileRead->ReadBlock(pBuf, m_dwCurrentXRefSteam, iSize); 3789 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); 3790 m_parser.m_Syntax.InitParser(file.get(), 0); 3791 bool bNumber; 3792 CFX_ByteString objnum = m_parser.m_Syntax.GetNextWord(&bNumber); 3793 if (!bNumber) 3794 return -1; 3795 3796 FX_DWORD objNum = FXSYS_atoi(objnum); 3797 CPDF_Object* pObj = m_parser.ParseIndirectObjectAt(NULL, 0, objNum, NULL); 3798 if (!pObj) { 3799 m_Pos += m_parser.m_Syntax.SavePos(); 3800 return 0; 3801 } 3802 CPDF_Dictionary* pDict = pObj->GetDict(); 3803 CPDF_Name* pName = ToName(pDict ? pDict->GetElement("Type") : nullptr); 3804 if (pName) { 3805 if (pName->GetString() == "XRef") { 3806 m_Pos += m_parser.m_Syntax.SavePos(); 3807 xref_offset = pObj->GetDict()->GetInteger("Prev"); 3808 pObj->Release(); 3809 return 1; 3810 } 3811 } 3812 pObj->Release(); 3813 return -1; 3814 } 3815 pHints->AddSegment(m_Pos, req_size); 3816 return 0; 3817 } 3818 inline void CPDF_DataAvail::SetStartOffset(FX_FILESIZE dwOffset) { 3819 m_Pos = dwOffset; 3820 } 3821 3822 FX_BOOL CPDF_DataAvail::GetNextToken(CFX_ByteString& token) { 3823 uint8_t ch; 3824 if (!GetNextChar(ch)) 3825 return FALSE; 3826 3827 while (1) { 3828 while (PDFCharIsWhitespace(ch)) { 3829 if (!GetNextChar(ch)) 3830 return FALSE; 3831 } 3832 3833 if (ch != '%') 3834 break; 3835 3836 while (1) { 3837 if (!GetNextChar(ch)) 3838 return FALSE; 3839 if (PDFCharIsLineEnding(ch)) 3840 break; 3841 } 3842 } 3843 3844 uint8_t buffer[256]; 3845 FX_DWORD index = 0; 3846 if (PDFCharIsDelimiter(ch)) { 3847 buffer[index++] = ch; 3848 if (ch == '/') { 3849 while (1) { 3850 if (!GetNextChar(ch)) 3851 return FALSE; 3852 3853 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 3854 m_Pos--; 3855 CFX_ByteString ret(buffer, index); 3856 token = ret; 3857 return TRUE; 3858 } 3859 3860 if (index < sizeof(buffer)) 3861 buffer[index++] = ch; 3862 } 3863 } else if (ch == '<') { 3864 if (!GetNextChar(ch)) 3865 return FALSE; 3866 3867 if (ch == '<') 3868 buffer[index++] = ch; 3869 else 3870 m_Pos--; 3871 } else if (ch == '>') { 3872 if (!GetNextChar(ch)) 3873 return FALSE; 3874 3875 if (ch == '>') 3876 buffer[index++] = ch; 3877 else 3878 m_Pos--; 3879 } 3880 3881 CFX_ByteString ret(buffer, index); 3882 token = ret; 3883 return TRUE; 3884 } 3885 3886 while (1) { 3887 if (index < sizeof(buffer)) 3888 buffer[index++] = ch; 3889 3890 if (!GetNextChar(ch)) 3891 return FALSE; 3892 3893 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 3894 m_Pos--; 3895 break; 3896 } 3897 } 3898 3899 token = CFX_ByteString(buffer, index); 3900 return TRUE; 3901 } 3902 3903 FX_BOOL CPDF_DataAvail::GetNextChar(uint8_t& ch) { 3904 FX_FILESIZE pos = m_Pos; 3905 if (pos >= m_dwFileLen) { 3906 return FALSE; 3907 } 3908 if (m_bufferOffset >= pos || 3909 (FX_FILESIZE)(m_bufferOffset + m_bufferSize) <= pos) { 3910 FX_FILESIZE read_pos = pos; 3911 FX_DWORD read_size = 512; 3912 if ((FX_FILESIZE)read_size > m_dwFileLen) { 3913 read_size = (FX_DWORD)m_dwFileLen; 3914 } 3915 if ((FX_FILESIZE)(read_pos + read_size) > m_dwFileLen) { 3916 read_pos = m_dwFileLen - read_size; 3917 } 3918 if (!m_pFileRead->ReadBlock(m_bufferData, read_pos, read_size)) { 3919 return FALSE; 3920 } 3921 m_bufferOffset = read_pos; 3922 m_bufferSize = read_size; 3923 } 3924 ch = m_bufferData[pos - m_bufferOffset]; 3925 m_Pos++; 3926 return TRUE; 3927 } 3928 FX_BOOL CPDF_DataAvail::CheckCrossRefItem(IFX_DownloadHints* pHints) { 3929 int32_t iSize = 0; 3930 CFX_ByteString token; 3931 while (1) { 3932 if (!GetNextToken(token)) { 3933 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 3934 pHints->AddSegment(m_Pos, iSize); 3935 return FALSE; 3936 } 3937 if (token == "trailer") { 3938 m_dwTrailerOffset = m_Pos; 3939 m_docStatus = PDF_DATAAVAIL_TRAILER; 3940 return TRUE; 3941 } 3942 } 3943 } 3944 FX_BOOL CPDF_DataAvail::CheckAllCrossRefStream(IFX_DownloadHints* pHints) { 3945 FX_FILESIZE xref_offset = 0; 3946 int32_t nRet = CheckCrossRefStream(pHints, xref_offset); 3947 if (nRet == 1) { 3948 if (!xref_offset) { 3949 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; 3950 } else { 3951 m_dwCurrentXRefSteam = xref_offset; 3952 m_Pos = xref_offset; 3953 } 3954 return TRUE; 3955 } 3956 if (nRet == -1) { 3957 m_docStatus = PDF_DATAAVAIL_ERROR; 3958 } 3959 return FALSE; 3960 } 3961 FX_BOOL CPDF_DataAvail::CheckCrossRef(IFX_DownloadHints* pHints) { 3962 int32_t iSize = 0; 3963 CFX_ByteString token; 3964 if (!GetNextToken(token)) { 3965 iSize = (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 3966 pHints->AddSegment(m_Pos, iSize); 3967 return FALSE; 3968 } 3969 if (token == "xref") { 3970 m_CrossOffset.InsertAt(0, m_dwXRefOffset); 3971 while (1) { 3972 if (!GetNextToken(token)) { 3973 iSize = 3974 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 3975 pHints->AddSegment(m_Pos, iSize); 3976 m_docStatus = PDF_DATAAVAIL_CROSSREF_ITEM; 3977 return FALSE; 3978 } 3979 if (token == "trailer") { 3980 m_dwTrailerOffset = m_Pos; 3981 m_docStatus = PDF_DATAAVAIL_TRAILER; 3982 return TRUE; 3983 } 3984 } 3985 } else { 3986 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 3987 return TRUE; 3988 } 3989 return FALSE; 3990 } 3991 FX_BOOL CPDF_DataAvail::CheckTrailerAppend(IFX_DownloadHints* pHints) { 3992 if (m_Pos < m_dwFileLen) { 3993 FX_FILESIZE dwAppendPos = m_Pos + m_syntaxParser.SavePos(); 3994 int32_t iSize = (int32_t)( 3995 dwAppendPos + 512 > m_dwFileLen ? m_dwFileLen - dwAppendPos : 512); 3996 if (!m_pFileAvail->IsDataAvail(dwAppendPos, iSize)) { 3997 pHints->AddSegment(dwAppendPos, iSize); 3998 return FALSE; 3999 } 4000 } 4001 if (m_dwPrevXRefOffset) { 4002 SetStartOffset(m_dwPrevXRefOffset); 4003 m_docStatus = PDF_DATAAVAIL_CROSSREF; 4004 } else { 4005 m_docStatus = PDF_DATAAVAIL_LOADALLCROSSREF; 4006 } 4007 return TRUE; 4008 } 4009 4010 FX_BOOL CPDF_DataAvail::CheckTrailer(IFX_DownloadHints* pHints) { 4011 int32_t iTrailerSize = 4012 (int32_t)(m_Pos + 512 > m_dwFileLen ? m_dwFileLen - m_Pos : 512); 4013 if (m_pFileAvail->IsDataAvail(m_Pos, iTrailerSize)) { 4014 int32_t iSize = (int32_t)(m_Pos + iTrailerSize - m_dwTrailerOffset); 4015 CFX_BinaryBuf buf(iSize); 4016 uint8_t* pBuf = buf.GetBuffer(); 4017 if (!pBuf) { 4018 m_docStatus = PDF_DATAAVAIL_ERROR; 4019 return FALSE; 4020 } 4021 if (!m_pFileRead->ReadBlock(pBuf, m_dwTrailerOffset, iSize)) { 4022 return FALSE; 4023 } 4024 ScopedFileStream file(FX_CreateMemoryStream(pBuf, (size_t)iSize, FALSE)); 4025 m_syntaxParser.InitParser(file.get(), 0); 4026 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pTrailer( 4027 m_syntaxParser.GetObject(nullptr, 0, 0, nullptr, true)); 4028 if (!pTrailer) { 4029 m_Pos += m_syntaxParser.SavePos(); 4030 pHints->AddSegment(m_Pos, iTrailerSize); 4031 return FALSE; 4032 } 4033 if (!pTrailer->IsDictionary()) 4034 return FALSE; 4035 4036 CPDF_Dictionary* pTrailerDict = pTrailer->GetDict(); 4037 CPDF_Object* pEncrypt = pTrailerDict->GetElement("Encrypt"); 4038 if (ToReference(pEncrypt)) { 4039 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 4040 return TRUE; 4041 } 4042 4043 FX_DWORD xrefpos = GetDirectInteger(pTrailerDict, "Prev"); 4044 if (xrefpos) { 4045 m_dwPrevXRefOffset = GetDirectInteger(pTrailerDict, "XRefStm"); 4046 if (m_dwPrevXRefOffset) { 4047 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 4048 } else { 4049 m_dwPrevXRefOffset = xrefpos; 4050 if (m_dwPrevXRefOffset >= m_dwFileLen) { 4051 m_docStatus = PDF_DATAAVAIL_LOADALLFILE; 4052 } else { 4053 SetStartOffset(m_dwPrevXRefOffset); 4054 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; 4055 } 4056 } 4057 return TRUE; 4058 } 4059 m_dwPrevXRefOffset = 0; 4060 m_docStatus = PDF_DATAAVAIL_TRAILER_APPEND; 4061 return TRUE; 4062 } 4063 pHints->AddSegment(m_Pos, iTrailerSize); 4064 return FALSE; 4065 } 4066 4067 FX_BOOL CPDF_DataAvail::CheckPage(int32_t iPage, IFX_DownloadHints* pHints) { 4068 while (TRUE) { 4069 switch (m_docStatus) { 4070 case PDF_DATAAVAIL_PAGETREE: 4071 if (!LoadDocPages(pHints)) { 4072 return FALSE; 4073 } 4074 break; 4075 case PDF_DATAAVAIL_PAGE: 4076 if (!LoadDocPage(iPage, pHints)) { 4077 return FALSE; 4078 } 4079 break; 4080 case PDF_DATAAVAIL_ERROR: 4081 return LoadAllFile(pHints); 4082 default: 4083 m_bPagesTreeLoad = TRUE; 4084 m_bPagesLoad = TRUE; 4085 m_bCurPageDictLoadOK = TRUE; 4086 m_docStatus = PDF_DATAAVAIL_PAGE; 4087 return TRUE; 4088 } 4089 } 4090 } 4091 FX_BOOL CPDF_DataAvail::CheckArrayPageNode(FX_DWORD dwPageNo, 4092 CPDF_PageNode* pPageNode, 4093 IFX_DownloadHints* pHints) { 4094 FX_BOOL bExist = FALSE; 4095 CPDF_Object* pPages = GetObject(dwPageNo, pHints, &bExist); 4096 if (!bExist) { 4097 m_docStatus = PDF_DATAAVAIL_ERROR; 4098 return FALSE; 4099 } 4100 if (!pPages) { 4101 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 4102 m_docStatus = PDF_DATAAVAIL_ERROR; 4103 return FALSE; 4104 } 4105 return FALSE; 4106 } 4107 4108 CPDF_Array* pArray = pPages->AsArray(); 4109 if (!pArray) { 4110 pPages->Release(); 4111 m_docStatus = PDF_DATAAVAIL_ERROR; 4112 return FALSE; 4113 } 4114 4115 pPageNode->m_type = PDF_PAGENODE_PAGES; 4116 for (FX_DWORD i = 0; i < pArray->GetCount(); ++i) { 4117 CPDF_Reference* pKid = ToReference(pArray->GetElement(i)); 4118 if (!pKid) 4119 continue; 4120 4121 CPDF_PageNode* pNode = new CPDF_PageNode(); 4122 pPageNode->m_childNode.Add(pNode); 4123 pNode->m_dwPageNo = pKid->GetRefObjNum(); 4124 } 4125 pPages->Release(); 4126 return TRUE; 4127 } 4128 FX_BOOL CPDF_DataAvail::CheckUnkownPageNode(FX_DWORD dwPageNo, 4129 CPDF_PageNode* pPageNode, 4130 IFX_DownloadHints* pHints) { 4131 FX_BOOL bExist = FALSE; 4132 CPDF_Object* pPage = GetObject(dwPageNo, pHints, &bExist); 4133 if (!bExist) { 4134 m_docStatus = PDF_DATAAVAIL_ERROR; 4135 return FALSE; 4136 } 4137 if (!pPage) { 4138 if (m_docStatus == PDF_DATAAVAIL_ERROR) { 4139 m_docStatus = PDF_DATAAVAIL_ERROR; 4140 return FALSE; 4141 } 4142 return FALSE; 4143 } 4144 if (pPage->IsArray()) { 4145 pPageNode->m_dwPageNo = dwPageNo; 4146 pPageNode->m_type = PDF_PAGENODE_ARRAY; 4147 pPage->Release(); 4148 return TRUE; 4149 } 4150 if (!pPage->IsDictionary()) { 4151 pPage->Release(); 4152 m_docStatus = PDF_DATAAVAIL_ERROR; 4153 return FALSE; 4154 } 4155 pPageNode->m_dwPageNo = dwPageNo; 4156 CPDF_Dictionary* pDict = pPage->GetDict(); 4157 CFX_ByteString type = pDict->GetString("Type"); 4158 if (type == "Pages") { 4159 pPageNode->m_type = PDF_PAGENODE_PAGES; 4160 CPDF_Object* pKids = pDict->GetElement("Kids"); 4161 if (!pKids) { 4162 m_docStatus = PDF_DATAAVAIL_PAGE; 4163 return TRUE; 4164 } 4165 switch (pKids->GetType()) { 4166 case PDFOBJ_REFERENCE: { 4167 CPDF_Reference* pKid = pKids->AsReference(); 4168 CPDF_PageNode* pNode = new CPDF_PageNode(); 4169 pPageNode->m_childNode.Add(pNode); 4170 pNode->m_dwPageNo = pKid->GetRefObjNum(); 4171 } break; 4172 case PDFOBJ_ARRAY: { 4173 CPDF_Array* pKidsArray = pKids->AsArray(); 4174 for (FX_DWORD i = 0; i < pKidsArray->GetCount(); ++i) { 4175 CPDF_Reference* pKid = ToReference(pKidsArray->GetElement(i)); 4176 if (!pKid) 4177 continue; 4178 4179 CPDF_PageNode* pNode = new CPDF_PageNode(); 4180 pPageNode->m_childNode.Add(pNode); 4181 pNode->m_dwPageNo = pKid->GetRefObjNum(); 4182 } 4183 } break; 4184 default: 4185 break; 4186 } 4187 } else if (type == "Page") { 4188 pPageNode->m_type = PDF_PAGENODE_PAGE; 4189 } else { 4190 pPage->Release(); 4191 m_docStatus = PDF_DATAAVAIL_ERROR; 4192 return FALSE; 4193 } 4194 pPage->Release(); 4195 return TRUE; 4196 } 4197 FX_BOOL CPDF_DataAvail::CheckPageNode(CPDF_PageNode& pageNodes, 4198 int32_t iPage, 4199 int32_t& iCount, 4200 IFX_DownloadHints* pHints, 4201 int level) { 4202 if (level >= kMaxPageRecursionDepth) { 4203 return FALSE; 4204 } 4205 int32_t iSize = pageNodes.m_childNode.GetSize(); 4206 if (iSize <= 0 || iPage >= iSize) { 4207 m_docStatus = PDF_DATAAVAIL_ERROR; 4208 return FALSE; 4209 } 4210 for (int32_t i = 0; i < iSize; ++i) { 4211 CPDF_PageNode* pNode = pageNodes.m_childNode.GetAt(i); 4212 if (!pNode) { 4213 continue; 4214 } 4215 switch (pNode->m_type) { 4216 case PDF_PAGENODE_UNKOWN: 4217 if (!CheckUnkownPageNode(pNode->m_dwPageNo, pNode, pHints)) { 4218 return FALSE; 4219 } 4220 --i; 4221 break; 4222 case PDF_PAGENODE_PAGE: 4223 iCount++; 4224 if (iPage == iCount && m_pDocument) { 4225 m_pDocument->m_PageList.SetAt(iPage, pNode->m_dwPageNo); 4226 } 4227 break; 4228 case PDF_PAGENODE_PAGES: 4229 if (!CheckPageNode(*pNode, iPage, iCount, pHints, level + 1)) { 4230 return FALSE; 4231 } 4232 break; 4233 case PDF_PAGENODE_ARRAY: 4234 if (!CheckArrayPageNode(pNode->m_dwPageNo, pNode, pHints)) { 4235 return FALSE; 4236 } 4237 --i; 4238 break; 4239 } 4240 if (iPage == iCount) { 4241 m_docStatus = PDF_DATAAVAIL_DONE; 4242 return TRUE; 4243 } 4244 } 4245 return TRUE; 4246 } 4247 FX_BOOL CPDF_DataAvail::LoadDocPage(int32_t iPage, IFX_DownloadHints* pHints) { 4248 if (m_pDocument->GetPageCount() <= iPage || 4249 m_pDocument->m_PageList.GetAt(iPage)) { 4250 m_docStatus = PDF_DATAAVAIL_DONE; 4251 return TRUE; 4252 } 4253 if (m_pageNodes.m_type == PDF_PAGENODE_PAGE) { 4254 if (iPage == 0) { 4255 m_docStatus = PDF_DATAAVAIL_DONE; 4256 return TRUE; 4257 } 4258 m_docStatus = PDF_DATAAVAIL_ERROR; 4259 return TRUE; 4260 } 4261 int32_t iCount = -1; 4262 return CheckPageNode(m_pageNodes, iPage, iCount, pHints, 0); 4263 } 4264 FX_BOOL CPDF_DataAvail::CheckPageCount(IFX_DownloadHints* pHints) { 4265 FX_BOOL bExist = FALSE; 4266 CPDF_Object* pPages = GetObject(m_PagesObjNum, pHints, &bExist); 4267 if (!bExist) { 4268 m_docStatus = PDF_DATAAVAIL_ERROR; 4269 return FALSE; 4270 } 4271 if (!pPages) { 4272 return FALSE; 4273 } 4274 CPDF_Dictionary* pPagesDict = pPages->GetDict(); 4275 if (!pPagesDict) { 4276 pPages->Release(); 4277 m_docStatus = PDF_DATAAVAIL_ERROR; 4278 return FALSE; 4279 } 4280 if (!pPagesDict->KeyExist("Kids")) { 4281 pPages->Release(); 4282 return TRUE; 4283 } 4284 int count = pPagesDict->GetInteger("Count"); 4285 if (count > 0) { 4286 pPages->Release(); 4287 return TRUE; 4288 } 4289 pPages->Release(); 4290 return FALSE; 4291 } 4292 FX_BOOL CPDF_DataAvail::LoadDocPages(IFX_DownloadHints* pHints) { 4293 if (!CheckUnkownPageNode(m_PagesObjNum, &m_pageNodes, pHints)) { 4294 return FALSE; 4295 } 4296 if (CheckPageCount(pHints)) { 4297 m_docStatus = PDF_DATAAVAIL_PAGE; 4298 return TRUE; 4299 } 4300 m_bTotalLoadPageTree = TRUE; 4301 return FALSE; 4302 } 4303 FX_BOOL CPDF_DataAvail::LoadPages(IFX_DownloadHints* pHints) { 4304 while (!m_bPagesTreeLoad) { 4305 if (!CheckPageStatus(pHints)) { 4306 return FALSE; 4307 } 4308 } 4309 if (m_bPagesLoad) { 4310 return TRUE; 4311 } 4312 m_pDocument->LoadPages(); 4313 return FALSE; 4314 } 4315 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedData( 4316 IFX_DownloadHints* pHints) { 4317 if (m_bLinearedDataOK) { 4318 return DataAvailable; 4319 } 4320 4321 if (!m_bMainXRefLoadTried) { 4322 FX_SAFE_DWORD data_size = m_dwFileLen; 4323 data_size -= m_dwLastXRefOffset; 4324 if (!data_size.IsValid()) { 4325 return DataError; 4326 } 4327 if (!m_pFileAvail->IsDataAvail(m_dwLastXRefOffset, 4328 data_size.ValueOrDie())) { 4329 pHints->AddSegment(m_dwLastXRefOffset, data_size.ValueOrDie()); 4330 return DataNotAvailable; 4331 } 4332 FX_DWORD dwRet = m_pDocument->GetParser()->LoadLinearizedMainXRefTable(); 4333 m_bMainXRefLoadTried = TRUE; 4334 if (dwRet != PDFPARSE_ERROR_SUCCESS) { 4335 return DataError; 4336 } 4337 if (!PreparePageItem()) { 4338 return DataNotAvailable; 4339 } 4340 m_bMainXRefLoadedOK = TRUE; 4341 m_bLinearedDataOK = TRUE; 4342 } 4343 4344 return m_bLinearedDataOK ? DataAvailable : DataNotAvailable; 4345 } 4346 FX_BOOL CPDF_DataAvail::CheckPageAnnots(int32_t iPage, 4347 IFX_DownloadHints* pHints) { 4348 if (!m_objs_array.GetSize()) { 4349 m_objs_array.RemoveAll(); 4350 m_ObjectSet.clear(); 4351 CPDF_Dictionary* pPageDict = m_pDocument->GetPage(iPage); 4352 if (!pPageDict) { 4353 return TRUE; 4354 } 4355 CPDF_Object* pAnnots = pPageDict->GetElement("Annots"); 4356 if (!pAnnots) { 4357 return TRUE; 4358 } 4359 CFX_ArrayTemplate<CPDF_Object*> obj_array; 4360 obj_array.Add(pAnnots); 4361 FX_BOOL bRet = IsObjectsAvail(obj_array, FALSE, pHints, m_objs_array); 4362 if (bRet) { 4363 m_objs_array.RemoveAll(); 4364 } 4365 return bRet; 4366 } 4367 CFX_ArrayTemplate<CPDF_Object*> new_objs_array; 4368 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); 4369 m_objs_array.RemoveAll(); 4370 if (!bRet) { 4371 m_objs_array.Append(new_objs_array); 4372 } 4373 return bRet; 4374 } 4375 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::CheckLinearizedFirstPage( 4376 int32_t iPage, 4377 IFX_DownloadHints* pHints) { 4378 if (!m_bAnnotsLoad) { 4379 if (!CheckPageAnnots(iPage, pHints)) { 4380 return DataNotAvailable; 4381 } 4382 m_bAnnotsLoad = TRUE; 4383 } 4384 4385 DocAvailStatus nRet = CheckLinearizedData(pHints); 4386 if (nRet == DataAvailable) 4387 m_bPageLoadedOK = FALSE; 4388 return nRet; 4389 } 4390 FX_BOOL CPDF_DataAvail::HaveResourceAncestor(CPDF_Dictionary* pDict) { 4391 CFX_AutoRestorer<int> restorer(&s_CurrentDataAvailRecursionDepth); 4392 if (++s_CurrentDataAvailRecursionDepth > kMaxDataAvailRecursionDepth) { 4393 return FALSE; 4394 } 4395 CPDF_Object* pParent = pDict->GetElement("Parent"); 4396 if (!pParent) { 4397 return FALSE; 4398 } 4399 CPDF_Dictionary* pParentDict = pParent->GetDict(); 4400 if (!pParentDict) { 4401 return FALSE; 4402 } 4403 CPDF_Object* pRet = pParentDict->GetElement("Resources"); 4404 if (pRet) { 4405 m_pPageResource = pRet; 4406 return TRUE; 4407 } 4408 return HaveResourceAncestor(pParentDict); 4409 } 4410 IPDF_DataAvail::DocAvailStatus CPDF_DataAvail::IsPageAvail( 4411 int32_t iPage, 4412 IFX_DownloadHints* pHints) { 4413 if (!m_pDocument) { 4414 return DataError; 4415 } 4416 if (IsFirstCheck(iPage)) { 4417 m_bCurPageDictLoadOK = FALSE; 4418 m_bPageLoadedOK = FALSE; 4419 m_bAnnotsLoad = FALSE; 4420 m_bNeedDownLoadResource = FALSE; 4421 m_objs_array.RemoveAll(); 4422 m_ObjectSet.clear(); 4423 } 4424 if (pdfium::ContainsKey(m_pagesLoadState, iPage)) 4425 return DataAvailable; 4426 4427 if (m_bLinearized) { 4428 if ((FX_DWORD)iPage == m_dwFirstPageNo) { 4429 DocAvailStatus nRet = CheckLinearizedFirstPage(iPage, pHints); 4430 if (nRet == DataAvailable) 4431 m_pagesLoadState.insert(iPage); 4432 return nRet; 4433 } 4434 DocAvailStatus nResult = CheckLinearizedData(pHints); 4435 if (nResult != DataAvailable) { 4436 return nResult; 4437 } 4438 if (m_pHintTables) { 4439 nResult = m_pHintTables->CheckPage(iPage, pHints); 4440 if (nResult != DataAvailable) 4441 return nResult; 4442 m_pagesLoadState.insert(iPage); 4443 return DataAvailable; 4444 } 4445 if (m_bMainXRefLoadedOK) { 4446 if (m_bTotalLoadPageTree) { 4447 if (!LoadPages(pHints)) { 4448 return DataNotAvailable; 4449 } 4450 } else { 4451 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) { 4452 return DataNotAvailable; 4453 } 4454 } 4455 } else { 4456 if (!LoadAllFile(pHints)) { 4457 return DataNotAvailable; 4458 } 4459 ((CPDF_Parser*)m_pDocument->GetParser())->RebuildCrossRef(); 4460 ResetFirstCheck(iPage); 4461 return DataAvailable; 4462 } 4463 } else { 4464 if (!m_bTotalLoadPageTree) { 4465 if (!m_bCurPageDictLoadOK && !CheckPage(iPage, pHints)) { 4466 return DataNotAvailable; 4467 } 4468 } 4469 } 4470 if (m_bHaveAcroForm && !m_bAcroFormLoad) { 4471 if (!CheckAcroFormSubObject(pHints)) { 4472 return DataNotAvailable; 4473 } 4474 m_bAcroFormLoad = TRUE; 4475 } 4476 if (!m_bPageLoadedOK) { 4477 if (!m_objs_array.GetSize()) { 4478 m_objs_array.RemoveAll(); 4479 m_ObjectSet.clear(); 4480 m_pPageDict = m_pDocument->GetPage(iPage); 4481 if (!m_pPageDict) { 4482 ResetFirstCheck(iPage); 4483 return DataAvailable; 4484 } 4485 CFX_ArrayTemplate<CPDF_Object*> obj_array; 4486 obj_array.Add(m_pPageDict); 4487 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); 4488 if (!bRet) 4489 return DataNotAvailable; 4490 4491 m_objs_array.RemoveAll(); 4492 } else { 4493 CFX_ArrayTemplate<CPDF_Object*> new_objs_array; 4494 FX_BOOL bRet = 4495 IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); 4496 m_objs_array.RemoveAll(); 4497 if (!bRet) { 4498 m_objs_array.Append(new_objs_array); 4499 return DataNotAvailable; 4500 } 4501 } 4502 m_bPageLoadedOK = TRUE; 4503 } 4504 4505 if (!m_bAnnotsLoad) { 4506 if (!CheckPageAnnots(iPage, pHints)) { 4507 return DataNotAvailable; 4508 } 4509 m_bAnnotsLoad = TRUE; 4510 } 4511 4512 if (m_pPageDict && !m_bNeedDownLoadResource) { 4513 m_pPageResource = m_pPageDict->GetElement("Resources"); 4514 if (!m_pPageResource) { 4515 m_bNeedDownLoadResource = HaveResourceAncestor(m_pPageDict); 4516 } else { 4517 m_bNeedDownLoadResource = TRUE; 4518 } 4519 } 4520 if (m_bNeedDownLoadResource) { 4521 FX_BOOL bRet = CheckResources(pHints); 4522 if (!bRet) { 4523 return DataNotAvailable; 4524 } 4525 m_bNeedDownLoadResource = FALSE; 4526 } 4527 m_bPageLoadedOK = FALSE; 4528 m_bAnnotsLoad = FALSE; 4529 m_bCurPageDictLoadOK = FALSE; 4530 ResetFirstCheck(iPage); 4531 m_pagesLoadState.insert(iPage); 4532 return DataAvailable; 4533 } 4534 FX_BOOL CPDF_DataAvail::CheckResources(IFX_DownloadHints* pHints) { 4535 if (!m_objs_array.GetSize()) { 4536 m_objs_array.RemoveAll(); 4537 CFX_ArrayTemplate<CPDF_Object*> obj_array; 4538 obj_array.Add(m_pPageResource); 4539 FX_BOOL bRet = IsObjectsAvail(obj_array, TRUE, pHints, m_objs_array); 4540 if (bRet) { 4541 m_objs_array.RemoveAll(); 4542 } 4543 return bRet; 4544 } 4545 CFX_ArrayTemplate<CPDF_Object*> new_objs_array; 4546 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); 4547 m_objs_array.RemoveAll(); 4548 if (!bRet) { 4549 m_objs_array.Append(new_objs_array); 4550 } 4551 return bRet; 4552 } 4553 void CPDF_DataAvail::GetLinearizedMainXRefInfo(FX_FILESIZE* pPos, 4554 FX_DWORD* pSize) { 4555 if (pPos) { 4556 *pPos = m_dwLastXRefOffset; 4557 } 4558 if (pSize) { 4559 *pSize = (FX_DWORD)(m_dwFileLen - m_dwLastXRefOffset); 4560 } 4561 } 4562 int CPDF_DataAvail::GetPageCount() const { 4563 if (m_pLinearized) { 4564 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 4565 CPDF_Object* pObj = pDict ? pDict->GetElementValue("N") : nullptr; 4566 return pObj ? pObj->GetInteger() : 0; 4567 } 4568 return m_pDocument ? m_pDocument->GetPageCount() : 0; 4569 } 4570 CPDF_Dictionary* CPDF_DataAvail::GetPage(int index) { 4571 if (!m_pDocument || index < 0 || index >= this->GetPageCount()) { 4572 return nullptr; 4573 } 4574 if (m_pLinearized) { 4575 CPDF_Dictionary* pDict = m_pLinearized->GetDict(); 4576 CPDF_Object* pObj = pDict ? pDict->GetElementValue("P") : nullptr; 4577 int pageNum = pObj ? pObj->GetInteger() : 0; 4578 if (m_pHintTables && index != pageNum) { 4579 FX_FILESIZE szPageStartPos = 0; 4580 FX_FILESIZE szPageLength = 0; 4581 FX_DWORD dwObjNum = 0; 4582 FX_BOOL bPagePosGot = m_pHintTables->GetPagePos(index, szPageStartPos, 4583 szPageLength, dwObjNum); 4584 if (!bPagePosGot) { 4585 return nullptr; 4586 } 4587 m_syntaxParser.InitParser(m_pFileRead, (FX_DWORD)szPageStartPos); 4588 CPDF_Object* pPageDict = ParseIndirectObjectAt(0, dwObjNum, m_pDocument); 4589 if (!pPageDict) { 4590 return nullptr; 4591 } 4592 if (!m_pDocument->InsertIndirectObject(dwObjNum, pPageDict)) 4593 return nullptr; 4594 return pPageDict->GetDict(); 4595 } 4596 } 4597 return m_pDocument->GetPage(index); 4598 } 4599 IPDF_DataAvail::DocFormStatus CPDF_DataAvail::IsFormAvail( 4600 IFX_DownloadHints* pHints) { 4601 if (!m_pDocument) { 4602 return FormAvailable; 4603 } 4604 if (!m_bLinearizedFormParamLoad) { 4605 CPDF_Dictionary* pRoot = m_pDocument->GetRoot(); 4606 if (!pRoot) { 4607 return FormAvailable; 4608 } 4609 CPDF_Object* pAcroForm = pRoot->GetElement("AcroForm"); 4610 if (!pAcroForm) { 4611 return FormNotExist; 4612 } 4613 DocAvailStatus nDocStatus = CheckLinearizedData(pHints); 4614 if (nDocStatus == DataError) 4615 return FormError; 4616 if (nDocStatus == DataNotAvailable) 4617 return FormNotAvailable; 4618 4619 if (!m_objs_array.GetSize()) { 4620 m_objs_array.Add(pAcroForm->GetDict()); 4621 } 4622 m_bLinearizedFormParamLoad = TRUE; 4623 } 4624 CFX_ArrayTemplate<CPDF_Object*> new_objs_array; 4625 FX_BOOL bRet = IsObjectsAvail(m_objs_array, FALSE, pHints, new_objs_array); 4626 m_objs_array.RemoveAll(); 4627 if (!bRet) { 4628 m_objs_array.Append(new_objs_array); 4629 return FormNotAvailable; 4630 } 4631 return FormAvailable; 4632 } 4633 4634 CPDF_PageNode::~CPDF_PageNode() { 4635 for (int32_t i = 0; i < m_childNode.GetSize(); ++i) { 4636 delete m_childNode[i]; 4637 } 4638 m_childNode.RemoveAll(); 4639 } 4640 CPDF_HintTables::~CPDF_HintTables() { 4641 m_dwDeltaNObjsArray.RemoveAll(); 4642 m_dwNSharedObjsArray.RemoveAll(); 4643 m_dwSharedObjNumArray.RemoveAll(); 4644 m_dwIdentifierArray.RemoveAll(); 4645 m_szPageOffsetArray.RemoveAll(); 4646 m_szSharedObjOffsetArray.RemoveAll(); 4647 } 4648 FX_DWORD CPDF_HintTables::GetItemLength(int index, 4649 const CFX_FileSizeArray& szArray) { 4650 if (index < 0 || szArray.GetSize() < 2 || index > szArray.GetSize() - 2 || 4651 szArray[index] > szArray[index + 1]) 4652 return 0; 4653 return szArray[index + 1] - szArray[index]; 4654 } 4655 FX_BOOL CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { 4656 if (!hStream || hStream->IsEOF()) 4657 return FALSE; 4658 int nStreamOffset = ReadPrimaryHintStreamOffset(); 4659 int nStreamLen = ReadPrimaryHintStreamLength(); 4660 if (nStreamOffset < 0 || nStreamLen < 1) 4661 return FALSE; 4662 4663 const FX_DWORD kHeaderSize = 288; 4664 if (hStream->BitsRemaining() < kHeaderSize) 4665 return FALSE; 4666 // Item 1: The least number of objects in a page. 4667 FX_DWORD dwObjLeastNum = hStream->GetBits(32); 4668 // Item 2: The location of the first page's page object. 4669 FX_DWORD dwFirstObjLoc = hStream->GetBits(32); 4670 if (dwFirstObjLoc > nStreamOffset) { 4671 FX_SAFE_DWORD safeLoc = pdfium::base::checked_cast<FX_DWORD>(nStreamLen); 4672 safeLoc += dwFirstObjLoc; 4673 if (!safeLoc.IsValid()) 4674 return FALSE; 4675 m_szFirstPageObjOffset = 4676 pdfium::base::checked_cast<FX_FILESIZE>(safeLoc.ValueOrDie()); 4677 } else { 4678 m_szFirstPageObjOffset = 4679 pdfium::base::checked_cast<FX_FILESIZE>(dwFirstObjLoc); 4680 } 4681 // Item 3: The number of bits needed to represent the difference 4682 // between the greatest and least number of objects in a page. 4683 FX_DWORD dwDeltaObjectsBits = hStream->GetBits(16); 4684 // Item 4: The least length of a page in bytes. 4685 FX_DWORD dwPageLeastLen = hStream->GetBits(32); 4686 // Item 5: The number of bits needed to represent the difference 4687 // between the greatest and least length of a page, in bytes. 4688 FX_DWORD dwDeltaPageLenBits = hStream->GetBits(16); 4689 // Skip Item 6, 7, 8, 9 total 96 bits. 4690 hStream->SkipBits(96); 4691 // Item 10: The number of bits needed to represent the greatest 4692 // number of shared object references. 4693 FX_DWORD dwSharedObjBits = hStream->GetBits(16); 4694 // Item 11: The number of bits needed to represent the numerically 4695 // greatest shared object identifier used by the pages. 4696 FX_DWORD dwSharedIdBits = hStream->GetBits(16); 4697 // Item 12: The number of bits needed to represent the numerator of 4698 // the fractional position for each shared object reference. For each 4699 // shared object referenced from a page, there is an indication of 4700 // where in the page's content stream the object is first referenced. 4701 FX_DWORD dwSharedNumeratorBits = hStream->GetBits(16); 4702 // Item 13: Skip Item 13 which has 16 bits. 4703 hStream->SkipBits(16); 4704 CPDF_Object* pPageNum = m_pLinearizedDict->GetElementValue("N"); 4705 int nPages = pPageNum ? pPageNum->GetInteger() : 0; 4706 if (nPages < 1) 4707 return FALSE; 4708 4709 FX_SAFE_DWORD required_bits = dwDeltaObjectsBits; 4710 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); 4711 if (!CanReadFromBitStream(hStream, required_bits)) 4712 return FALSE; 4713 for (int i = 0; i < nPages; ++i) { 4714 FX_SAFE_DWORD safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); 4715 safeDeltaObj += dwObjLeastNum; 4716 if (!safeDeltaObj.IsValid()) 4717 return FALSE; 4718 m_dwDeltaNObjsArray.Add(safeDeltaObj.ValueOrDie()); 4719 } 4720 hStream->ByteAlign(); 4721 4722 required_bits = dwDeltaPageLenBits; 4723 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); 4724 if (!CanReadFromBitStream(hStream, required_bits)) 4725 return FALSE; 4726 CFX_DWordArray dwPageLenArray; 4727 for (int i = 0; i < nPages; ++i) { 4728 FX_SAFE_DWORD safePageLen = hStream->GetBits(dwDeltaPageLenBits); 4729 safePageLen += dwPageLeastLen; 4730 if (!safePageLen.IsValid()) 4731 return FALSE; 4732 dwPageLenArray.Add(safePageLen.ValueOrDie()); 4733 } 4734 CPDF_Object* pOffsetE = m_pLinearizedDict->GetElementValue("E"); 4735 int nOffsetE = pOffsetE ? pOffsetE->GetInteger() : -1; 4736 if (nOffsetE < 0) 4737 return FALSE; 4738 CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P"); 4739 int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; 4740 for (int i = 0; i < nPages; ++i) { 4741 if (i == nFirstPageNum) { 4742 m_szPageOffsetArray.Add(m_szFirstPageObjOffset); 4743 } else if (i == nFirstPageNum + 1) { 4744 if (i == 1) { 4745 m_szPageOffsetArray.Add(nOffsetE); 4746 } else { 4747 m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 2] + 4748 dwPageLenArray[i - 2]); 4749 } 4750 } else { 4751 if (i == 0) { 4752 m_szPageOffsetArray.Add(nOffsetE); 4753 } else { 4754 m_szPageOffsetArray.Add(m_szPageOffsetArray[i - 1] + 4755 dwPageLenArray[i - 1]); 4756 } 4757 } 4758 } 4759 if (nPages > 0) { 4760 m_szPageOffsetArray.Add(m_szPageOffsetArray[nPages - 1] + 4761 dwPageLenArray[nPages - 1]); 4762 } 4763 hStream->ByteAlign(); 4764 4765 // number of shared objects 4766 required_bits = dwSharedObjBits; 4767 required_bits *= pdfium::base::checked_cast<FX_DWORD>(nPages); 4768 if (!CanReadFromBitStream(hStream, required_bits)) 4769 return FALSE; 4770 for (int i = 0; i < nPages; i++) { 4771 m_dwNSharedObjsArray.Add(hStream->GetBits(dwSharedObjBits)); 4772 } 4773 hStream->ByteAlign(); 4774 4775 // array of identifier, sizes = nshared_objects 4776 for (int i = 0; i < nPages; i++) { 4777 required_bits = dwSharedIdBits; 4778 required_bits *= m_dwNSharedObjsArray[i]; 4779 if (!CanReadFromBitStream(hStream, required_bits)) 4780 return FALSE; 4781 for (int j = 0; j < m_dwNSharedObjsArray[i]; j++) { 4782 m_dwIdentifierArray.Add(hStream->GetBits(dwSharedIdBits)); 4783 } 4784 } 4785 hStream->ByteAlign(); 4786 4787 for (int i = 0; i < nPages; i++) { 4788 FX_SAFE_DWORD safeSize = m_dwNSharedObjsArray[i]; 4789 safeSize *= dwSharedNumeratorBits; 4790 if (!CanReadFromBitStream(hStream, safeSize)) 4791 return FALSE; 4792 hStream->SkipBits(safeSize.ValueOrDie()); 4793 } 4794 hStream->ByteAlign(); 4795 4796 FX_SAFE_DWORD safeTotalPageLen = pdfium::base::checked_cast<FX_DWORD>(nPages); 4797 safeTotalPageLen *= dwDeltaPageLenBits; 4798 if (!CanReadFromBitStream(hStream, safeTotalPageLen)) 4799 return FALSE; 4800 hStream->SkipBits(safeTotalPageLen.ValueOrDie()); 4801 hStream->ByteAlign(); 4802 return TRUE; 4803 } 4804 FX_BOOL CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream, 4805 FX_DWORD offset) { 4806 if (!hStream || hStream->IsEOF()) 4807 return FALSE; 4808 int nStreamOffset = ReadPrimaryHintStreamOffset(); 4809 int nStreamLen = ReadPrimaryHintStreamLength(); 4810 if (nStreamOffset < 0 || nStreamLen < 1) 4811 return FALSE; 4812 4813 FX_SAFE_DWORD bit_offset = offset; 4814 bit_offset *= 8; 4815 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie()) 4816 return FALSE; 4817 hStream->SkipBits(bit_offset.ValueOrDie() - hStream->GetPos()); 4818 4819 const FX_DWORD kHeaderSize = 192; 4820 if (hStream->BitsRemaining() < kHeaderSize) 4821 return FALSE; 4822 // Item 1: The object number of the first object in the shared objects 4823 // section. 4824 FX_DWORD dwFirstSharedObjNum = hStream->GetBits(32); 4825 // Item 2: The location of the first object in the shared objects section. 4826 FX_DWORD dwFirstSharedObjLoc = hStream->GetBits(32); 4827 if (dwFirstSharedObjLoc > nStreamOffset) 4828 dwFirstSharedObjLoc += nStreamLen; 4829 // Item 3: The number of shared object entries for the first page. 4830 m_nFirstPageSharedObjs = hStream->GetBits(32); 4831 // Item 4: The number of shared object entries for the shared objects 4832 // section, including the number of shared object entries for the first page. 4833 FX_DWORD dwSharedObjTotal = hStream->GetBits(32); 4834 // Item 5: The number of bits needed to represent the greatest number of 4835 // objects in a shared object group. Skipped. 4836 hStream->SkipBits(16); 4837 // Item 6: The least length of a shared object group in bytes. 4838 FX_DWORD dwGroupLeastLen = hStream->GetBits(32); 4839 // Item 7: The number of bits needed to represent the difference between the 4840 // greatest and least length of a shared object group, in bytes. 4841 FX_DWORD dwDeltaGroupLen = hStream->GetBits(16); 4842 CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O"); 4843 int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1; 4844 if (nFirstPageObjNum < 0) 4845 return FALSE; 4846 FX_DWORD dwPrevObjLen = 0; 4847 FX_DWORD dwCurObjLen = 0; 4848 FX_SAFE_DWORD required_bits = dwSharedObjTotal; 4849 required_bits *= dwDeltaGroupLen; 4850 if (!CanReadFromBitStream(hStream, required_bits)) 4851 return FALSE; 4852 4853 for (int i = 0; i < dwSharedObjTotal; ++i) { 4854 dwPrevObjLen = dwCurObjLen; 4855 FX_SAFE_DWORD safeObjLen = hStream->GetBits(dwDeltaGroupLen); 4856 safeObjLen += dwGroupLeastLen; 4857 if (!safeObjLen.IsValid()) 4858 return FALSE; 4859 dwCurObjLen = safeObjLen.ValueOrDie(); 4860 if (i < m_nFirstPageSharedObjs) { 4861 m_dwSharedObjNumArray.Add(nFirstPageObjNum + i); 4862 if (i == 0) 4863 m_szSharedObjOffsetArray.Add(m_szFirstPageObjOffset); 4864 } else { 4865 FX_SAFE_DWORD safeObjNum = dwFirstSharedObjNum; 4866 safeObjNum += i - m_nFirstPageSharedObjs; 4867 if (!safeObjNum.IsValid()) 4868 return FALSE; 4869 m_dwSharedObjNumArray.Add(safeObjNum.ValueOrDie()); 4870 if (i == m_nFirstPageSharedObjs) 4871 m_szSharedObjOffsetArray.Add( 4872 pdfium::base::checked_cast<int32_t>(dwFirstSharedObjLoc)); 4873 } 4874 if (i != 0 && i != m_nFirstPageSharedObjs) { 4875 FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwPrevObjLen); 4876 safeLoc += m_szSharedObjOffsetArray[i - 1]; 4877 if (!safeLoc.IsValid()) 4878 return FALSE; 4879 m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie()); 4880 } 4881 } 4882 if (dwSharedObjTotal > 0) { 4883 FX_SAFE_INT32 safeLoc = pdfium::base::checked_cast<int32_t>(dwCurObjLen); 4884 safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1]; 4885 if (!safeLoc.IsValid()) 4886 return FALSE; 4887 m_szSharedObjOffsetArray.Add(safeLoc.ValueOrDie()); 4888 } 4889 hStream->ByteAlign(); 4890 if (hStream->BitsRemaining() < dwSharedObjTotal) 4891 return FALSE; 4892 hStream->SkipBits(dwSharedObjTotal); 4893 hStream->ByteAlign(); 4894 return TRUE; 4895 } 4896 FX_BOOL CPDF_HintTables::GetPagePos(int index, 4897 FX_FILESIZE& szPageStartPos, 4898 FX_FILESIZE& szPageLength, 4899 FX_DWORD& dwObjNum) { 4900 if (!m_pLinearizedDict) 4901 return FALSE; 4902 szPageStartPos = m_szPageOffsetArray[index]; 4903 szPageLength = GetItemLength(index, m_szPageOffsetArray); 4904 CPDF_Object* pFirstPageNum = m_pLinearizedDict->GetElementValue("P"); 4905 int nFirstPageNum = pFirstPageNum ? pFirstPageNum->GetInteger() : 0; 4906 CPDF_Object* pFirstPageObjNum = m_pLinearizedDict->GetElementValue("O"); 4907 if (!pFirstPageObjNum) 4908 return FALSE; 4909 int nFirstPageObjNum = pFirstPageObjNum->GetInteger(); 4910 if (index == nFirstPageNum) { 4911 dwObjNum = nFirstPageObjNum; 4912 return TRUE; 4913 } 4914 // The object number of remaining pages starts from 1. 4915 dwObjNum = 1; 4916 for (int i = 0; i < index; ++i) { 4917 if (i == nFirstPageNum) 4918 continue; 4919 dwObjNum += m_dwDeltaNObjsArray[i]; 4920 } 4921 return TRUE; 4922 } 4923 IPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage( 4924 int index, 4925 IFX_DownloadHints* pHints) { 4926 if (!m_pLinearizedDict || !pHints) 4927 return IPDF_DataAvail::DataError; 4928 CPDF_Object* pFirstAvailPage = m_pLinearizedDict->GetElementValue("P"); 4929 int nFirstAvailPage = pFirstAvailPage ? pFirstAvailPage->GetInteger() : 0; 4930 if (index == nFirstAvailPage) 4931 return IPDF_DataAvail::DataAvailable; 4932 FX_DWORD dwLength = GetItemLength(index, m_szPageOffsetArray); 4933 // If two pages have the same offset, it should be treated as an error. 4934 if (!dwLength) 4935 return IPDF_DataAvail::DataError; 4936 if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints)) 4937 return IPDF_DataAvail::DataNotAvailable; 4938 // Download data of shared objects in the page. 4939 FX_DWORD offset = 0; 4940 for (int i = 0; i < index; ++i) { 4941 offset += m_dwNSharedObjsArray[i]; 4942 } 4943 CPDF_Object* pFirstPageObj = m_pLinearizedDict->GetElementValue("O"); 4944 int nFirstPageObjNum = pFirstPageObj ? pFirstPageObj->GetInteger() : -1; 4945 if (nFirstPageObjNum < 0) 4946 return IPDF_DataAvail::DataError; 4947 FX_DWORD dwIndex = 0; 4948 FX_DWORD dwObjNum = 0; 4949 for (int j = 0; j < m_dwNSharedObjsArray[index]; ++j) { 4950 dwIndex = m_dwIdentifierArray[offset + j]; 4951 if (dwIndex >= m_dwSharedObjNumArray.GetSize()) 4952 return IPDF_DataAvail::DataNotAvailable; 4953 dwObjNum = m_dwSharedObjNumArray[dwIndex]; 4954 if (dwObjNum >= nFirstPageObjNum && 4955 dwObjNum < nFirstPageObjNum + m_nFirstPageSharedObjs) { 4956 continue; 4957 } 4958 dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray); 4959 // If two objects have the same offset, it should be treated as an error. 4960 if (!dwLength) 4961 return IPDF_DataAvail::DataError; 4962 if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength, 4963 pHints)) { 4964 return IPDF_DataAvail::DataNotAvailable; 4965 } 4966 } 4967 return IPDF_DataAvail::DataAvailable; 4968 } 4969 4970 FX_BOOL CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { 4971 if (!pHintStream || !m_pLinearizedDict) 4972 return FALSE; 4973 CPDF_Dictionary* pDict = pHintStream->GetDict(); 4974 CPDF_Object* pOffset = pDict ? pDict->GetElement("S") : nullptr; 4975 if (!pOffset || pOffset->GetType() != PDFOBJ_NUMBER) 4976 return FALSE; 4977 int shared_hint_table_offset = pOffset->GetInteger(); 4978 CPDF_StreamAcc acc; 4979 acc.LoadAllData(pHintStream); 4980 FX_DWORD size = acc.GetSize(); 4981 // The header section of page offset hint table is 36 bytes. 4982 // The header section of shared object hint table is 24 bytes. 4983 // Hint table has at least 60 bytes. 4984 const FX_DWORD MIN_STREAM_LEN = 60; 4985 if (size < MIN_STREAM_LEN || shared_hint_table_offset <= 0 || 4986 size < shared_hint_table_offset) { 4987 return FALSE; 4988 } 4989 CFX_BitStream bs; 4990 bs.Init(acc.GetData(), size); 4991 return ReadPageHintTable(&bs) && 4992 ReadSharedObjHintTable(&bs, pdfium::base::checked_cast<FX_DWORD>( 4993 shared_hint_table_offset)); 4994 } 4995 4996 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { 4997 if (!m_pLinearizedDict) 4998 return -1; 4999 CPDF_Array* pRange = m_pLinearizedDict->GetArray("H"); 5000 if (!pRange) 5001 return -1; 5002 CPDF_Object* pStreamOffset = pRange->GetElementValue(0); 5003 if (!pStreamOffset) 5004 return -1; 5005 return pStreamOffset->GetInteger(); 5006 } 5007 int CPDF_HintTables::ReadPrimaryHintStreamLength() const { 5008 if (!m_pLinearizedDict) 5009 return -1; 5010 CPDF_Array* pRange = m_pLinearizedDict->GetArray("H"); 5011 if (!pRange) 5012 return -1; 5013 CPDF_Object* pStreamLen = pRange->GetElementValue(1); 5014 if (!pStreamLen) 5015 return -1; 5016 return pStreamLen->GetInteger(); 5017 } 5018