1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_parser.h" 8 9 #include <algorithm> 10 #include <utility> 11 #include <vector> 12 13 #include "core/fpdfapi/parser/cpdf_array.h" 14 #include "core/fpdfapi/parser/cpdf_crypto_handler.h" 15 #include "core/fpdfapi/parser/cpdf_dictionary.h" 16 #include "core/fpdfapi/parser/cpdf_document.h" 17 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 18 #include "core/fpdfapi/parser/cpdf_number.h" 19 #include "core/fpdfapi/parser/cpdf_reference.h" 20 #include "core/fpdfapi/parser/cpdf_security_handler.h" 21 #include "core/fpdfapi/parser/cpdf_stream.h" 22 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 23 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 24 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 25 #include "core/fxcrt/fx_ext.h" 26 #include "core/fxcrt/fx_safe_types.h" 27 #include "third_party/base/ptr_util.h" 28 #include "third_party/base/stl_util.h" 29 30 namespace { 31 32 // A limit on the size of the xref table. Theoretical limits are higher, but 33 // this may be large enough in practice. 34 const int32_t kMaxXRefSize = 1048576; 35 36 uint32_t GetVarInt(const uint8_t* p, int32_t n) { 37 uint32_t result = 0; 38 for (int32_t i = 0; i < n; ++i) 39 result = result * 256 + p[i]; 40 return result; 41 } 42 43 int32_t GetStreamNCount(CPDF_StreamAcc* pObjStream) { 44 return pObjStream->GetDict()->GetIntegerFor("N"); 45 } 46 47 int32_t GetStreamFirst(CPDF_StreamAcc* pObjStream) { 48 return pObjStream->GetDict()->GetIntegerFor("First"); 49 } 50 51 } // namespace 52 53 CPDF_Parser::CPDF_Parser() 54 : m_pDocument(nullptr), 55 m_bHasParsed(false), 56 m_bXRefStream(false), 57 m_bVersionUpdated(false), 58 m_FileVersion(0), 59 m_pEncryptDict(nullptr), 60 m_dwXrefStartObjNum(0) { 61 m_pSyntax = pdfium::MakeUnique<CPDF_SyntaxParser>(); 62 } 63 64 CPDF_Parser::~CPDF_Parser() { 65 ReleaseEncryptHandler(); 66 SetEncryptDictionary(nullptr); 67 } 68 69 uint32_t CPDF_Parser::GetLastObjNum() const { 70 return m_ObjectInfo.empty() ? 0 : m_ObjectInfo.rbegin()->first; 71 } 72 73 bool CPDF_Parser::IsValidObjectNumber(uint32_t objnum) const { 74 return !m_ObjectInfo.empty() && objnum <= m_ObjectInfo.rbegin()->first; 75 } 76 77 FX_FILESIZE CPDF_Parser::GetObjectPositionOrZero(uint32_t objnum) const { 78 auto it = m_ObjectInfo.find(objnum); 79 return it != m_ObjectInfo.end() ? it->second.pos : 0; 80 } 81 82 uint8_t CPDF_Parser::GetObjectType(uint32_t objnum) const { 83 ASSERT(IsValidObjectNumber(objnum)); 84 auto it = m_ObjectInfo.find(objnum); 85 return it != m_ObjectInfo.end() ? it->second.type : 0; 86 } 87 88 uint16_t CPDF_Parser::GetObjectGenNum(uint32_t objnum) const { 89 ASSERT(IsValidObjectNumber(objnum)); 90 auto it = m_ObjectInfo.find(objnum); 91 return it != m_ObjectInfo.end() ? it->second.gennum : 0; 92 } 93 94 bool CPDF_Parser::IsObjectFreeOrNull(uint32_t objnum) const { 95 uint8_t type = GetObjectType(objnum); 96 return type == 0 || type == 255; 97 } 98 99 void CPDF_Parser::SetEncryptDictionary(CPDF_Dictionary* pDict) { 100 m_pEncryptDict = pDict; 101 } 102 103 CPDF_CryptoHandler* CPDF_Parser::GetCryptoHandler() { 104 return m_pSyntax->m_pCryptoHandler.get(); 105 } 106 107 CFX_RetainPtr<IFX_SeekableReadStream> CPDF_Parser::GetFileAccess() const { 108 return m_pSyntax->m_pFileAccess; 109 } 110 111 void CPDF_Parser::ShrinkObjectMap(uint32_t objnum) { 112 if (objnum == 0) { 113 m_ObjectInfo.clear(); 114 return; 115 } 116 117 auto it = m_ObjectInfo.lower_bound(objnum); 118 while (it != m_ObjectInfo.end()) { 119 auto saved_it = it++; 120 m_ObjectInfo.erase(saved_it); 121 } 122 123 if (!pdfium::ContainsKey(m_ObjectInfo, objnum - 1)) 124 m_ObjectInfo[objnum - 1].pos = 0; 125 } 126 127 CPDF_Parser::Error CPDF_Parser::StartParse( 128 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess, 129 CPDF_Document* pDocument) { 130 ASSERT(!m_bHasParsed); 131 m_bHasParsed = true; 132 m_bXRefStream = false; 133 m_LastXRefOffset = 0; 134 135 int32_t offset = GetHeaderOffset(pFileAccess); 136 if (offset == -1) 137 return FORMAT_ERROR; 138 139 m_pSyntax->InitParser(pFileAccess, offset); 140 141 uint8_t ch; 142 if (!m_pSyntax->GetCharAt(5, ch)) 143 return FORMAT_ERROR; 144 145 if (std::isdigit(ch)) 146 m_FileVersion = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)) * 10; 147 148 if (!m_pSyntax->GetCharAt(7, ch)) 149 return FORMAT_ERROR; 150 151 if (std::isdigit(ch)) 152 m_FileVersion += FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 153 154 if (m_pSyntax->m_FileLen < m_pSyntax->m_HeaderOffset + 9) 155 return FORMAT_ERROR; 156 157 m_pSyntax->RestorePos(m_pSyntax->m_FileLen - m_pSyntax->m_HeaderOffset - 9); 158 m_pDocument = pDocument; 159 160 bool bXRefRebuilt = false; 161 if (m_pSyntax->SearchWord("startxref", true, false, 4096)) { 162 m_SortedOffset.insert(m_pSyntax->SavePos()); 163 m_pSyntax->GetKeyword(); 164 165 bool bNumber; 166 CFX_ByteString xrefpos_str = m_pSyntax->GetNextWord(&bNumber); 167 if (!bNumber) 168 return FORMAT_ERROR; 169 170 m_LastXRefOffset = (FX_FILESIZE)FXSYS_atoi64(xrefpos_str.c_str()); 171 if (!LoadAllCrossRefV4(m_LastXRefOffset) && 172 !LoadAllCrossRefV5(m_LastXRefOffset)) { 173 if (!RebuildCrossRef()) 174 return FORMAT_ERROR; 175 176 bXRefRebuilt = true; 177 m_LastXRefOffset = 0; 178 } 179 } else { 180 if (!RebuildCrossRef()) 181 return FORMAT_ERROR; 182 183 bXRefRebuilt = true; 184 } 185 Error eRet = SetEncryptHandler(); 186 if (eRet != SUCCESS) 187 return eRet; 188 189 m_pDocument->LoadDoc(); 190 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 191 if (bXRefRebuilt) 192 return FORMAT_ERROR; 193 194 ReleaseEncryptHandler(); 195 if (!RebuildCrossRef()) 196 return FORMAT_ERROR; 197 198 eRet = SetEncryptHandler(); 199 if (eRet != SUCCESS) 200 return eRet; 201 202 m_pDocument->LoadDoc(); 203 if (!m_pDocument->GetRoot()) 204 return FORMAT_ERROR; 205 } 206 if (GetRootObjNum() == 0) { 207 ReleaseEncryptHandler(); 208 if (!RebuildCrossRef() || GetRootObjNum() == 0) 209 return FORMAT_ERROR; 210 211 eRet = SetEncryptHandler(); 212 if (eRet != SUCCESS) 213 return eRet; 214 } 215 if (m_pSecurityHandler && !m_pSecurityHandler->IsMetadataEncrypted()) { 216 CPDF_Reference* pMetadata = 217 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata")); 218 if (pMetadata) 219 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); 220 } 221 return SUCCESS; 222 } 223 CPDF_Parser::Error CPDF_Parser::SetEncryptHandler() { 224 ReleaseEncryptHandler(); 225 SetEncryptDictionary(nullptr); 226 227 if (!m_pTrailer) 228 return FORMAT_ERROR; 229 230 CPDF_Object* pEncryptObj = m_pTrailer->GetObjectFor("Encrypt"); 231 if (pEncryptObj) { 232 if (CPDF_Dictionary* pEncryptDict = pEncryptObj->AsDictionary()) { 233 SetEncryptDictionary(pEncryptDict); 234 } else if (CPDF_Reference* pRef = pEncryptObj->AsReference()) { 235 pEncryptObj = m_pDocument->GetOrParseIndirectObject(pRef->GetRefObjNum()); 236 if (pEncryptObj) 237 SetEncryptDictionary(pEncryptObj->GetDict()); 238 } 239 } 240 241 if (m_pEncryptDict) { 242 CFX_ByteString filter = m_pEncryptDict->GetStringFor("Filter"); 243 std::unique_ptr<CPDF_SecurityHandler> pSecurityHandler; 244 Error err = HANDLER_ERROR; 245 if (filter == "Standard") { 246 pSecurityHandler = pdfium::MakeUnique<CPDF_SecurityHandler>(); 247 err = PASSWORD_ERROR; 248 } 249 if (!pSecurityHandler) 250 return HANDLER_ERROR; 251 252 if (!pSecurityHandler->OnInit(this, m_pEncryptDict)) 253 return err; 254 255 m_pSecurityHandler = std::move(pSecurityHandler); 256 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler( 257 m_pSecurityHandler->CreateCryptoHandler()); 258 if (!pCryptoHandler->Init(m_pEncryptDict, m_pSecurityHandler.get())) 259 return HANDLER_ERROR; 260 m_pSyntax->SetEncrypt(std::move(pCryptoHandler)); 261 } 262 return SUCCESS; 263 } 264 265 void CPDF_Parser::ReleaseEncryptHandler() { 266 m_pSyntax->m_pCryptoHandler.reset(); 267 m_pSecurityHandler.reset(); 268 } 269 270 FX_FILESIZE CPDF_Parser::GetObjectOffset(uint32_t objnum) const { 271 if (!IsValidObjectNumber(objnum)) 272 return 0; 273 274 if (GetObjectType(objnum) == 1) 275 return GetObjectPositionOrZero(objnum); 276 277 if (GetObjectType(objnum) == 2) { 278 FX_FILESIZE pos = GetObjectPositionOrZero(objnum); 279 return GetObjectPositionOrZero(pos); 280 } 281 return 0; 282 } 283 284 // Ideally, all the cross reference entries should be verified. 285 // In reality, we rarely see well-formed cross references don't match 286 // with the objects. crbug/602650 showed a case where object numbers 287 // in the cross reference table are all off by one. 288 bool CPDF_Parser::VerifyCrossRefV4() { 289 for (const auto& it : m_ObjectInfo) { 290 if (it.second.pos == 0) 291 continue; 292 // Find the first non-zero position. 293 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 294 m_pSyntax->RestorePos(it.second.pos); 295 bool is_num = false; 296 CFX_ByteString num_str = m_pSyntax->GetNextWord(&is_num); 297 m_pSyntax->RestorePos(SavedPos); 298 if (!is_num || num_str.IsEmpty() || 299 FXSYS_atoui(num_str.c_str()) != it.first) { 300 // If the object number read doesn't match the one stored, 301 // something is wrong with the cross reference table. 302 return false; 303 } else { 304 return true; 305 } 306 } 307 return true; 308 } 309 310 bool CPDF_Parser::LoadAllCrossRefV4(FX_FILESIZE xrefpos) { 311 if (!LoadCrossRefV4(xrefpos, 0, true)) 312 return false; 313 314 m_pTrailer = LoadTrailerV4(); 315 if (!m_pTrailer) 316 return false; 317 318 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size"); 319 if (xrefsize > 0 && xrefsize <= kMaxXRefSize) 320 ShrinkObjectMap(xrefsize); 321 322 std::vector<FX_FILESIZE> CrossRefList; 323 std::vector<FX_FILESIZE> XRefStreamList; 324 std::set<FX_FILESIZE> seen_xrefpos; 325 326 CrossRefList.push_back(xrefpos); 327 XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm")); 328 seen_xrefpos.insert(xrefpos); 329 330 // When |m_pTrailer| doesn't have Prev entry or Prev entry value is not 331 // numerical, GetDirectInteger() returns 0. Loading will end. 332 xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev"); 333 while (xrefpos) { 334 // Check for circular references. 335 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 336 return false; 337 338 seen_xrefpos.insert(xrefpos); 339 340 // SLOW ... 341 CrossRefList.insert(CrossRefList.begin(), xrefpos); 342 LoadCrossRefV4(xrefpos, 0, true); 343 344 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4()); 345 if (!pDict) 346 return false; 347 348 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 349 350 // SLOW ... 351 XRefStreamList.insert(XRefStreamList.begin(), 352 pDict->GetIntegerFor("XRefStm")); 353 m_Trailers.push_back(std::move(pDict)); 354 } 355 356 for (size_t i = 0; i < CrossRefList.size(); ++i) { 357 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false)) 358 return false; 359 if (i == 0 && !VerifyCrossRefV4()) 360 return false; 361 } 362 return true; 363 } 364 365 bool CPDF_Parser::LoadLinearizedAllCrossRefV4(FX_FILESIZE xrefpos, 366 uint32_t dwObjCount) { 367 if (!LoadLinearizedCrossRefV4(xrefpos, dwObjCount)) 368 return false; 369 370 m_pTrailer = LoadTrailerV4(); 371 if (!m_pTrailer) 372 return false; 373 374 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size"); 375 if (xrefsize == 0) 376 return false; 377 378 std::vector<FX_FILESIZE> CrossRefList; 379 std::vector<FX_FILESIZE> XRefStreamList; 380 std::set<FX_FILESIZE> seen_xrefpos; 381 382 CrossRefList.push_back(xrefpos); 383 XRefStreamList.push_back(GetDirectInteger(m_pTrailer.get(), "XRefStm")); 384 seen_xrefpos.insert(xrefpos); 385 386 xrefpos = GetDirectInteger(m_pTrailer.get(), "Prev"); 387 while (xrefpos) { 388 // Check for circular references. 389 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 390 return false; 391 392 seen_xrefpos.insert(xrefpos); 393 394 // SLOW ... 395 CrossRefList.insert(CrossRefList.begin(), xrefpos); 396 LoadCrossRefV4(xrefpos, 0, true); 397 398 std::unique_ptr<CPDF_Dictionary> pDict(LoadTrailerV4()); 399 if (!pDict) 400 return false; 401 402 xrefpos = GetDirectInteger(pDict.get(), "Prev"); 403 404 // SLOW ... 405 XRefStreamList.insert(XRefStreamList.begin(), 406 pDict->GetIntegerFor("XRefStm")); 407 m_Trailers.push_back(std::move(pDict)); 408 } 409 410 for (size_t i = 1; i < CrossRefList.size(); ++i) { 411 if (!LoadCrossRefV4(CrossRefList[i], XRefStreamList[i], false)) 412 return false; 413 } 414 return true; 415 } 416 417 bool CPDF_Parser::LoadLinearizedCrossRefV4(FX_FILESIZE pos, 418 uint32_t dwObjCount) { 419 FX_FILESIZE dwStartPos = pos - m_pSyntax->m_HeaderOffset; 420 421 m_pSyntax->RestorePos(dwStartPos); 422 m_SortedOffset.insert(pos); 423 424 uint32_t start_objnum = 0; 425 uint32_t count = dwObjCount; 426 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 427 428 const int32_t recordsize = 20; 429 std::vector<char> buf(1024 * recordsize + 1); 430 buf[1024 * recordsize] = '\0'; 431 432 int32_t nBlocks = count / 1024 + 1; 433 for (int32_t block = 0; block < nBlocks; block++) { 434 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 435 uint32_t dwReadSize = block_size * recordsize; 436 if ((FX_FILESIZE)(dwStartPos + dwReadSize) > m_pSyntax->m_FileLen) 437 return false; 438 439 if (!m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 440 dwReadSize)) { 441 return false; 442 } 443 444 for (int32_t i = 0; i < block_size; i++) { 445 uint32_t objnum = start_objnum + block * 1024 + i; 446 char* pEntry = &buf[i * recordsize]; 447 if (pEntry[17] == 'f') { 448 m_ObjectInfo[objnum].pos = 0; 449 m_ObjectInfo[objnum].type = 0; 450 } else { 451 int32_t offset = FXSYS_atoi(pEntry); 452 if (offset == 0) { 453 for (int32_t c = 0; c < 10; c++) { 454 if (!std::isdigit(pEntry[c])) 455 return false; 456 } 457 } 458 459 m_ObjectInfo[objnum].pos = offset; 460 int32_t version = FXSYS_atoi(pEntry + 11); 461 if (version >= 1) 462 m_bVersionUpdated = true; 463 464 m_ObjectInfo[objnum].gennum = version; 465 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) 466 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); 467 468 m_ObjectInfo[objnum].type = 1; 469 } 470 } 471 } 472 m_pSyntax->RestorePos(SavedPos + count * recordsize); 473 return true; 474 } 475 476 bool CPDF_Parser::LoadCrossRefV4(FX_FILESIZE pos, 477 FX_FILESIZE streampos, 478 bool bSkip) { 479 m_pSyntax->RestorePos(pos); 480 if (m_pSyntax->GetKeyword() != "xref") 481 return false; 482 483 m_SortedOffset.insert(pos); 484 if (streampos) 485 m_SortedOffset.insert(streampos); 486 487 while (1) { 488 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 489 bool bIsNumber; 490 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 491 if (word.IsEmpty()) 492 return false; 493 494 if (!bIsNumber) { 495 m_pSyntax->RestorePos(SavedPos); 496 break; 497 } 498 499 uint32_t start_objnum = FXSYS_atoui(word.c_str()); 500 if (start_objnum >= kMaxObjectNumber) 501 return false; 502 503 uint32_t count = m_pSyntax->GetDirectNum(); 504 m_pSyntax->ToNextWord(); 505 SavedPos = m_pSyntax->SavePos(); 506 const int32_t recordsize = 20; 507 508 m_dwXrefStartObjNum = start_objnum; 509 if (!bSkip) { 510 std::vector<char> buf(1024 * recordsize + 1); 511 buf[1024 * recordsize] = '\0'; 512 513 int32_t nBlocks = count / 1024 + 1; 514 for (int32_t block = 0; block < nBlocks; block++) { 515 int32_t block_size = block == nBlocks - 1 ? count % 1024 : 1024; 516 m_pSyntax->ReadBlock(reinterpret_cast<uint8_t*>(buf.data()), 517 block_size * recordsize); 518 519 for (int32_t i = 0; i < block_size; i++) { 520 uint32_t objnum = start_objnum + block * 1024 + i; 521 char* pEntry = &buf[i * recordsize]; 522 if (pEntry[17] == 'f') { 523 m_ObjectInfo[objnum].pos = 0; 524 m_ObjectInfo[objnum].type = 0; 525 } else { 526 FX_FILESIZE offset = (FX_FILESIZE)FXSYS_atoi64(pEntry); 527 if (offset == 0) { 528 for (int32_t c = 0; c < 10; c++) { 529 if (!std::isdigit(pEntry[c])) 530 return false; 531 } 532 } 533 534 m_ObjectInfo[objnum].pos = offset; 535 int32_t version = FXSYS_atoi(pEntry + 11); 536 if (version >= 1) 537 m_bVersionUpdated = true; 538 539 m_ObjectInfo[objnum].gennum = version; 540 if (m_ObjectInfo[objnum].pos < m_pSyntax->m_FileLen) 541 m_SortedOffset.insert(m_ObjectInfo[objnum].pos); 542 543 m_ObjectInfo[objnum].type = 1; 544 } 545 } 546 } 547 } 548 m_pSyntax->RestorePos(SavedPos + count * recordsize); 549 } 550 return !streampos || LoadCrossRefV5(&streampos, false); 551 } 552 553 bool CPDF_Parser::LoadAllCrossRefV5(FX_FILESIZE xrefpos) { 554 if (!LoadCrossRefV5(&xrefpos, true)) 555 return false; 556 557 std::set<FX_FILESIZE> seen_xrefpos; 558 while (xrefpos) { 559 seen_xrefpos.insert(xrefpos); 560 if (!LoadCrossRefV5(&xrefpos, false)) 561 return false; 562 563 // Check for circular references. 564 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 565 return false; 566 } 567 m_ObjectStreamMap.clear(); 568 m_bXRefStream = true; 569 return true; 570 } 571 572 bool CPDF_Parser::RebuildCrossRef() { 573 m_ObjectInfo.clear(); 574 m_SortedOffset.clear(); 575 m_pTrailer.reset(); 576 577 ParserState state = ParserState::kDefault; 578 int32_t inside_index = 0; 579 uint32_t objnum = 0; 580 uint32_t gennum = 0; 581 int32_t depth = 0; 582 const uint32_t kBufferSize = 4096; 583 std::vector<uint8_t> buffer(kBufferSize); 584 585 FX_FILESIZE pos = m_pSyntax->m_HeaderOffset; 586 FX_FILESIZE start_pos = 0; 587 FX_FILESIZE start_pos1 = 0; 588 FX_FILESIZE last_obj = -1; 589 FX_FILESIZE last_xref = -1; 590 FX_FILESIZE last_trailer = -1; 591 592 while (pos < m_pSyntax->m_FileLen) { 593 const FX_FILESIZE saved_pos = pos; 594 bool bOverFlow = false; 595 uint32_t size = 596 std::min((uint32_t)(m_pSyntax->m_FileLen - pos), kBufferSize); 597 if (!m_pSyntax->m_pFileAccess->ReadBlock(buffer.data(), pos, size)) 598 break; 599 600 for (uint32_t i = 0; i < size; i++) { 601 uint8_t byte = buffer[i]; 602 switch (state) { 603 case ParserState::kDefault: 604 if (PDFCharIsWhitespace(byte)) { 605 state = ParserState::kWhitespace; 606 } else if (std::isdigit(byte)) { 607 --i; 608 state = ParserState::kWhitespace; 609 } else if (byte == '%') { 610 inside_index = 0; 611 state = ParserState::kComment; 612 } else if (byte == '(') { 613 state = ParserState::kString; 614 depth = 1; 615 } else if (byte == '<') { 616 inside_index = 1; 617 state = ParserState::kHexString; 618 } else if (byte == '\\') { 619 state = ParserState::kEscapedString; 620 } else if (byte == 't') { 621 state = ParserState::kTrailer; 622 inside_index = 1; 623 } 624 break; 625 626 case ParserState::kWhitespace: 627 if (std::isdigit(byte)) { 628 start_pos = pos + i; 629 state = ParserState::kObjNum; 630 objnum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); 631 } else if (byte == 't') { 632 state = ParserState::kTrailer; 633 inside_index = 1; 634 } else if (byte == 'x') { 635 state = ParserState::kXref; 636 inside_index = 1; 637 } else if (!PDFCharIsWhitespace(byte)) { 638 --i; 639 state = ParserState::kDefault; 640 } 641 break; 642 643 case ParserState::kObjNum: 644 if (std::isdigit(byte)) { 645 objnum = 646 objnum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); 647 } else if (PDFCharIsWhitespace(byte)) { 648 state = ParserState::kPostObjNum; 649 } else { 650 --i; 651 state = ParserState::kEndObj; 652 inside_index = 0; 653 } 654 break; 655 656 case ParserState::kPostObjNum: 657 if (std::isdigit(byte)) { 658 start_pos1 = pos + i; 659 state = ParserState::kGenNum; 660 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); 661 } else if (byte == 't') { 662 state = ParserState::kTrailer; 663 inside_index = 1; 664 } else if (!PDFCharIsWhitespace(byte)) { 665 --i; 666 state = ParserState::kDefault; 667 } 668 break; 669 670 case ParserState::kGenNum: 671 if (std::isdigit(byte)) { 672 gennum = 673 gennum * 10 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); 674 } else if (PDFCharIsWhitespace(byte)) { 675 state = ParserState::kPostGenNum; 676 } else { 677 --i; 678 state = ParserState::kDefault; 679 } 680 break; 681 682 case ParserState::kPostGenNum: 683 if (byte == 'o') { 684 state = ParserState::kBeginObj; 685 inside_index = 1; 686 } else if (std::isdigit(byte)) { 687 objnum = gennum; 688 gennum = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(byte)); 689 start_pos = start_pos1; 690 start_pos1 = pos + i; 691 state = ParserState::kGenNum; 692 } else if (byte == 't') { 693 state = ParserState::kTrailer; 694 inside_index = 1; 695 } else if (!PDFCharIsWhitespace(byte)) { 696 --i; 697 state = ParserState::kDefault; 698 } 699 break; 700 701 case ParserState::kBeginObj: 702 switch (inside_index) { 703 case 1: 704 if (byte != 'b') { 705 --i; 706 state = ParserState::kDefault; 707 } else { 708 inside_index++; 709 } 710 break; 711 case 2: 712 if (byte != 'j') { 713 --i; 714 state = ParserState::kDefault; 715 } else { 716 inside_index++; 717 } 718 break; 719 case 3: 720 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 721 FX_FILESIZE obj_pos = start_pos - m_pSyntax->m_HeaderOffset; 722 m_SortedOffset.insert(obj_pos); 723 last_obj = start_pos; 724 FX_FILESIZE obj_end = 0; 725 std::unique_ptr<CPDF_Object> pObject = 726 ParseIndirectObjectAtByStrict(m_pDocument, obj_pos, objnum, 727 &obj_end); 728 if (CPDF_Stream* pStream = ToStream(pObject.get())) { 729 if (CPDF_Dictionary* pDict = pStream->GetDict()) { 730 if ((pDict->KeyExist("Type")) && 731 (pDict->GetStringFor("Type") == "XRef" && 732 pDict->KeyExist("Size"))) { 733 CPDF_Object* pRoot = pDict->GetObjectFor("Root"); 734 if (pRoot && pRoot->GetDict() && 735 pRoot->GetDict()->GetObjectFor("Pages")) { 736 m_pTrailer = ToDictionary(pDict->Clone()); 737 } 738 } 739 } 740 } 741 742 FX_FILESIZE offset = 0; 743 m_pSyntax->RestorePos(obj_pos); 744 offset = m_pSyntax->FindTag("obj", 0); 745 if (offset == -1) 746 offset = 0; 747 else 748 offset += 3; 749 750 FX_FILESIZE nLen = obj_end - obj_pos - offset; 751 if ((uint32_t)nLen > size - i) { 752 pos = obj_end + m_pSyntax->m_HeaderOffset; 753 bOverFlow = true; 754 } else { 755 i += (uint32_t)nLen; 756 } 757 758 if (!m_ObjectInfo.empty() && IsValidObjectNumber(objnum) && 759 m_ObjectInfo[objnum].pos) { 760 if (pObject) { 761 uint32_t oldgen = GetObjectGenNum(objnum); 762 m_ObjectInfo[objnum].pos = obj_pos; 763 m_ObjectInfo[objnum].gennum = gennum; 764 if (oldgen != gennum) 765 m_bVersionUpdated = true; 766 } 767 } else { 768 m_ObjectInfo[objnum].pos = obj_pos; 769 m_ObjectInfo[objnum].type = 1; 770 m_ObjectInfo[objnum].gennum = gennum; 771 } 772 } 773 --i; 774 state = ParserState::kDefault; 775 break; 776 } 777 break; 778 779 case ParserState::kTrailer: 780 if (inside_index == 7) { 781 if (PDFCharIsWhitespace(byte) || PDFCharIsDelimiter(byte)) { 782 last_trailer = pos + i - 7; 783 m_pSyntax->RestorePos(pos + i - m_pSyntax->m_HeaderOffset); 784 785 std::unique_ptr<CPDF_Object> pObj = 786 m_pSyntax->GetObject(m_pDocument, 0, 0, true); 787 if (pObj) { 788 if (pObj->IsDictionary() || pObj->AsStream()) { 789 CPDF_Stream* pStream = pObj->AsStream(); 790 if (CPDF_Dictionary* pTrailer = 791 pStream ? pStream->GetDict() : pObj->AsDictionary()) { 792 if (m_pTrailer) { 793 CPDF_Object* pRoot = pTrailer->GetObjectFor("Root"); 794 CPDF_Reference* pRef = ToReference(pRoot); 795 if (!pRoot || 796 (pRef && IsValidObjectNumber(pRef->GetRefObjNum()) && 797 m_ObjectInfo[pRef->GetRefObjNum()].pos != 0)) { 798 auto it = pTrailer->begin(); 799 while (it != pTrailer->end()) { 800 const CFX_ByteString& key = it->first; 801 CPDF_Object* pElement = it->second.get(); 802 ++it; 803 uint32_t dwObjNum = 804 pElement ? pElement->GetObjNum() : 0; 805 if (dwObjNum) { 806 m_pTrailer->SetNewFor<CPDF_Reference>( 807 key, m_pDocument, dwObjNum); 808 } else { 809 m_pTrailer->SetFor(key, pElement->Clone()); 810 } 811 } 812 } 813 } else { 814 if (pObj->IsStream()) { 815 m_pTrailer = ToDictionary(pTrailer->Clone()); 816 } else { 817 m_pTrailer = ToDictionary(std::move(pObj)); 818 } 819 820 FX_FILESIZE dwSavePos = m_pSyntax->SavePos(); 821 CFX_ByteString strWord = m_pSyntax->GetKeyword(); 822 if (!strWord.Compare("startxref")) { 823 bool bNumber; 824 CFX_ByteString bsOffset = 825 m_pSyntax->GetNextWord(&bNumber); 826 if (bNumber) 827 m_LastXRefOffset = FXSYS_atoi(bsOffset.c_str()); 828 } 829 m_pSyntax->RestorePos(dwSavePos); 830 } 831 } 832 } 833 } 834 } 835 --i; 836 state = ParserState::kDefault; 837 } else if (byte == "trailer"[inside_index]) { 838 inside_index++; 839 } else { 840 --i; 841 state = ParserState::kDefault; 842 } 843 break; 844 845 case ParserState::kXref: 846 if (inside_index == 4) { 847 last_xref = pos + i - 4; 848 state = ParserState::kWhitespace; 849 } else if (byte == "xref"[inside_index]) { 850 inside_index++; 851 } else { 852 --i; 853 state = ParserState::kDefault; 854 } 855 break; 856 857 case ParserState::kComment: 858 if (PDFCharIsLineEnding(byte)) 859 state = ParserState::kDefault; 860 break; 861 862 case ParserState::kString: 863 if (byte == ')') { 864 if (depth > 0) 865 depth--; 866 } else if (byte == '(') { 867 depth++; 868 } 869 870 if (!depth) 871 state = ParserState::kDefault; 872 break; 873 874 case ParserState::kHexString: 875 if (byte == '>' || (byte == '<' && inside_index == 1)) 876 state = ParserState::kDefault; 877 inside_index = 0; 878 break; 879 880 case ParserState::kEscapedString: 881 if (PDFCharIsDelimiter(byte) || PDFCharIsWhitespace(byte)) { 882 --i; 883 state = ParserState::kDefault; 884 } 885 break; 886 887 case ParserState::kEndObj: 888 if (PDFCharIsWhitespace(byte)) { 889 state = ParserState::kDefault; 890 } else if (byte == '%' || byte == '(' || byte == '<' || 891 byte == '\\') { 892 state = ParserState::kDefault; 893 --i; 894 } else if (inside_index == 6) { 895 state = ParserState::kDefault; 896 --i; 897 } else if (byte == "endobj"[inside_index]) { 898 inside_index++; 899 } 900 break; 901 } 902 903 if (bOverFlow) { 904 size = 0; 905 break; 906 } 907 } 908 pos += size; 909 910 // If the position has not changed at all or went backwards in a loop 911 // iteration, then break out to prevent infinite looping. 912 if (pos <= saved_pos) 913 break; 914 } 915 916 if (last_xref != -1 && last_xref > last_obj) 917 last_trailer = last_xref; 918 else if (last_trailer == -1 || last_xref < last_obj) 919 last_trailer = m_pSyntax->m_FileLen; 920 921 m_SortedOffset.insert(last_trailer - m_pSyntax->m_HeaderOffset); 922 return m_pTrailer && !m_ObjectInfo.empty(); 923 } 924 925 bool CPDF_Parser::LoadCrossRefV5(FX_FILESIZE* pos, bool bMainXRef) { 926 std::unique_ptr<CPDF_Object> pObject( 927 ParseIndirectObjectAt(m_pDocument, *pos, 0)); 928 if (!pObject) 929 return false; 930 931 uint32_t objnum = pObject->m_ObjNum; 932 if (!objnum) 933 return false; 934 935 CPDF_Object* pUnownedObject = pObject.get(); 936 if (m_pDocument) { 937 CPDF_Dictionary* pRootDict = m_pDocument->GetRoot(); 938 if (pRootDict && pRootDict->GetObjNum() == objnum) 939 return false; 940 if (!m_pDocument->ReplaceIndirectObjectIfHigherGeneration( 941 objnum, std::move(pObject))) { 942 return false; 943 } 944 } 945 946 CPDF_Stream* pStream = pUnownedObject->AsStream(); 947 if (!pStream) 948 return false; 949 950 CPDF_Dictionary* pDict = pStream->GetDict(); 951 *pos = pDict->GetIntegerFor("Prev"); 952 int32_t size = pDict->GetIntegerFor("Size"); 953 if (size < 0) 954 return false; 955 956 std::unique_ptr<CPDF_Dictionary> pNewTrailer = ToDictionary(pDict->Clone()); 957 if (bMainXRef) { 958 m_pTrailer = std::move(pNewTrailer); 959 ShrinkObjectMap(size); 960 for (auto& it : m_ObjectInfo) 961 it.second.type = 0; 962 } else { 963 m_Trailers.push_back(std::move(pNewTrailer)); 964 } 965 966 std::vector<std::pair<int32_t, int32_t>> arrIndex; 967 CPDF_Array* pArray = pDict->GetArrayFor("Index"); 968 if (pArray) { 969 for (size_t i = 0; i < pArray->GetCount() / 2; i++) { 970 CPDF_Object* pStartNumObj = pArray->GetObjectAt(i * 2); 971 CPDF_Object* pCountObj = pArray->GetObjectAt(i * 2 + 1); 972 973 if (ToNumber(pStartNumObj) && ToNumber(pCountObj)) { 974 int nStartNum = pStartNumObj->GetInteger(); 975 int nCount = pCountObj->GetInteger(); 976 if (nStartNum >= 0 && nCount > 0) 977 arrIndex.push_back(std::make_pair(nStartNum, nCount)); 978 } 979 } 980 } 981 982 if (arrIndex.size() == 0) 983 arrIndex.push_back(std::make_pair(0, size)); 984 985 pArray = pDict->GetArrayFor("W"); 986 if (!pArray) 987 return false; 988 989 std::vector<uint32_t> WidthArray; 990 FX_SAFE_UINT32 dwAccWidth = 0; 991 for (size_t i = 0; i < pArray->GetCount(); ++i) { 992 WidthArray.push_back(pArray->GetIntegerAt(i)); 993 dwAccWidth += WidthArray[i]; 994 } 995 996 if (!dwAccWidth.IsValid() || WidthArray.size() < 3) 997 return false; 998 999 uint32_t totalWidth = dwAccWidth.ValueOrDie(); 1000 CPDF_StreamAcc acc; 1001 acc.LoadAllData(pStream); 1002 1003 const uint8_t* pData = acc.GetData(); 1004 uint32_t dwTotalSize = acc.GetSize(); 1005 uint32_t segindex = 0; 1006 for (uint32_t i = 0; i < arrIndex.size(); i++) { 1007 int32_t startnum = arrIndex[i].first; 1008 if (startnum < 0) 1009 continue; 1010 1011 m_dwXrefStartObjNum = pdfium::base::checked_cast<uint32_t>(startnum); 1012 uint32_t count = pdfium::base::checked_cast<uint32_t>(arrIndex[i].second); 1013 FX_SAFE_UINT32 dwCaculatedSize = segindex; 1014 dwCaculatedSize += count; 1015 dwCaculatedSize *= totalWidth; 1016 if (!dwCaculatedSize.IsValid() || 1017 dwCaculatedSize.ValueOrDie() > dwTotalSize) { 1018 continue; 1019 } 1020 1021 const uint8_t* segstart = pData + segindex * totalWidth; 1022 FX_SAFE_UINT32 dwMaxObjNum = startnum; 1023 dwMaxObjNum += count; 1024 uint32_t dwV5Size = m_ObjectInfo.empty() ? 0 : GetLastObjNum() + 1; 1025 if (!dwMaxObjNum.IsValid() || dwMaxObjNum.ValueOrDie() > dwV5Size) 1026 continue; 1027 1028 for (uint32_t j = 0; j < count; j++) { 1029 int32_t type = 1; 1030 const uint8_t* entrystart = segstart + j * totalWidth; 1031 if (WidthArray[0]) 1032 type = GetVarInt(entrystart, WidthArray[0]); 1033 1034 if (GetObjectType(startnum + j) == 255) { 1035 FX_FILESIZE offset = 1036 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1037 m_ObjectInfo[startnum + j].pos = offset; 1038 m_SortedOffset.insert(offset); 1039 continue; 1040 } 1041 1042 if (GetObjectType(startnum + j)) 1043 continue; 1044 1045 m_ObjectInfo[startnum + j].type = type; 1046 if (type == 0) { 1047 m_ObjectInfo[startnum + j].pos = 0; 1048 } else { 1049 FX_FILESIZE offset = 1050 GetVarInt(entrystart + WidthArray[0], WidthArray[1]); 1051 m_ObjectInfo[startnum + j].pos = offset; 1052 if (type == 1) { 1053 m_SortedOffset.insert(offset); 1054 } else { 1055 if (offset < 0 || !IsValidObjectNumber(offset)) 1056 return false; 1057 m_ObjectInfo[offset].type = 255; 1058 } 1059 } 1060 } 1061 segindex += count; 1062 } 1063 return true; 1064 } 1065 1066 CPDF_Array* CPDF_Parser::GetIDArray() { 1067 if (!m_pTrailer) 1068 return nullptr; 1069 1070 CPDF_Object* pID = m_pTrailer->GetObjectFor("ID"); 1071 if (!pID) 1072 return nullptr; 1073 1074 CPDF_Reference* pRef = pID->AsReference(); 1075 if (!pRef) 1076 return ToArray(pID); 1077 1078 std::unique_ptr<CPDF_Object> pNewObj = 1079 ParseIndirectObject(nullptr, pRef->GetRefObjNum()); 1080 pID = pNewObj.get(); 1081 m_pTrailer->SetFor("ID", std::move(pNewObj)); 1082 return ToArray(pID); 1083 } 1084 1085 uint32_t CPDF_Parser::GetRootObjNum() { 1086 CPDF_Reference* pRef = 1087 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Root") : nullptr); 1088 return pRef ? pRef->GetRefObjNum() : 0; 1089 } 1090 1091 uint32_t CPDF_Parser::GetInfoObjNum() { 1092 CPDF_Reference* pRef = 1093 ToReference(m_pTrailer ? m_pTrailer->GetObjectFor("Info") : nullptr); 1094 return pRef ? pRef->GetRefObjNum() : 0; 1095 } 1096 1097 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObject( 1098 CPDF_IndirectObjectHolder* pObjList, 1099 uint32_t objnum) { 1100 if (!IsValidObjectNumber(objnum)) 1101 return nullptr; 1102 1103 // Prevent circular parsing the same object. 1104 if (pdfium::ContainsKey(m_ParsingObjNums, objnum)) 1105 return nullptr; 1106 1107 pdfium::ScopedSetInsertion<uint32_t> local_insert(&m_ParsingObjNums, objnum); 1108 if (GetObjectType(objnum) == 1 || GetObjectType(objnum) == 255) { 1109 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1110 if (pos <= 0) 1111 return nullptr; 1112 return ParseIndirectObjectAt(pObjList, pos, objnum); 1113 } 1114 if (GetObjectType(objnum) != 2) 1115 return nullptr; 1116 1117 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); 1118 if (!pObjStream) 1119 return nullptr; 1120 1121 CFX_RetainPtr<IFX_MemoryStream> file = IFX_MemoryStream::Create( 1122 (uint8_t*)pObjStream->GetData(), (size_t)pObjStream->GetSize(), false); 1123 CPDF_SyntaxParser syntax; 1124 syntax.InitParser(file, 0); 1125 const int32_t offset = GetStreamFirst(pObjStream); 1126 1127 // Read object numbers from |pObjStream| into a cache. 1128 if (!pdfium::ContainsKey(m_ObjCache, pObjStream)) { 1129 for (int32_t i = GetStreamNCount(pObjStream); i > 0; --i) { 1130 uint32_t thisnum = syntax.GetDirectNum(); 1131 uint32_t thisoff = syntax.GetDirectNum(); 1132 m_ObjCache[pObjStream][thisnum] = thisoff; 1133 } 1134 } 1135 1136 const auto it = m_ObjCache[pObjStream].find(objnum); 1137 if (it == m_ObjCache[pObjStream].end()) 1138 return nullptr; 1139 1140 syntax.RestorePos(offset + it->second); 1141 return syntax.GetObject(pObjList, 0, 0, true); 1142 } 1143 1144 CPDF_StreamAcc* CPDF_Parser::GetObjectStream(uint32_t objnum) { 1145 auto it = m_ObjectStreamMap.find(objnum); 1146 if (it != m_ObjectStreamMap.end()) 1147 return it->second.get(); 1148 1149 if (!m_pDocument) 1150 return nullptr; 1151 1152 const CPDF_Stream* pStream = 1153 ToStream(m_pDocument->GetOrParseIndirectObject(objnum)); 1154 if (!pStream) 1155 return nullptr; 1156 1157 CPDF_StreamAcc* pStreamAcc = new CPDF_StreamAcc; 1158 pStreamAcc->LoadAllData(pStream); 1159 m_ObjectStreamMap[objnum].reset(pStreamAcc); 1160 return pStreamAcc; 1161 } 1162 1163 FX_FILESIZE CPDF_Parser::GetObjectSize(uint32_t objnum) const { 1164 if (!IsValidObjectNumber(objnum)) 1165 return 0; 1166 1167 if (GetObjectType(objnum) == 2) 1168 objnum = GetObjectPositionOrZero(objnum); 1169 1170 if (GetObjectType(objnum) != 1 && GetObjectType(objnum) != 255) 1171 return 0; 1172 1173 FX_FILESIZE offset = GetObjectPositionOrZero(objnum); 1174 if (offset == 0) 1175 return 0; 1176 1177 auto it = m_SortedOffset.find(offset); 1178 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) 1179 return 0; 1180 1181 return *it - offset; 1182 } 1183 1184 void CPDF_Parser::GetIndirectBinary(uint32_t objnum, 1185 uint8_t*& pBuffer, 1186 uint32_t& size) { 1187 pBuffer = nullptr; 1188 size = 0; 1189 if (!IsValidObjectNumber(objnum)) 1190 return; 1191 1192 if (GetObjectType(objnum) == 2) { 1193 CPDF_StreamAcc* pObjStream = GetObjectStream(m_ObjectInfo[objnum].pos); 1194 if (!pObjStream) 1195 return; 1196 1197 int32_t offset = GetStreamFirst(pObjStream); 1198 const uint8_t* pData = pObjStream->GetData(); 1199 uint32_t totalsize = pObjStream->GetSize(); 1200 CFX_RetainPtr<IFX_MemoryStream> file = 1201 IFX_MemoryStream::Create((uint8_t*)pData, (size_t)totalsize, false); 1202 CPDF_SyntaxParser syntax; 1203 syntax.InitParser(file, 0); 1204 1205 for (int i = GetStreamNCount(pObjStream); i > 0; --i) { 1206 uint32_t thisnum = syntax.GetDirectNum(); 1207 uint32_t thisoff = syntax.GetDirectNum(); 1208 if (thisnum != objnum) 1209 continue; 1210 1211 if (i == 1) { 1212 size = totalsize - (thisoff + offset); 1213 } else { 1214 syntax.GetDirectNum(); // Skip nextnum. 1215 uint32_t nextoff = syntax.GetDirectNum(); 1216 size = nextoff - thisoff; 1217 } 1218 1219 pBuffer = FX_Alloc(uint8_t, size); 1220 FXSYS_memcpy(pBuffer, pData + thisoff + offset, size); 1221 return; 1222 } 1223 return; 1224 } 1225 1226 if (GetObjectType(objnum) != 1) 1227 return; 1228 1229 FX_FILESIZE pos = m_ObjectInfo[objnum].pos; 1230 if (pos == 0) 1231 return; 1232 1233 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 1234 m_pSyntax->RestorePos(pos); 1235 1236 bool bIsNumber; 1237 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 1238 if (!bIsNumber) { 1239 m_pSyntax->RestorePos(SavedPos); 1240 return; 1241 } 1242 1243 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); 1244 if (parser_objnum && parser_objnum != objnum) { 1245 m_pSyntax->RestorePos(SavedPos); 1246 return; 1247 } 1248 1249 word = m_pSyntax->GetNextWord(&bIsNumber); 1250 if (!bIsNumber) { 1251 m_pSyntax->RestorePos(SavedPos); 1252 return; 1253 } 1254 1255 if (m_pSyntax->GetKeyword() != "obj") { 1256 m_pSyntax->RestorePos(SavedPos); 1257 return; 1258 } 1259 1260 auto it = m_SortedOffset.find(pos); 1261 if (it == m_SortedOffset.end() || ++it == m_SortedOffset.end()) { 1262 m_pSyntax->RestorePos(SavedPos); 1263 return; 1264 } 1265 1266 FX_FILESIZE nextoff = *it; 1267 bool bNextOffValid = false; 1268 if (nextoff != pos) { 1269 m_pSyntax->RestorePos(nextoff); 1270 word = m_pSyntax->GetNextWord(&bIsNumber); 1271 if (word == "xref") { 1272 bNextOffValid = true; 1273 } else if (bIsNumber) { 1274 word = m_pSyntax->GetNextWord(&bIsNumber); 1275 if (bIsNumber && m_pSyntax->GetKeyword() == "obj") { 1276 bNextOffValid = true; 1277 } 1278 } 1279 } 1280 1281 if (!bNextOffValid) { 1282 m_pSyntax->RestorePos(pos); 1283 while (1) { 1284 if (m_pSyntax->GetKeyword() == "endobj") 1285 break; 1286 1287 if (m_pSyntax->SavePos() == m_pSyntax->m_FileLen) 1288 break; 1289 } 1290 nextoff = m_pSyntax->SavePos(); 1291 } 1292 1293 size = (uint32_t)(nextoff - pos); 1294 pBuffer = FX_Alloc(uint8_t, size); 1295 m_pSyntax->RestorePos(pos); 1296 m_pSyntax->ReadBlock(pBuffer, size); 1297 m_pSyntax->RestorePos(SavedPos); 1298 } 1299 1300 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAt( 1301 CPDF_IndirectObjectHolder* pObjList, 1302 FX_FILESIZE pos, 1303 uint32_t objnum) { 1304 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 1305 m_pSyntax->RestorePos(pos); 1306 bool bIsNumber; 1307 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 1308 if (!bIsNumber) { 1309 m_pSyntax->RestorePos(SavedPos); 1310 return nullptr; 1311 } 1312 1313 FX_FILESIZE objOffset = m_pSyntax->SavePos(); 1314 objOffset -= word.GetLength(); 1315 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); 1316 if (objnum && parser_objnum != objnum) { 1317 m_pSyntax->RestorePos(SavedPos); 1318 return nullptr; 1319 } 1320 1321 word = m_pSyntax->GetNextWord(&bIsNumber); 1322 if (!bIsNumber) { 1323 m_pSyntax->RestorePos(SavedPos); 1324 return nullptr; 1325 } 1326 1327 uint32_t parser_gennum = FXSYS_atoui(word.c_str()); 1328 if (m_pSyntax->GetKeyword() != "obj") { 1329 m_pSyntax->RestorePos(SavedPos); 1330 return nullptr; 1331 } 1332 1333 std::unique_ptr<CPDF_Object> pObj = 1334 m_pSyntax->GetObject(pObjList, objnum, parser_gennum, true); 1335 m_pSyntax->SavePos(); 1336 1337 CFX_ByteString bsWord = m_pSyntax->GetKeyword(); 1338 if (bsWord == "endobj") 1339 m_pSyntax->SavePos(); 1340 1341 m_pSyntax->RestorePos(SavedPos); 1342 if (pObj) { 1343 if (!objnum) 1344 pObj->m_ObjNum = parser_objnum; 1345 pObj->m_GenNum = parser_gennum; 1346 } 1347 return pObj; 1348 } 1349 1350 std::unique_ptr<CPDF_Object> CPDF_Parser::ParseIndirectObjectAtByStrict( 1351 CPDF_IndirectObjectHolder* pObjList, 1352 FX_FILESIZE pos, 1353 uint32_t objnum, 1354 FX_FILESIZE* pResultPos) { 1355 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 1356 m_pSyntax->RestorePos(pos); 1357 1358 bool bIsNumber; 1359 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 1360 if (!bIsNumber) { 1361 m_pSyntax->RestorePos(SavedPos); 1362 return nullptr; 1363 } 1364 1365 uint32_t parser_objnum = FXSYS_atoui(word.c_str()); 1366 if (objnum && parser_objnum != objnum) { 1367 m_pSyntax->RestorePos(SavedPos); 1368 return nullptr; 1369 } 1370 1371 word = m_pSyntax->GetNextWord(&bIsNumber); 1372 if (!bIsNumber) { 1373 m_pSyntax->RestorePos(SavedPos); 1374 return nullptr; 1375 } 1376 1377 uint32_t gennum = FXSYS_atoui(word.c_str()); 1378 if (m_pSyntax->GetKeyword() != "obj") { 1379 m_pSyntax->RestorePos(SavedPos); 1380 return nullptr; 1381 } 1382 1383 std::unique_ptr<CPDF_Object> pObj = 1384 m_pSyntax->GetObjectForStrict(pObjList, objnum, gennum); 1385 1386 if (pResultPos) 1387 *pResultPos = m_pSyntax->m_Pos; 1388 1389 m_pSyntax->RestorePos(SavedPos); 1390 return pObj; 1391 } 1392 1393 uint32_t CPDF_Parser::GetFirstPageNo() const { 1394 return m_pLinearized ? m_pLinearized->GetFirstPageNo() : 0; 1395 } 1396 1397 std::unique_ptr<CPDF_Dictionary> CPDF_Parser::LoadTrailerV4() { 1398 if (m_pSyntax->GetKeyword() != "trailer") 1399 return nullptr; 1400 1401 return ToDictionary(m_pSyntax->GetObject(m_pDocument, 0, 0, true)); 1402 } 1403 1404 uint32_t CPDF_Parser::GetPermissions() const { 1405 if (!m_pSecurityHandler) 1406 return 0xFFFFFFFF; 1407 1408 uint32_t dwPermission = m_pSecurityHandler->GetPermissions(); 1409 if (m_pEncryptDict && m_pEncryptDict->GetStringFor("Filter") == "Standard") { 1410 // See PDF Reference 1.7, page 123, table 3.20. 1411 dwPermission &= 0xFFFFFFFC; 1412 dwPermission |= 0xFFFFF0C0; 1413 } 1414 return dwPermission; 1415 } 1416 1417 bool CPDF_Parser::IsLinearizedFile( 1418 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess, 1419 uint32_t offset) { 1420 m_pSyntax->InitParser(pFileAccess, offset); 1421 m_pSyntax->RestorePos(m_pSyntax->m_HeaderOffset + 9); 1422 1423 FX_FILESIZE SavedPos = m_pSyntax->SavePos(); 1424 bool bIsNumber; 1425 CFX_ByteString word = m_pSyntax->GetNextWord(&bIsNumber); 1426 if (!bIsNumber) 1427 return false; 1428 1429 uint32_t objnum = FXSYS_atoui(word.c_str()); 1430 word = m_pSyntax->GetNextWord(&bIsNumber); 1431 if (!bIsNumber) 1432 return false; 1433 1434 uint32_t gennum = FXSYS_atoui(word.c_str()); 1435 if (m_pSyntax->GetKeyword() != "obj") { 1436 m_pSyntax->RestorePos(SavedPos); 1437 return false; 1438 } 1439 1440 m_pLinearized = CPDF_LinearizedHeader::CreateForObject( 1441 m_pSyntax->GetObject(nullptr, objnum, gennum, true)); 1442 if (!m_pLinearized) 1443 return false; 1444 1445 m_LastXRefOffset = m_pLinearized->GetLastXRefOffset(); 1446 // Move parser onto first page xref table start. 1447 m_pSyntax->GetNextWord(nullptr); 1448 return true; 1449 } 1450 1451 CPDF_Parser::Error CPDF_Parser::StartLinearizedParse( 1452 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess, 1453 CPDF_Document* pDocument) { 1454 ASSERT(!m_bHasParsed); 1455 m_bXRefStream = false; 1456 m_LastXRefOffset = 0; 1457 1458 int32_t offset = GetHeaderOffset(pFileAccess); 1459 if (offset == -1) 1460 return FORMAT_ERROR; 1461 1462 if (!IsLinearizedFile(pFileAccess, offset)) { 1463 m_pSyntax->m_pFileAccess = nullptr; 1464 return StartParse(pFileAccess, std::move(pDocument)); 1465 } 1466 m_bHasParsed = true; 1467 m_pDocument = pDocument; 1468 1469 FX_FILESIZE dwFirstXRefOffset = m_pSyntax->SavePos(); 1470 bool bXRefRebuilt = false; 1471 bool bLoadV4 = LoadCrossRefV4(dwFirstXRefOffset, 0, false); 1472 if (!bLoadV4 && !LoadCrossRefV5(&dwFirstXRefOffset, true)) { 1473 if (!RebuildCrossRef()) 1474 return FORMAT_ERROR; 1475 1476 bXRefRebuilt = true; 1477 m_LastXRefOffset = 0; 1478 } 1479 1480 if (bLoadV4) { 1481 m_pTrailer = LoadTrailerV4(); 1482 if (!m_pTrailer) 1483 return SUCCESS; 1484 1485 int32_t xrefsize = GetDirectInteger(m_pTrailer.get(), "Size"); 1486 if (xrefsize > 0) 1487 ShrinkObjectMap(xrefsize); 1488 } 1489 1490 Error eRet = SetEncryptHandler(); 1491 if (eRet != SUCCESS) 1492 return eRet; 1493 1494 m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); 1495 if (!m_pDocument->GetRoot() || m_pDocument->GetPageCount() == 0) { 1496 if (bXRefRebuilt) 1497 return FORMAT_ERROR; 1498 1499 ReleaseEncryptHandler(); 1500 if (!RebuildCrossRef()) 1501 return FORMAT_ERROR; 1502 1503 eRet = SetEncryptHandler(); 1504 if (eRet != SUCCESS) 1505 return eRet; 1506 1507 m_pDocument->LoadLinearizedDoc(m_pLinearized.get()); 1508 if (!m_pDocument->GetRoot()) 1509 return FORMAT_ERROR; 1510 } 1511 1512 if (GetRootObjNum() == 0) { 1513 ReleaseEncryptHandler(); 1514 if (!RebuildCrossRef() || GetRootObjNum() == 0) 1515 return FORMAT_ERROR; 1516 1517 eRet = SetEncryptHandler(); 1518 if (eRet != SUCCESS) 1519 return eRet; 1520 } 1521 1522 if (m_pSecurityHandler && m_pSecurityHandler->IsMetadataEncrypted()) { 1523 if (CPDF_Reference* pMetadata = 1524 ToReference(m_pDocument->GetRoot()->GetObjectFor("Metadata"))) 1525 m_pSyntax->m_MetadataObjnum = pMetadata->GetRefObjNum(); 1526 } 1527 return SUCCESS; 1528 } 1529 1530 bool CPDF_Parser::LoadLinearizedAllCrossRefV5(FX_FILESIZE xrefpos) { 1531 if (!LoadCrossRefV5(&xrefpos, false)) 1532 return false; 1533 1534 std::set<FX_FILESIZE> seen_xrefpos; 1535 while (xrefpos) { 1536 seen_xrefpos.insert(xrefpos); 1537 if (!LoadCrossRefV5(&xrefpos, false)) 1538 return false; 1539 1540 // Check for circular references. 1541 if (pdfium::ContainsKey(seen_xrefpos, xrefpos)) 1542 return false; 1543 } 1544 m_ObjectStreamMap.clear(); 1545 m_bXRefStream = true; 1546 return true; 1547 } 1548 1549 CPDF_Parser::Error CPDF_Parser::LoadLinearizedMainXRefTable() { 1550 uint32_t dwSaveMetadataObjnum = m_pSyntax->m_MetadataObjnum; 1551 m_pSyntax->m_MetadataObjnum = 0; 1552 m_pTrailer.reset(); 1553 m_pSyntax->RestorePos(m_LastXRefOffset - m_pSyntax->m_HeaderOffset); 1554 1555 uint8_t ch = 0; 1556 uint32_t dwCount = 0; 1557 m_pSyntax->GetNextChar(ch); 1558 while (PDFCharIsWhitespace(ch)) { 1559 ++dwCount; 1560 if (m_pSyntax->m_FileLen <= 1561 (FX_FILESIZE)(m_pSyntax->SavePos() + m_pSyntax->m_HeaderOffset)) { 1562 break; 1563 } 1564 m_pSyntax->GetNextChar(ch); 1565 } 1566 m_LastXRefOffset += dwCount; 1567 m_ObjectStreamMap.clear(); 1568 m_ObjCache.clear(); 1569 1570 if (!LoadLinearizedAllCrossRefV4(m_LastXRefOffset, m_dwXrefStartObjNum) && 1571 !LoadLinearizedAllCrossRefV5(m_LastXRefOffset)) { 1572 m_LastXRefOffset = 0; 1573 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; 1574 return FORMAT_ERROR; 1575 } 1576 1577 m_pSyntax->m_MetadataObjnum = dwSaveMetadataObjnum; 1578 return SUCCESS; 1579 } 1580