1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 8 9 #include <algorithm> 10 #include <utility> 11 #include <vector> 12 13 #include "core/fpdfapi/cpdf_modulemgr.h" 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_boolean.h" 16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h" 17 #include "core/fpdfapi/parser/cpdf_dictionary.h" 18 #include "core/fpdfapi/parser/cpdf_name.h" 19 #include "core/fpdfapi/parser/cpdf_null.h" 20 #include "core/fpdfapi/parser/cpdf_number.h" 21 #include "core/fpdfapi/parser/cpdf_reference.h" 22 #include "core/fpdfapi/parser/cpdf_stream.h" 23 #include "core/fpdfapi/parser/cpdf_string.h" 24 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 25 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 26 #include "core/fxcrt/fx_ext.h" 27 #include "third_party/base/numerics/safe_math.h" 28 #include "third_party/base/ptr_util.h" 29 30 namespace { 31 32 enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn }; 33 34 } // namespace 35 36 // static 37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; 38 39 CPDF_SyntaxParser::CPDF_SyntaxParser() 40 : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {} 41 42 CPDF_SyntaxParser::CPDF_SyntaxParser( 43 const CFX_WeakPtr<CFX_ByteStringPool>& pPool) 44 : m_MetadataObjnum(0), 45 m_pFileAccess(nullptr), 46 m_pFileBuf(nullptr), 47 m_BufSize(CPDF_ModuleMgr::kFileBufSize), 48 m_pPool(pPool) {} 49 50 CPDF_SyntaxParser::~CPDF_SyntaxParser() { 51 FX_Free(m_pFileBuf); 52 } 53 54 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { 55 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 56 m_Pos = pos; 57 return GetNextChar(ch); 58 } 59 60 bool CPDF_SyntaxParser::ReadChar(FX_FILESIZE read_pos, uint32_t read_size) { 61 if (static_cast<FX_FILESIZE>(read_pos + read_size) > m_FileLen) { 62 if (m_FileLen < static_cast<FX_FILESIZE>(read_size)) { 63 read_pos = 0; 64 read_size = static_cast<uint32_t>(m_FileLen); 65 } else { 66 read_pos = m_FileLen - read_size; 67 } 68 } 69 if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size)) 70 return false; 71 72 m_BufOffset = read_pos; 73 return true; 74 } 75 76 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { 77 FX_FILESIZE pos = m_Pos + m_HeaderOffset; 78 if (pos >= m_FileLen) 79 return false; 80 81 if (CheckPosition(pos)) { 82 FX_FILESIZE read_pos = pos; 83 uint32_t read_size = m_BufSize; 84 read_size = std::min(read_size, static_cast<uint32_t>(m_FileLen)); 85 if (!ReadChar(read_pos, read_size)) 86 return false; 87 } 88 ch = m_pFileBuf[pos - m_BufOffset]; 89 m_Pos++; 90 return true; 91 } 92 93 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) { 94 pos += m_HeaderOffset; 95 if (pos >= m_FileLen) 96 return false; 97 98 if (CheckPosition(pos)) { 99 FX_FILESIZE read_pos; 100 if (pos < static_cast<FX_FILESIZE>(m_BufSize)) 101 read_pos = 0; 102 else 103 read_pos = pos - m_BufSize + 1; 104 uint32_t read_size = m_BufSize; 105 if (!ReadChar(read_pos, read_size)) 106 return false; 107 } 108 ch = m_pFileBuf[pos - m_BufOffset]; 109 return true; 110 } 111 112 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) { 113 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) 114 return false; 115 m_Pos += size; 116 return true; 117 } 118 119 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { 120 m_WordSize = 0; 121 if (bIsNumber) 122 *bIsNumber = true; 123 124 uint8_t ch; 125 if (!GetNextChar(ch)) 126 return; 127 128 while (1) { 129 while (PDFCharIsWhitespace(ch)) { 130 if (!GetNextChar(ch)) 131 return; 132 } 133 134 if (ch != '%') 135 break; 136 137 while (1) { 138 if (!GetNextChar(ch)) 139 return; 140 if (PDFCharIsLineEnding(ch)) 141 break; 142 } 143 } 144 145 if (PDFCharIsDelimiter(ch)) { 146 if (bIsNumber) 147 *bIsNumber = false; 148 149 m_WordBuffer[m_WordSize++] = ch; 150 if (ch == '/') { 151 while (1) { 152 if (!GetNextChar(ch)) 153 return; 154 155 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 156 m_Pos--; 157 return; 158 } 159 160 if (m_WordSize < sizeof(m_WordBuffer) - 1) 161 m_WordBuffer[m_WordSize++] = ch; 162 } 163 } else if (ch == '<') { 164 if (!GetNextChar(ch)) 165 return; 166 167 if (ch == '<') 168 m_WordBuffer[m_WordSize++] = ch; 169 else 170 m_Pos--; 171 } else if (ch == '>') { 172 if (!GetNextChar(ch)) 173 return; 174 175 if (ch == '>') 176 m_WordBuffer[m_WordSize++] = ch; 177 else 178 m_Pos--; 179 } 180 return; 181 } 182 183 while (1) { 184 if (m_WordSize < sizeof(m_WordBuffer) - 1) 185 m_WordBuffer[m_WordSize++] = ch; 186 187 if (!PDFCharIsNumeric(ch)) { 188 if (bIsNumber) 189 *bIsNumber = false; 190 } 191 192 if (!GetNextChar(ch)) 193 return; 194 195 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 196 m_Pos--; 197 break; 198 } 199 } 200 } 201 202 CFX_ByteString CPDF_SyntaxParser::ReadString() { 203 uint8_t ch; 204 if (!GetNextChar(ch)) 205 return CFX_ByteString(); 206 207 CFX_ByteTextBuf buf; 208 int32_t parlevel = 0; 209 ReadStatus status = ReadStatus::Normal; 210 int32_t iEscCode = 0; 211 while (1) { 212 switch (status) { 213 case ReadStatus::Normal: 214 if (ch == ')') { 215 if (parlevel == 0) 216 return buf.MakeString(); 217 parlevel--; 218 } else if (ch == '(') { 219 parlevel++; 220 } 221 if (ch == '\\') 222 status = ReadStatus::Backslash; 223 else 224 buf.AppendChar(ch); 225 break; 226 case ReadStatus::Backslash: 227 if (ch >= '0' && ch <= '7') { 228 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 229 status = ReadStatus::Octal; 230 break; 231 } 232 233 if (ch == 'n') { 234 buf.AppendChar('\n'); 235 } else if (ch == 'r') { 236 buf.AppendChar('\r'); 237 } else if (ch == 't') { 238 buf.AppendChar('\t'); 239 } else if (ch == 'b') { 240 buf.AppendChar('\b'); 241 } else if (ch == 'f') { 242 buf.AppendChar('\f'); 243 } else if (ch == '\r') { 244 status = ReadStatus::CarriageReturn; 245 break; 246 } else if (ch != '\n') { 247 buf.AppendChar(ch); 248 } 249 status = ReadStatus::Normal; 250 break; 251 case ReadStatus::Octal: 252 if (ch >= '0' && ch <= '7') { 253 iEscCode = 254 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 255 status = ReadStatus::FinishOctal; 256 } else { 257 buf.AppendChar(iEscCode); 258 status = ReadStatus::Normal; 259 continue; 260 } 261 break; 262 case ReadStatus::FinishOctal: 263 status = ReadStatus::Normal; 264 if (ch >= '0' && ch <= '7') { 265 iEscCode = 266 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 267 buf.AppendChar(iEscCode); 268 } else { 269 buf.AppendChar(iEscCode); 270 continue; 271 } 272 break; 273 case ReadStatus::CarriageReturn: 274 status = ReadStatus::Normal; 275 if (ch != '\n') 276 continue; 277 break; 278 } 279 280 if (!GetNextChar(ch)) 281 break; 282 } 283 284 GetNextChar(ch); 285 return buf.MakeString(); 286 } 287 288 CFX_ByteString CPDF_SyntaxParser::ReadHexString() { 289 uint8_t ch; 290 if (!GetNextChar(ch)) 291 return CFX_ByteString(); 292 293 CFX_ByteTextBuf buf; 294 bool bFirst = true; 295 uint8_t code = 0; 296 while (1) { 297 if (ch == '>') 298 break; 299 300 if (std::isxdigit(ch)) { 301 int val = FXSYS_toHexDigit(ch); 302 if (bFirst) { 303 code = val * 16; 304 } else { 305 code += val; 306 buf.AppendByte(code); 307 } 308 bFirst = !bFirst; 309 } 310 311 if (!GetNextChar(ch)) 312 break; 313 } 314 if (!bFirst) 315 buf.AppendByte(code); 316 317 return buf.MakeString(); 318 } 319 320 void CPDF_SyntaxParser::ToNextLine() { 321 uint8_t ch; 322 while (GetNextChar(ch)) { 323 if (ch == '\n') 324 break; 325 326 if (ch == '\r') { 327 GetNextChar(ch); 328 if (ch != '\n') 329 --m_Pos; 330 break; 331 } 332 } 333 } 334 335 void CPDF_SyntaxParser::ToNextWord() { 336 uint8_t ch; 337 if (!GetNextChar(ch)) 338 return; 339 340 while (1) { 341 while (PDFCharIsWhitespace(ch)) { 342 if (!GetNextChar(ch)) 343 return; 344 } 345 346 if (ch != '%') 347 break; 348 349 while (1) { 350 if (!GetNextChar(ch)) 351 return; 352 if (PDFCharIsLineEnding(ch)) 353 break; 354 } 355 } 356 m_Pos--; 357 } 358 359 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { 360 GetNextWordInternal(bIsNumber); 361 return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize); 362 } 363 364 CFX_ByteString CPDF_SyntaxParser::GetKeyword() { 365 return GetNextWord(nullptr); 366 } 367 368 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObject( 369 CPDF_IndirectObjectHolder* pObjList, 370 uint32_t objnum, 371 uint32_t gennum, 372 bool bDecrypt) { 373 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); 374 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) 375 return nullptr; 376 377 FX_FILESIZE SavedObjPos = m_Pos; 378 bool bIsNumber; 379 CFX_ByteString word = GetNextWord(&bIsNumber); 380 if (word.GetLength() == 0) 381 return nullptr; 382 383 if (bIsNumber) { 384 FX_FILESIZE SavedPos = m_Pos; 385 CFX_ByteString nextword = GetNextWord(&bIsNumber); 386 if (bIsNumber) { 387 CFX_ByteString nextword2 = GetNextWord(nullptr); 388 if (nextword2 == "R") { 389 uint32_t objnum = FXSYS_atoui(word.c_str()); 390 if (objnum == CPDF_Object::kInvalidObjNum) 391 return nullptr; 392 return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum); 393 } 394 } 395 m_Pos = SavedPos; 396 return pdfium::MakeUnique<CPDF_Number>(word.AsStringC()); 397 } 398 399 if (word == "true" || word == "false") 400 return pdfium::MakeUnique<CPDF_Boolean>(word == "true"); 401 402 if (word == "null") 403 return pdfium::MakeUnique<CPDF_Null>(); 404 405 if (word == "(") { 406 CFX_ByteString str = ReadString(); 407 if (m_pCryptoHandler && bDecrypt) 408 m_pCryptoHandler->Decrypt(objnum, gennum, str); 409 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false); 410 } 411 if (word == "<") { 412 CFX_ByteString str = ReadHexString(); 413 if (m_pCryptoHandler && bDecrypt) 414 m_pCryptoHandler->Decrypt(objnum, gennum, str); 415 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true); 416 } 417 if (word == "[") { 418 std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>(); 419 while (std::unique_ptr<CPDF_Object> pObj = 420 GetObject(pObjList, objnum, gennum, true)) { 421 pArray->Add(std::move(pObj)); 422 } 423 return std::move(pArray); 424 } 425 if (word[0] == '/') { 426 return pdfium::MakeUnique<CPDF_Name>( 427 m_pPool, 428 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 429 } 430 if (word == "<<") { 431 int32_t nKeys = 0; 432 FX_FILESIZE dwSignValuePos = 0; 433 std::unique_ptr<CPDF_Dictionary> pDict = 434 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool); 435 while (1) { 436 CFX_ByteString key = GetNextWord(nullptr); 437 if (key.IsEmpty()) 438 return nullptr; 439 440 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); 441 if (key == ">>") 442 break; 443 444 if (key == "endobj") { 445 m_Pos = SavedPos; 446 break; 447 } 448 if (key[0] != '/') 449 continue; 450 451 ++nKeys; 452 key = PDF_NameDecode(key); 453 if (key.IsEmpty()) 454 continue; 455 456 if (key == "/Contents") 457 dwSignValuePos = m_Pos; 458 459 std::unique_ptr<CPDF_Object> pObj = 460 GetObject(pObjList, objnum, gennum, true); 461 if (!pObj) 462 continue; 463 464 CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1); 465 pDict->SetFor(keyNoSlash, std::move(pObj)); 466 } 467 468 // Only when this is a signature dictionary and has contents, we reset the 469 // contents to the un-decrypted form. 470 if (pDict->IsSignatureDict() && dwSignValuePos) { 471 CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 472 m_Pos = dwSignValuePos; 473 pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false)); 474 } 475 476 FX_FILESIZE SavedPos = m_Pos; 477 CFX_ByteString nextword = GetNextWord(nullptr); 478 if (nextword != "stream") { 479 m_Pos = SavedPos; 480 return std::move(pDict); 481 } 482 return ReadStream(std::move(pDict), objnum, gennum); 483 } 484 if (word == ">>") 485 m_Pos = SavedObjPos; 486 487 return nullptr; 488 } 489 490 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectForStrict( 491 CPDF_IndirectObjectHolder* pObjList, 492 uint32_t objnum, 493 uint32_t gennum) { 494 CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth); 495 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) 496 return nullptr; 497 498 FX_FILESIZE SavedObjPos = m_Pos; 499 bool bIsNumber; 500 CFX_ByteString word = GetNextWord(&bIsNumber); 501 if (word.GetLength() == 0) 502 return nullptr; 503 504 if (bIsNumber) { 505 FX_FILESIZE SavedPos = m_Pos; 506 CFX_ByteString nextword = GetNextWord(&bIsNumber); 507 if (bIsNumber) { 508 CFX_ByteString nextword2 = GetNextWord(nullptr); 509 if (nextword2 == "R") { 510 uint32_t objnum = FXSYS_atoui(word.c_str()); 511 if (objnum == CPDF_Object::kInvalidObjNum) 512 return nullptr; 513 return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum); 514 } 515 } 516 m_Pos = SavedPos; 517 return pdfium::MakeUnique<CPDF_Number>(word.AsStringC()); 518 } 519 520 if (word == "true" || word == "false") 521 return pdfium::MakeUnique<CPDF_Boolean>(word == "true"); 522 523 if (word == "null") 524 return pdfium::MakeUnique<CPDF_Null>(); 525 526 if (word == "(") { 527 CFX_ByteString str = ReadString(); 528 if (m_pCryptoHandler) 529 m_pCryptoHandler->Decrypt(objnum, gennum, str); 530 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false); 531 } 532 if (word == "<") { 533 CFX_ByteString str = ReadHexString(); 534 if (m_pCryptoHandler) 535 m_pCryptoHandler->Decrypt(objnum, gennum, str); 536 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true); 537 } 538 if (word == "[") { 539 std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>(); 540 while (std::unique_ptr<CPDF_Object> pObj = 541 GetObject(pObjList, objnum, gennum, true)) { 542 pArray->Add(std::move(pObj)); 543 } 544 return m_WordBuffer[0] == ']' ? std::move(pArray) : nullptr; 545 } 546 if (word[0] == '/') { 547 return pdfium::MakeUnique<CPDF_Name>( 548 m_pPool, 549 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 550 } 551 if (word == "<<") { 552 std::unique_ptr<CPDF_Dictionary> pDict = 553 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool); 554 while (1) { 555 FX_FILESIZE SavedPos = m_Pos; 556 CFX_ByteString key = GetNextWord(nullptr); 557 if (key.IsEmpty()) 558 return nullptr; 559 560 if (key == ">>") 561 break; 562 563 if (key == "endobj") { 564 m_Pos = SavedPos; 565 break; 566 } 567 if (key[0] != '/') 568 continue; 569 570 key = PDF_NameDecode(key); 571 std::unique_ptr<CPDF_Object> obj( 572 GetObject(pObjList, objnum, gennum, true)); 573 if (!obj) { 574 uint8_t ch; 575 while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) { 576 continue; 577 } 578 return nullptr; 579 } 580 581 if (key.GetLength() > 1) { 582 pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1), 583 std::move(obj)); 584 } 585 } 586 587 FX_FILESIZE SavedPos = m_Pos; 588 CFX_ByteString nextword = GetNextWord(nullptr); 589 if (nextword != "stream") { 590 m_Pos = SavedPos; 591 return std::move(pDict); 592 } 593 return ReadStream(std::move(pDict), objnum, gennum); 594 } 595 if (word == ">>") 596 m_Pos = SavedObjPos; 597 598 return nullptr; 599 } 600 601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { 602 unsigned char byte1 = 0; 603 unsigned char byte2 = 0; 604 605 GetCharAt(pos, byte1); 606 GetCharAt(pos + 1, byte2); 607 608 if (byte1 == '\r' && byte2 == '\n') 609 return 2; 610 611 if (byte1 == '\r' || byte1 == '\n') 612 return 1; 613 614 return 0; 615 } 616 617 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( 618 std::unique_ptr<CPDF_Dictionary> pDict, 619 uint32_t objnum, 620 uint32_t gennum) { 621 CPDF_Object* pLenObj = pDict->GetObjectFor("Length"); 622 FX_FILESIZE len = -1; 623 CPDF_Reference* pLenObjRef = ToReference(pLenObj); 624 625 bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() && 626 pLenObjRef->GetRefObjNum() != objnum); 627 if (pLenObj && differingObjNum) 628 len = pLenObj->GetInteger(); 629 630 // Locate the start of stream. 631 ToNextLine(); 632 FX_FILESIZE streamStartPos = m_Pos; 633 634 const CFX_ByteStringC kEndStreamStr("endstream"); 635 const CFX_ByteStringC kEndObjStr("endobj"); 636 637 CPDF_CryptoHandler* pCryptoHandler = 638 objnum == m_MetadataObjnum ? nullptr : m_pCryptoHandler.get(); 639 if (!pCryptoHandler) { 640 bool bSearchForKeyword = true; 641 if (len >= 0) { 642 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; 643 pos += len; 644 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) 645 m_Pos = pos.ValueOrDie(); 646 647 m_Pos += ReadEOLMarkers(m_Pos); 648 FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); 649 GetNextWordInternal(nullptr); 650 // Earlier version of PDF specification doesn't require EOL marker before 651 // 'endstream' keyword. If keyword 'endstream' follows the bytes in 652 // specified length, it signals the end of stream. 653 if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(), 654 kEndStreamStr.GetLength()) == 0) { 655 bSearchForKeyword = false; 656 } 657 } 658 659 if (bSearchForKeyword) { 660 // If len is not available, len needs to be calculated 661 // by searching the keywords "endstream" or "endobj". 662 m_Pos = streamStartPos; 663 FX_FILESIZE endStreamOffset = 0; 664 while (endStreamOffset >= 0) { 665 endStreamOffset = FindTag(kEndStreamStr, 0); 666 667 // Can't find "endstream". 668 if (endStreamOffset < 0) 669 break; 670 671 // Stop searching when "endstream" is found. 672 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, 673 kEndStreamStr, true)) { 674 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); 675 break; 676 } 677 } 678 679 m_Pos = streamStartPos; 680 FX_FILESIZE endObjOffset = 0; 681 while (endObjOffset >= 0) { 682 endObjOffset = FindTag(kEndObjStr, 0); 683 684 // Can't find "endobj". 685 if (endObjOffset < 0) 686 break; 687 688 // Stop searching when "endobj" is found. 689 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, 690 true)) { 691 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); 692 break; 693 } 694 } 695 696 // Can't find "endstream" or "endobj". 697 if (endStreamOffset < 0 && endObjOffset < 0) 698 return nullptr; 699 700 if (endStreamOffset < 0 && endObjOffset >= 0) { 701 // Correct the position of end stream. 702 endStreamOffset = endObjOffset; 703 } else if (endStreamOffset >= 0 && endObjOffset < 0) { 704 // Correct the position of end obj. 705 endObjOffset = endStreamOffset; 706 } else if (endStreamOffset > endObjOffset) { 707 endStreamOffset = endObjOffset; 708 } 709 len = endStreamOffset; 710 711 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); 712 if (numMarkers == 2) { 713 len -= 2; 714 } else { 715 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); 716 if (numMarkers == 1) { 717 len -= 1; 718 } 719 } 720 if (len < 0) 721 return nullptr; 722 723 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len)); 724 } 725 m_Pos = streamStartPos; 726 } 727 if (len < 0) 728 return nullptr; 729 730 std::unique_ptr<uint8_t, FxFreeDeleter> pData; 731 if (len > 0) { 732 pData.reset(FX_Alloc(uint8_t, len)); 733 ReadBlock(pData.get(), len); 734 if (pCryptoHandler) { 735 CFX_BinaryBuf dest_buf; 736 dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len)); 737 738 void* context = pCryptoHandler->DecryptStart(objnum, gennum); 739 pCryptoHandler->DecryptStream(context, pData.get(), len, dest_buf); 740 pCryptoHandler->DecryptFinish(context, dest_buf); 741 len = dest_buf.GetSize(); 742 pData = dest_buf.DetachBuffer(); 743 } 744 } 745 746 auto pStream = 747 pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict)); 748 streamStartPos = m_Pos; 749 FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); 750 GetNextWordInternal(nullptr); 751 752 int numMarkers = ReadEOLMarkers(m_Pos); 753 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && 754 numMarkers != 0 && 755 FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(), 756 kEndObjStr.GetLength()) == 0) { 757 m_Pos = streamStartPos; 758 } 759 return pStream; 760 } 761 762 void CPDF_SyntaxParser::InitParser( 763 const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess, 764 uint32_t HeaderOffset) { 765 FX_Free(m_pFileBuf); 766 767 m_pFileBuf = FX_Alloc(uint8_t, m_BufSize); 768 m_HeaderOffset = HeaderOffset; 769 m_FileLen = pFileAccess->GetSize(); 770 m_Pos = 0; 771 m_pFileAccess = pFileAccess; 772 m_BufOffset = 0; 773 pFileAccess->ReadBlock(m_pFileBuf, 0, 774 std::min(m_BufSize, static_cast<uint32_t>(m_FileLen))); 775 } 776 777 uint32_t CPDF_SyntaxParser::GetDirectNum() { 778 bool bIsNumber; 779 GetNextWordInternal(&bIsNumber); 780 if (!bIsNumber) 781 return 0; 782 783 m_WordBuffer[m_WordSize] = 0; 784 return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer)); 785 } 786 787 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, 788 FX_FILESIZE limit, 789 const CFX_ByteStringC& tag, 790 bool checkKeyword) { 791 const uint32_t taglen = tag.GetLength(); 792 793 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); 794 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && 795 !PDFCharIsWhitespace(tag[taglen - 1]); 796 797 uint8_t ch; 798 if (bCheckRight && startpos + (int32_t)taglen <= limit && 799 GetCharAt(startpos + (int32_t)taglen, ch)) { 800 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 801 (checkKeyword && PDFCharIsDelimiter(ch))) { 802 return false; 803 } 804 } 805 806 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { 807 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 808 (checkKeyword && PDFCharIsDelimiter(ch))) { 809 return false; 810 } 811 } 812 return true; 813 } 814 815 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards 816 // and drop the bool. 817 bool CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag, 818 bool bWholeWord, 819 bool bForward, 820 FX_FILESIZE limit) { 821 int32_t taglen = tag.GetLength(); 822 if (taglen == 0) 823 return false; 824 825 FX_FILESIZE pos = m_Pos; 826 int32_t offset = 0; 827 if (!bForward) 828 offset = taglen - 1; 829 830 const uint8_t* tag_data = tag.raw_str(); 831 uint8_t byte; 832 while (1) { 833 if (bForward) { 834 if (limit && pos >= m_Pos + limit) 835 return false; 836 837 if (!GetCharAt(pos, byte)) 838 return false; 839 840 } else { 841 if (limit && pos <= m_Pos - limit) 842 return false; 843 844 if (!GetCharAtBackward(pos, byte)) 845 return false; 846 } 847 848 if (byte == tag_data[offset]) { 849 if (bForward) { 850 offset++; 851 if (offset < taglen) { 852 pos++; 853 continue; 854 } 855 } else { 856 offset--; 857 if (offset >= 0) { 858 pos--; 859 continue; 860 } 861 } 862 863 FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos; 864 if (!bWholeWord || IsWholeWord(startpos, limit, tag, false)) { 865 m_Pos = startpos; 866 return true; 867 } 868 } 869 870 if (bForward) { 871 offset = byte == tag_data[0] ? 1 : 0; 872 pos++; 873 } else { 874 offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1; 875 pos--; 876 } 877 878 if (pos < 0) 879 return false; 880 } 881 882 return false; 883 } 884 885 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag, 886 FX_FILESIZE limit) { 887 int32_t taglen = tag.GetLength(); 888 int32_t match = 0; 889 limit += m_Pos; 890 FX_FILESIZE startpos = m_Pos; 891 892 while (1) { 893 uint8_t ch; 894 if (!GetNextChar(ch)) 895 return -1; 896 897 if (ch == tag[match]) { 898 match++; 899 if (match == taglen) 900 return m_Pos - startpos - taglen; 901 } else { 902 match = ch == tag[0] ? 1 : 0; 903 } 904 905 if (limit && m_Pos == limit) 906 return -1; 907 } 908 return -1; 909 } 910 911 void CPDF_SyntaxParser::SetEncrypt( 912 std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) { 913 m_pCryptoHandler = std::move(pCryptoHandler); 914 } 915