1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h" 8 9 #include <algorithm> 10 #include <sstream> 11 #include <utility> 12 #include <vector> 13 14 #include "core/fpdfapi/cpdf_modulemgr.h" 15 #include "core/fpdfapi/parser/cpdf_array.h" 16 #include "core/fpdfapi/parser/cpdf_boolean.h" 17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h" 18 #include "core/fpdfapi/parser/cpdf_dictionary.h" 19 #include "core/fpdfapi/parser/cpdf_name.h" 20 #include "core/fpdfapi/parser/cpdf_null.h" 21 #include "core/fpdfapi/parser/cpdf_number.h" 22 #include "core/fpdfapi/parser/cpdf_read_validator.h" 23 #include "core/fpdfapi/parser/cpdf_reference.h" 24 #include "core/fpdfapi/parser/cpdf_stream.h" 25 #include "core/fpdfapi/parser/cpdf_string.h" 26 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 27 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 28 #include "core/fxcrt/autorestorer.h" 29 #include "core/fxcrt/cfx_binarybuf.h" 30 #include "core/fxcrt/fx_extension.h" 31 #include "third_party/base/numerics/safe_math.h" 32 #include "third_party/base/ptr_util.h" 33 34 namespace { 35 36 enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn }; 37 38 } // namespace 39 40 // static 41 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0; 42 43 CPDF_SyntaxParser::CPDF_SyntaxParser() 44 : CPDF_SyntaxParser(WeakPtr<ByteStringPool>()) {} 45 46 CPDF_SyntaxParser::CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool) 47 : m_pFileAccess(nullptr), m_pPool(pPool) {} 48 49 CPDF_SyntaxParser::~CPDF_SyntaxParser() { 50 } 51 52 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) { 53 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 54 m_Pos = pos; 55 return GetNextChar(ch); 56 } 57 58 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) { 59 if (read_pos >= m_FileLen) 60 return false; 61 size_t read_size = CPDF_ModuleMgr::kFileBufSize; 62 FX_SAFE_FILESIZE safe_end = read_pos; 63 safe_end += read_size; 64 if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen) 65 read_size = m_FileLen - read_pos; 66 67 m_pFileBuf.resize(read_size); 68 if (!m_pFileAccess->ReadBlock(m_pFileBuf.data(), read_pos, read_size)) { 69 m_pFileBuf.clear(); 70 return false; 71 } 72 73 m_BufOffset = read_pos; 74 return true; 75 } 76 77 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) { 78 FX_FILESIZE pos = m_Pos + m_HeaderOffset; 79 if (pos >= m_FileLen) 80 return false; 81 82 if (!IsPositionRead(pos) && !ReadBlockAt(pos)) 83 return false; 84 85 ch = m_pFileBuf[pos - m_BufOffset]; 86 m_Pos++; 87 return true; 88 } 89 90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) { 91 pos += m_HeaderOffset; 92 if (pos >= m_FileLen) 93 return false; 94 95 if (!IsPositionRead(pos)) { 96 FX_FILESIZE block_start = 0; 97 if (pos >= CPDF_ModuleMgr::kFileBufSize) 98 block_start = pos - CPDF_ModuleMgr::kFileBufSize + 1; 99 if (!ReadBlockAt(block_start) || !IsPositionRead(pos)) 100 return false; 101 } 102 *ch = m_pFileBuf[pos - m_BufOffset]; 103 return true; 104 } 105 106 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) { 107 if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size)) 108 return false; 109 m_Pos += size; 110 return true; 111 } 112 113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) { 114 m_WordSize = 0; 115 if (bIsNumber) 116 *bIsNumber = true; 117 118 ToNextWord(); 119 uint8_t ch; 120 if (!GetNextChar(ch)) 121 return; 122 123 if (PDFCharIsDelimiter(ch)) { 124 if (bIsNumber) 125 *bIsNumber = false; 126 127 m_WordBuffer[m_WordSize++] = ch; 128 if (ch == '/') { 129 while (1) { 130 if (!GetNextChar(ch)) 131 return; 132 133 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 134 m_Pos--; 135 return; 136 } 137 138 if (m_WordSize < sizeof(m_WordBuffer) - 1) 139 m_WordBuffer[m_WordSize++] = ch; 140 } 141 } else if (ch == '<') { 142 if (!GetNextChar(ch)) 143 return; 144 145 if (ch == '<') 146 m_WordBuffer[m_WordSize++] = ch; 147 else 148 m_Pos--; 149 } else if (ch == '>') { 150 if (!GetNextChar(ch)) 151 return; 152 153 if (ch == '>') 154 m_WordBuffer[m_WordSize++] = ch; 155 else 156 m_Pos--; 157 } 158 return; 159 } 160 161 while (1) { 162 if (m_WordSize < sizeof(m_WordBuffer) - 1) 163 m_WordBuffer[m_WordSize++] = ch; 164 165 if (!PDFCharIsNumeric(ch)) { 166 if (bIsNumber) 167 *bIsNumber = false; 168 } 169 170 if (!GetNextChar(ch)) 171 return; 172 173 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 174 m_Pos--; 175 break; 176 } 177 } 178 } 179 180 ByteString CPDF_SyntaxParser::ReadString() { 181 uint8_t ch; 182 if (!GetNextChar(ch)) 183 return ByteString(); 184 185 std::ostringstream buf; 186 int32_t parlevel = 0; 187 ReadStatus status = ReadStatus::Normal; 188 int32_t iEscCode = 0; 189 while (1) { 190 switch (status) { 191 case ReadStatus::Normal: 192 if (ch == ')') { 193 if (parlevel == 0) 194 return ByteString(buf); 195 parlevel--; 196 } else if (ch == '(') { 197 parlevel++; 198 } 199 if (ch == '\\') 200 status = ReadStatus::Backslash; 201 else 202 buf << static_cast<char>(ch); 203 break; 204 case ReadStatus::Backslash: 205 if (ch >= '0' && ch <= '7') { 206 iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 207 status = ReadStatus::Octal; 208 break; 209 } 210 211 if (ch == '\r') { 212 status = ReadStatus::CarriageReturn; 213 break; 214 } 215 if (ch == 'n') { 216 buf << '\n'; 217 } else if (ch == 'r') { 218 buf << '\r'; 219 } else if (ch == 't') { 220 buf << '\t'; 221 } else if (ch == 'b') { 222 buf << '\b'; 223 } else if (ch == 'f') { 224 buf << '\f'; 225 } else if (ch != '\n') { 226 buf << static_cast<char>(ch); 227 } 228 status = ReadStatus::Normal; 229 break; 230 case ReadStatus::Octal: 231 if (ch >= '0' && ch <= '7') { 232 iEscCode = 233 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 234 status = ReadStatus::FinishOctal; 235 } else { 236 buf << static_cast<char>(iEscCode); 237 status = ReadStatus::Normal; 238 continue; 239 } 240 break; 241 case ReadStatus::FinishOctal: 242 status = ReadStatus::Normal; 243 if (ch >= '0' && ch <= '7') { 244 iEscCode = 245 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch)); 246 buf << static_cast<char>(iEscCode); 247 } else { 248 buf << static_cast<char>(iEscCode); 249 continue; 250 } 251 break; 252 case ReadStatus::CarriageReturn: 253 status = ReadStatus::Normal; 254 if (ch != '\n') 255 continue; 256 break; 257 } 258 259 if (!GetNextChar(ch)) 260 break; 261 } 262 263 GetNextChar(ch); 264 return ByteString(buf); 265 } 266 267 ByteString CPDF_SyntaxParser::ReadHexString() { 268 uint8_t ch; 269 if (!GetNextChar(ch)) 270 return ByteString(); 271 272 std::ostringstream buf; 273 bool bFirst = true; 274 uint8_t code = 0; 275 while (1) { 276 if (ch == '>') 277 break; 278 279 if (std::isxdigit(ch)) { 280 int val = FXSYS_HexCharToInt(ch); 281 if (bFirst) { 282 code = val * 16; 283 } else { 284 code += val; 285 buf << static_cast<char>(code); 286 } 287 bFirst = !bFirst; 288 } 289 290 if (!GetNextChar(ch)) 291 break; 292 } 293 if (!bFirst) 294 buf << static_cast<char>(code); 295 296 return ByteString(buf); 297 } 298 299 void CPDF_SyntaxParser::ToNextLine() { 300 uint8_t ch; 301 while (GetNextChar(ch)) { 302 if (ch == '\n') 303 break; 304 305 if (ch == '\r') { 306 GetNextChar(ch); 307 if (ch != '\n') 308 --m_Pos; 309 break; 310 } 311 } 312 } 313 314 void CPDF_SyntaxParser::ToNextWord() { 315 uint8_t ch; 316 if (!GetNextChar(ch)) 317 return; 318 319 while (1) { 320 while (PDFCharIsWhitespace(ch)) { 321 if (!GetNextChar(ch)) 322 return; 323 } 324 325 if (ch != '%') 326 break; 327 328 while (1) { 329 if (!GetNextChar(ch)) 330 return; 331 if (PDFCharIsLineEnding(ch)) 332 break; 333 } 334 } 335 m_Pos--; 336 } 337 338 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) { 339 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 340 GetNextWordInternal(bIsNumber); 341 ByteString ret; 342 if (!GetValidator()->has_read_problems()) 343 ret = ByteString(m_WordBuffer, m_WordSize); 344 return ret; 345 } 346 347 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) { 348 AutoRestorer<FX_FILESIZE> save_pos(&m_Pos); 349 return GetNextWord(bIsNumber); 350 } 351 352 ByteString CPDF_SyntaxParser::GetKeyword() { 353 return GetNextWord(nullptr); 354 } 355 356 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody( 357 CPDF_IndirectObjectHolder* pObjList) { 358 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 359 auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose); 360 if (GetValidator()->has_read_problems()) 361 return nullptr; 362 return result; 363 } 364 365 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal( 366 CPDF_IndirectObjectHolder* pObjList, 367 ParseType parse_type) { 368 AutoRestorer<int> restorer(&s_CurrentRecursionDepth); 369 if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth) 370 return nullptr; 371 372 FX_FILESIZE SavedObjPos = m_Pos; 373 bool bIsNumber; 374 ByteString word = GetNextWord(&bIsNumber); 375 if (word.GetLength() == 0) 376 return nullptr; 377 378 if (bIsNumber) { 379 FX_FILESIZE SavedPos = m_Pos; 380 ByteString nextword = GetNextWord(&bIsNumber); 381 if (bIsNumber) { 382 ByteString nextword2 = GetNextWord(nullptr); 383 if (nextword2 == "R") { 384 uint32_t refnum = FXSYS_atoui(word.c_str()); 385 if (refnum == CPDF_Object::kInvalidObjNum) 386 return nullptr; 387 return pdfium::MakeUnique<CPDF_Reference>(pObjList, refnum); 388 } 389 } 390 m_Pos = SavedPos; 391 return pdfium::MakeUnique<CPDF_Number>(word.AsStringView()); 392 } 393 394 if (word == "true" || word == "false") 395 return pdfium::MakeUnique<CPDF_Boolean>(word == "true"); 396 397 if (word == "null") 398 return pdfium::MakeUnique<CPDF_Null>(); 399 400 if (word == "(") { 401 ByteString str = ReadString(); 402 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false); 403 } 404 if (word == "<") { 405 ByteString str = ReadHexString(); 406 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true); 407 } 408 if (word == "[") { 409 auto pArray = pdfium::MakeUnique<CPDF_Array>(); 410 while (std::unique_ptr<CPDF_Object> pObj = 411 GetObjectBodyInternal(pObjList, ParseType::kLoose)) { 412 pArray->Add(std::move(pObj)); 413 } 414 return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']') 415 ? std::move(pArray) 416 : nullptr; 417 } 418 if (word[0] == '/') { 419 return pdfium::MakeUnique<CPDF_Name>( 420 m_pPool, 421 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1))); 422 } 423 if (word == "<<") { 424 std::unique_ptr<CPDF_Dictionary> pDict = 425 pdfium::MakeUnique<CPDF_Dictionary>(m_pPool); 426 while (1) { 427 ByteString key = GetNextWord(nullptr); 428 if (key.IsEmpty()) 429 return nullptr; 430 431 FX_FILESIZE SavedPos = m_Pos - key.GetLength(); 432 if (key == ">>") 433 break; 434 435 if (key == "endobj") { 436 m_Pos = SavedPos; 437 break; 438 } 439 if (key[0] != '/') 440 continue; 441 442 key = PDF_NameDecode(key); 443 444 if (key.IsEmpty() && parse_type == ParseType::kLoose) 445 continue; 446 447 std::unique_ptr<CPDF_Object> pObj = 448 GetObjectBodyInternal(pObjList, ParseType::kLoose); 449 if (!pObj) { 450 if (parse_type == ParseType::kLoose) 451 continue; 452 453 ToNextLine(); 454 return nullptr; 455 } 456 457 if (!key.IsEmpty()) { 458 ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1); 459 pDict->SetFor(keyNoSlash, std::move(pObj)); 460 } 461 } 462 463 FX_FILESIZE SavedPos = m_Pos; 464 ByteString nextword = GetNextWord(nullptr); 465 if (nextword != "stream") { 466 m_Pos = SavedPos; 467 return std::move(pDict); 468 } 469 return ReadStream(std::move(pDict)); 470 } 471 if (word == ">>") 472 m_Pos = SavedObjPos; 473 474 return nullptr; 475 } 476 477 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject( 478 CPDF_IndirectObjectHolder* pObjList, 479 ParseType parse_type) { 480 const CPDF_ReadValidator::Session read_session(GetValidator().Get()); 481 const FX_FILESIZE saved_pos = GetPos(); 482 bool is_number = false; 483 ByteString word = GetNextWord(&is_number); 484 if (!is_number || word.IsEmpty()) { 485 SetPos(saved_pos); 486 return nullptr; 487 } 488 const uint32_t parser_objnum = FXSYS_atoui(word.c_str()); 489 490 word = GetNextWord(&is_number); 491 if (!is_number || word.IsEmpty()) { 492 SetPos(saved_pos); 493 return nullptr; 494 } 495 const uint32_t parser_gennum = FXSYS_atoui(word.c_str()); 496 497 if (GetKeyword() != "obj") { 498 SetPos(saved_pos); 499 return nullptr; 500 } 501 502 std::unique_ptr<CPDF_Object> pObj = 503 GetObjectBodyInternal(pObjList, parse_type); 504 if (pObj) { 505 pObj->SetObjNum(parser_objnum); 506 pObj->SetGenNum(parser_gennum); 507 } 508 509 return GetValidator()->has_read_problems() ? nullptr : std::move(pObj); 510 } 511 512 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) { 513 unsigned char byte1 = 0; 514 unsigned char byte2 = 0; 515 516 GetCharAt(pos, byte1); 517 GetCharAt(pos + 1, byte2); 518 519 if (byte1 == '\r' && byte2 == '\n') 520 return 2; 521 522 if (byte1 == '\r' || byte1 == '\n') 523 return 1; 524 525 return 0; 526 } 527 528 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream( 529 std::unique_ptr<CPDF_Dictionary> pDict) { 530 const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length")); 531 FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1; 532 533 // Locate the start of stream. 534 ToNextLine(); 535 FX_FILESIZE streamStartPos = m_Pos; 536 537 const ByteStringView kEndStreamStr("endstream"); 538 const ByteStringView kEndObjStr("endobj"); 539 540 bool bSearchForKeyword = true; 541 if (len >= 0) { 542 pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos; 543 pos += len; 544 if (pos.IsValid() && pos.ValueOrDie() < m_FileLen) 545 m_Pos = pos.ValueOrDie(); 546 547 m_Pos += ReadEOLMarkers(m_Pos); 548 memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1); 549 GetNextWordInternal(nullptr); 550 // Earlier version of PDF specification doesn't require EOL marker before 551 // 'endstream' keyword. If keyword 'endstream' follows the bytes in 552 // specified length, it signals the end of stream. 553 if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(), 554 kEndStreamStr.GetLength()) == 0) { 555 bSearchForKeyword = false; 556 } 557 } 558 559 if (bSearchForKeyword) { 560 // If len is not available, len needs to be calculated 561 // by searching the keywords "endstream" or "endobj". 562 m_Pos = streamStartPos; 563 FX_FILESIZE endStreamOffset = 0; 564 while (endStreamOffset >= 0) { 565 endStreamOffset = FindTag(kEndStreamStr, 0); 566 567 // Can't find "endstream". 568 if (endStreamOffset < 0) 569 break; 570 571 // Stop searching when "endstream" is found. 572 if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen, 573 kEndStreamStr, true)) { 574 endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength(); 575 break; 576 } 577 } 578 579 m_Pos = streamStartPos; 580 FX_FILESIZE endObjOffset = 0; 581 while (endObjOffset >= 0) { 582 endObjOffset = FindTag(kEndObjStr, 0); 583 584 // Can't find "endobj". 585 if (endObjOffset < 0) 586 break; 587 588 // Stop searching when "endobj" is found. 589 if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr, 590 true)) { 591 endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength(); 592 break; 593 } 594 } 595 596 // Can't find "endstream" or "endobj". 597 if (endStreamOffset < 0 && endObjOffset < 0) 598 return nullptr; 599 600 if (endStreamOffset < 0 && endObjOffset >= 0) { 601 // Correct the position of end stream. 602 endStreamOffset = endObjOffset; 603 } else if (endStreamOffset >= 0 && endObjOffset < 0) { 604 // Correct the position of end obj. 605 endObjOffset = endStreamOffset; 606 } else if (endStreamOffset > endObjOffset) { 607 endStreamOffset = endObjOffset; 608 } 609 len = endStreamOffset; 610 611 int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2); 612 if (numMarkers == 2) { 613 len -= 2; 614 } else { 615 numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1); 616 if (numMarkers == 1) { 617 len -= 1; 618 } 619 } 620 if (len < 0) 621 return nullptr; 622 623 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len)); 624 } 625 m_Pos = streamStartPos; 626 627 // Read up to the end of the buffer. Note, we allow zero length streams as 628 // we need to pass them through when we are importing pages into a new 629 // document. 630 len = std::min(len, m_FileLen - m_Pos - m_HeaderOffset); 631 if (len < 0) 632 return nullptr; 633 634 std::unique_ptr<uint8_t, FxFreeDeleter> pData; 635 if (len > 0) { 636 pData.reset(FX_Alloc(uint8_t, len)); 637 ReadBlock(pData.get(), len); 638 } 639 auto pStream = 640 pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict)); 641 streamStartPos = m_Pos; 642 memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1); 643 GetNextWordInternal(nullptr); 644 645 int numMarkers = ReadEOLMarkers(m_Pos); 646 if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) && 647 numMarkers != 0 && 648 memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) { 649 m_Pos = streamStartPos; 650 } 651 return pStream; 652 } 653 654 void CPDF_SyntaxParser::InitParser( 655 const RetainPtr<IFX_SeekableReadStream>& pFileAccess, 656 uint32_t HeaderOffset) { 657 ASSERT(pFileAccess); 658 return InitParserWithValidator( 659 pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr), 660 HeaderOffset); 661 } 662 663 void CPDF_SyntaxParser::InitParserWithValidator( 664 const RetainPtr<CPDF_ReadValidator>& validator, 665 uint32_t HeaderOffset) { 666 ASSERT(validator); 667 m_pFileBuf.clear(); 668 m_HeaderOffset = HeaderOffset; 669 m_FileLen = validator->GetSize(); 670 m_Pos = 0; 671 m_pFileAccess = validator; 672 m_BufOffset = 0; 673 } 674 675 uint32_t CPDF_SyntaxParser::GetDirectNum() { 676 bool bIsNumber; 677 GetNextWordInternal(&bIsNumber); 678 if (!bIsNumber) 679 return 0; 680 681 m_WordBuffer[m_WordSize] = 0; 682 return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer)); 683 } 684 685 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos, 686 FX_FILESIZE limit, 687 const ByteStringView& tag, 688 bool checkKeyword) { 689 const uint32_t taglen = tag.GetLength(); 690 691 bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]); 692 bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) && 693 !PDFCharIsWhitespace(tag[taglen - 1]); 694 695 uint8_t ch; 696 if (bCheckRight && startpos + (int32_t)taglen <= limit && 697 GetCharAt(startpos + (int32_t)taglen, ch)) { 698 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 699 (checkKeyword && PDFCharIsDelimiter(ch))) { 700 return false; 701 } 702 } 703 704 if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) { 705 if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) || 706 (checkKeyword && PDFCharIsDelimiter(ch))) { 707 return false; 708 } 709 } 710 return true; 711 } 712 713 bool CPDF_SyntaxParser::BackwardsSearchToWord(const ByteStringView& tag, 714 FX_FILESIZE limit) { 715 int32_t taglen = tag.GetLength(); 716 if (taglen == 0) 717 return false; 718 719 FX_FILESIZE pos = m_Pos; 720 int32_t offset = taglen - 1; 721 while (1) { 722 if (limit && pos <= m_Pos - limit) 723 return false; 724 725 uint8_t byte; 726 if (!GetCharAtBackward(pos, &byte)) 727 return false; 728 729 if (byte == tag[offset]) { 730 offset--; 731 if (offset >= 0) { 732 pos--; 733 continue; 734 } 735 if (IsWholeWord(pos, limit, tag, false)) { 736 m_Pos = pos; 737 return true; 738 } 739 } 740 offset = byte == tag[taglen - 1] ? taglen - 2 : taglen - 1; 741 pos--; 742 if (pos < 0) 743 return false; 744 } 745 } 746 747 FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag, 748 FX_FILESIZE limit) { 749 int32_t taglen = tag.GetLength(); 750 int32_t match = 0; 751 limit += m_Pos; 752 FX_FILESIZE startpos = m_Pos; 753 754 while (1) { 755 uint8_t ch; 756 if (!GetNextChar(ch)) 757 return -1; 758 759 if (ch == tag[match]) { 760 match++; 761 if (match == taglen) 762 return m_Pos - startpos - taglen; 763 } else { 764 match = ch == tag[0] ? 1 : 0; 765 } 766 767 if (limit && m_Pos == limit) 768 return -1; 769 } 770 return -1; 771 } 772 773 RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const { 774 return m_pFileAccess; 775 } 776 777 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const { 778 return m_BufOffset <= pos && 779 pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size()); 780 } 781