Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
      8 
      9 #include <algorithm>
     10 #include <sstream>
     11 #include <utility>
     12 #include <vector>
     13 
     14 #include "core/fpdfapi/cpdf_modulemgr.h"
     15 #include "core/fpdfapi/parser/cpdf_array.h"
     16 #include "core/fpdfapi/parser/cpdf_boolean.h"
     17 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
     18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     19 #include "core/fpdfapi/parser/cpdf_name.h"
     20 #include "core/fpdfapi/parser/cpdf_null.h"
     21 #include "core/fpdfapi/parser/cpdf_number.h"
     22 #include "core/fpdfapi/parser/cpdf_read_validator.h"
     23 #include "core/fpdfapi/parser/cpdf_reference.h"
     24 #include "core/fpdfapi/parser/cpdf_stream.h"
     25 #include "core/fpdfapi/parser/cpdf_string.h"
     26 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     27 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     28 #include "core/fxcrt/autorestorer.h"
     29 #include "core/fxcrt/cfx_binarybuf.h"
     30 #include "core/fxcrt/fx_extension.h"
     31 #include "third_party/base/numerics/safe_math.h"
     32 #include "third_party/base/ptr_util.h"
     33 
     34 namespace {
     35 
     36 enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn };
     37 
     38 }  // namespace
     39 
     40 // static
     41 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
     42 
     43 CPDF_SyntaxParser::CPDF_SyntaxParser()
     44     : CPDF_SyntaxParser(WeakPtr<ByteStringPool>()) {}
     45 
     46 CPDF_SyntaxParser::CPDF_SyntaxParser(const WeakPtr<ByteStringPool>& pPool)
     47     : m_pFileAccess(nullptr), m_pPool(pPool) {}
     48 
     49 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
     50 }
     51 
     52 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
     53   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
     54   m_Pos = pos;
     55   return GetNextChar(ch);
     56 }
     57 
     58 bool CPDF_SyntaxParser::ReadBlockAt(FX_FILESIZE read_pos) {
     59   if (read_pos >= m_FileLen)
     60     return false;
     61   size_t read_size = CPDF_ModuleMgr::kFileBufSize;
     62   FX_SAFE_FILESIZE safe_end = read_pos;
     63   safe_end += read_size;
     64   if (!safe_end.IsValid() || safe_end.ValueOrDie() > m_FileLen)
     65     read_size = m_FileLen - read_pos;
     66 
     67   m_pFileBuf.resize(read_size);
     68   if (!m_pFileAccess->ReadBlock(m_pFileBuf.data(), read_pos, read_size)) {
     69     m_pFileBuf.clear();
     70     return false;
     71   }
     72 
     73   m_BufOffset = read_pos;
     74   return true;
     75 }
     76 
     77 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
     78   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
     79   if (pos >= m_FileLen)
     80     return false;
     81 
     82   if (!IsPositionRead(pos) && !ReadBlockAt(pos))
     83     return false;
     84 
     85   ch = m_pFileBuf[pos - m_BufOffset];
     86   m_Pos++;
     87   return true;
     88 }
     89 
     90 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t* ch) {
     91   pos += m_HeaderOffset;
     92   if (pos >= m_FileLen)
     93     return false;
     94 
     95   if (!IsPositionRead(pos)) {
     96     FX_FILESIZE block_start = 0;
     97     if (pos >= CPDF_ModuleMgr::kFileBufSize)
     98       block_start = pos - CPDF_ModuleMgr::kFileBufSize + 1;
     99     if (!ReadBlockAt(block_start) || !IsPositionRead(pos))
    100       return false;
    101   }
    102   *ch = m_pFileBuf[pos - m_BufOffset];
    103   return true;
    104 }
    105 
    106 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
    107   if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
    108     return false;
    109   m_Pos += size;
    110   return true;
    111 }
    112 
    113 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
    114   m_WordSize = 0;
    115   if (bIsNumber)
    116     *bIsNumber = true;
    117 
    118   ToNextWord();
    119   uint8_t ch;
    120   if (!GetNextChar(ch))
    121     return;
    122 
    123   if (PDFCharIsDelimiter(ch)) {
    124     if (bIsNumber)
    125       *bIsNumber = false;
    126 
    127     m_WordBuffer[m_WordSize++] = ch;
    128     if (ch == '/') {
    129       while (1) {
    130         if (!GetNextChar(ch))
    131           return;
    132 
    133         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    134           m_Pos--;
    135           return;
    136         }
    137 
    138         if (m_WordSize < sizeof(m_WordBuffer) - 1)
    139           m_WordBuffer[m_WordSize++] = ch;
    140       }
    141     } else if (ch == '<') {
    142       if (!GetNextChar(ch))
    143         return;
    144 
    145       if (ch == '<')
    146         m_WordBuffer[m_WordSize++] = ch;
    147       else
    148         m_Pos--;
    149     } else if (ch == '>') {
    150       if (!GetNextChar(ch))
    151         return;
    152 
    153       if (ch == '>')
    154         m_WordBuffer[m_WordSize++] = ch;
    155       else
    156         m_Pos--;
    157     }
    158     return;
    159   }
    160 
    161   while (1) {
    162     if (m_WordSize < sizeof(m_WordBuffer) - 1)
    163       m_WordBuffer[m_WordSize++] = ch;
    164 
    165     if (!PDFCharIsNumeric(ch)) {
    166       if (bIsNumber)
    167         *bIsNumber = false;
    168     }
    169 
    170     if (!GetNextChar(ch))
    171       return;
    172 
    173     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    174       m_Pos--;
    175       break;
    176     }
    177   }
    178 }
    179 
    180 ByteString CPDF_SyntaxParser::ReadString() {
    181   uint8_t ch;
    182   if (!GetNextChar(ch))
    183     return ByteString();
    184 
    185   std::ostringstream buf;
    186   int32_t parlevel = 0;
    187   ReadStatus status = ReadStatus::Normal;
    188   int32_t iEscCode = 0;
    189   while (1) {
    190     switch (status) {
    191       case ReadStatus::Normal:
    192         if (ch == ')') {
    193           if (parlevel == 0)
    194             return ByteString(buf);
    195           parlevel--;
    196         } else if (ch == '(') {
    197           parlevel++;
    198         }
    199         if (ch == '\\')
    200           status = ReadStatus::Backslash;
    201         else
    202           buf << static_cast<char>(ch);
    203         break;
    204       case ReadStatus::Backslash:
    205         if (ch >= '0' && ch <= '7') {
    206           iEscCode = FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
    207           status = ReadStatus::Octal;
    208           break;
    209         }
    210 
    211         if (ch == '\r') {
    212           status = ReadStatus::CarriageReturn;
    213           break;
    214         }
    215         if (ch == 'n') {
    216           buf << '\n';
    217         } else if (ch == 'r') {
    218           buf << '\r';
    219         } else if (ch == 't') {
    220           buf << '\t';
    221         } else if (ch == 'b') {
    222           buf << '\b';
    223         } else if (ch == 'f') {
    224           buf << '\f';
    225         } else if (ch != '\n') {
    226           buf << static_cast<char>(ch);
    227         }
    228         status = ReadStatus::Normal;
    229         break;
    230       case ReadStatus::Octal:
    231         if (ch >= '0' && ch <= '7') {
    232           iEscCode =
    233               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
    234           status = ReadStatus::FinishOctal;
    235         } else {
    236           buf << static_cast<char>(iEscCode);
    237           status = ReadStatus::Normal;
    238           continue;
    239         }
    240         break;
    241       case ReadStatus::FinishOctal:
    242         status = ReadStatus::Normal;
    243         if (ch >= '0' && ch <= '7') {
    244           iEscCode =
    245               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<wchar_t>(ch));
    246           buf << static_cast<char>(iEscCode);
    247         } else {
    248           buf << static_cast<char>(iEscCode);
    249           continue;
    250         }
    251         break;
    252       case ReadStatus::CarriageReturn:
    253         status = ReadStatus::Normal;
    254         if (ch != '\n')
    255           continue;
    256         break;
    257     }
    258 
    259     if (!GetNextChar(ch))
    260       break;
    261   }
    262 
    263   GetNextChar(ch);
    264   return ByteString(buf);
    265 }
    266 
    267 ByteString CPDF_SyntaxParser::ReadHexString() {
    268   uint8_t ch;
    269   if (!GetNextChar(ch))
    270     return ByteString();
    271 
    272   std::ostringstream buf;
    273   bool bFirst = true;
    274   uint8_t code = 0;
    275   while (1) {
    276     if (ch == '>')
    277       break;
    278 
    279     if (std::isxdigit(ch)) {
    280       int val = FXSYS_HexCharToInt(ch);
    281       if (bFirst) {
    282         code = val * 16;
    283       } else {
    284         code += val;
    285         buf << static_cast<char>(code);
    286       }
    287       bFirst = !bFirst;
    288     }
    289 
    290     if (!GetNextChar(ch))
    291       break;
    292   }
    293   if (!bFirst)
    294     buf << static_cast<char>(code);
    295 
    296   return ByteString(buf);
    297 }
    298 
    299 void CPDF_SyntaxParser::ToNextLine() {
    300   uint8_t ch;
    301   while (GetNextChar(ch)) {
    302     if (ch == '\n')
    303       break;
    304 
    305     if (ch == '\r') {
    306       GetNextChar(ch);
    307       if (ch != '\n')
    308         --m_Pos;
    309       break;
    310     }
    311   }
    312 }
    313 
    314 void CPDF_SyntaxParser::ToNextWord() {
    315   uint8_t ch;
    316   if (!GetNextChar(ch))
    317     return;
    318 
    319   while (1) {
    320     while (PDFCharIsWhitespace(ch)) {
    321       if (!GetNextChar(ch))
    322         return;
    323     }
    324 
    325     if (ch != '%')
    326       break;
    327 
    328     while (1) {
    329       if (!GetNextChar(ch))
    330         return;
    331       if (PDFCharIsLineEnding(ch))
    332         break;
    333     }
    334   }
    335   m_Pos--;
    336 }
    337 
    338 ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
    339   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    340   GetNextWordInternal(bIsNumber);
    341   ByteString ret;
    342   if (!GetValidator()->has_read_problems())
    343     ret = ByteString(m_WordBuffer, m_WordSize);
    344   return ret;
    345 }
    346 
    347 ByteString CPDF_SyntaxParser::PeekNextWord(bool* bIsNumber) {
    348   AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
    349   return GetNextWord(bIsNumber);
    350 }
    351 
    352 ByteString CPDF_SyntaxParser::GetKeyword() {
    353   return GetNextWord(nullptr);
    354 }
    355 
    356 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBody(
    357     CPDF_IndirectObjectHolder* pObjList) {
    358   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    359   auto result = GetObjectBodyInternal(pObjList, ParseType::kLoose);
    360   if (GetValidator()->has_read_problems())
    361     return nullptr;
    362   return result;
    363 }
    364 
    365 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectBodyInternal(
    366     CPDF_IndirectObjectHolder* pObjList,
    367     ParseType parse_type) {
    368   AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
    369   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
    370     return nullptr;
    371 
    372   FX_FILESIZE SavedObjPos = m_Pos;
    373   bool bIsNumber;
    374   ByteString word = GetNextWord(&bIsNumber);
    375   if (word.GetLength() == 0)
    376     return nullptr;
    377 
    378   if (bIsNumber) {
    379     FX_FILESIZE SavedPos = m_Pos;
    380     ByteString nextword = GetNextWord(&bIsNumber);
    381     if (bIsNumber) {
    382       ByteString nextword2 = GetNextWord(nullptr);
    383       if (nextword2 == "R") {
    384         uint32_t refnum = FXSYS_atoui(word.c_str());
    385         if (refnum == CPDF_Object::kInvalidObjNum)
    386           return nullptr;
    387         return pdfium::MakeUnique<CPDF_Reference>(pObjList, refnum);
    388       }
    389     }
    390     m_Pos = SavedPos;
    391     return pdfium::MakeUnique<CPDF_Number>(word.AsStringView());
    392   }
    393 
    394   if (word == "true" || word == "false")
    395     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
    396 
    397   if (word == "null")
    398     return pdfium::MakeUnique<CPDF_Null>();
    399 
    400   if (word == "(") {
    401     ByteString str = ReadString();
    402     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
    403   }
    404   if (word == "<") {
    405     ByteString str = ReadHexString();
    406     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
    407   }
    408   if (word == "[") {
    409     auto pArray = pdfium::MakeUnique<CPDF_Array>();
    410     while (std::unique_ptr<CPDF_Object> pObj =
    411                GetObjectBodyInternal(pObjList, ParseType::kLoose)) {
    412       pArray->Add(std::move(pObj));
    413     }
    414     return (parse_type == ParseType::kLoose || m_WordBuffer[0] == ']')
    415                ? std::move(pArray)
    416                : nullptr;
    417   }
    418   if (word[0] == '/') {
    419     return pdfium::MakeUnique<CPDF_Name>(
    420         m_pPool,
    421         PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)));
    422   }
    423   if (word == "<<") {
    424     std::unique_ptr<CPDF_Dictionary> pDict =
    425         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    426     while (1) {
    427       ByteString key = GetNextWord(nullptr);
    428       if (key.IsEmpty())
    429         return nullptr;
    430 
    431       FX_FILESIZE SavedPos = m_Pos - key.GetLength();
    432       if (key == ">>")
    433         break;
    434 
    435       if (key == "endobj") {
    436         m_Pos = SavedPos;
    437         break;
    438       }
    439       if (key[0] != '/')
    440         continue;
    441 
    442       key = PDF_NameDecode(key);
    443 
    444       if (key.IsEmpty() && parse_type == ParseType::kLoose)
    445         continue;
    446 
    447       std::unique_ptr<CPDF_Object> pObj =
    448           GetObjectBodyInternal(pObjList, ParseType::kLoose);
    449       if (!pObj) {
    450         if (parse_type == ParseType::kLoose)
    451           continue;
    452 
    453         ToNextLine();
    454         return nullptr;
    455       }
    456 
    457       if (!key.IsEmpty()) {
    458         ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
    459         pDict->SetFor(keyNoSlash, std::move(pObj));
    460       }
    461     }
    462 
    463     FX_FILESIZE SavedPos = m_Pos;
    464     ByteString nextword = GetNextWord(nullptr);
    465     if (nextword != "stream") {
    466       m_Pos = SavedPos;
    467       return std::move(pDict);
    468     }
    469     return ReadStream(std::move(pDict));
    470   }
    471   if (word == ">>")
    472     m_Pos = SavedObjPos;
    473 
    474   return nullptr;
    475 }
    476 
    477 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetIndirectObject(
    478     CPDF_IndirectObjectHolder* pObjList,
    479     ParseType parse_type) {
    480   const CPDF_ReadValidator::Session read_session(GetValidator().Get());
    481   const FX_FILESIZE saved_pos = GetPos();
    482   bool is_number = false;
    483   ByteString word = GetNextWord(&is_number);
    484   if (!is_number || word.IsEmpty()) {
    485     SetPos(saved_pos);
    486     return nullptr;
    487   }
    488   const uint32_t parser_objnum = FXSYS_atoui(word.c_str());
    489 
    490   word = GetNextWord(&is_number);
    491   if (!is_number || word.IsEmpty()) {
    492     SetPos(saved_pos);
    493     return nullptr;
    494   }
    495   const uint32_t parser_gennum = FXSYS_atoui(word.c_str());
    496 
    497   if (GetKeyword() != "obj") {
    498     SetPos(saved_pos);
    499     return nullptr;
    500   }
    501 
    502   std::unique_ptr<CPDF_Object> pObj =
    503       GetObjectBodyInternal(pObjList, parse_type);
    504   if (pObj) {
    505     pObj->SetObjNum(parser_objnum);
    506     pObj->SetGenNum(parser_gennum);
    507   }
    508 
    509   return GetValidator()->has_read_problems() ? nullptr : std::move(pObj);
    510 }
    511 
    512 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
    513   unsigned char byte1 = 0;
    514   unsigned char byte2 = 0;
    515 
    516   GetCharAt(pos, byte1);
    517   GetCharAt(pos + 1, byte2);
    518 
    519   if (byte1 == '\r' && byte2 == '\n')
    520     return 2;
    521 
    522   if (byte1 == '\r' || byte1 == '\n')
    523     return 1;
    524 
    525   return 0;
    526 }
    527 
    528 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
    529     std::unique_ptr<CPDF_Dictionary> pDict) {
    530   const CPDF_Number* pLenObj = ToNumber(pDict->GetDirectObjectFor("Length"));
    531   FX_FILESIZE len = pLenObj ? pLenObj->GetInteger() : -1;
    532 
    533   // Locate the start of stream.
    534   ToNextLine();
    535   FX_FILESIZE streamStartPos = m_Pos;
    536 
    537   const ByteStringView kEndStreamStr("endstream");
    538   const ByteStringView kEndObjStr("endobj");
    539 
    540     bool bSearchForKeyword = true;
    541     if (len >= 0) {
    542       pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
    543       pos += len;
    544       if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
    545         m_Pos = pos.ValueOrDie();
    546 
    547       m_Pos += ReadEOLMarkers(m_Pos);
    548       memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
    549       GetNextWordInternal(nullptr);
    550       // Earlier version of PDF specification doesn't require EOL marker before
    551       // 'endstream' keyword. If keyword 'endstream' follows the bytes in
    552       // specified length, it signals the end of stream.
    553       if (memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
    554                  kEndStreamStr.GetLength()) == 0) {
    555         bSearchForKeyword = false;
    556       }
    557     }
    558 
    559     if (bSearchForKeyword) {
    560       // If len is not available, len needs to be calculated
    561       // by searching the keywords "endstream" or "endobj".
    562       m_Pos = streamStartPos;
    563       FX_FILESIZE endStreamOffset = 0;
    564       while (endStreamOffset >= 0) {
    565         endStreamOffset = FindTag(kEndStreamStr, 0);
    566 
    567         // Can't find "endstream".
    568         if (endStreamOffset < 0)
    569           break;
    570 
    571         // Stop searching when "endstream" is found.
    572         if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
    573                         kEndStreamStr, true)) {
    574           endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
    575           break;
    576         }
    577       }
    578 
    579       m_Pos = streamStartPos;
    580       FX_FILESIZE endObjOffset = 0;
    581       while (endObjOffset >= 0) {
    582         endObjOffset = FindTag(kEndObjStr, 0);
    583 
    584         // Can't find "endobj".
    585         if (endObjOffset < 0)
    586           break;
    587 
    588         // Stop searching when "endobj" is found.
    589         if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
    590                         true)) {
    591           endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
    592           break;
    593         }
    594       }
    595 
    596       // Can't find "endstream" or "endobj".
    597       if (endStreamOffset < 0 && endObjOffset < 0)
    598         return nullptr;
    599 
    600       if (endStreamOffset < 0 && endObjOffset >= 0) {
    601         // Correct the position of end stream.
    602         endStreamOffset = endObjOffset;
    603       } else if (endStreamOffset >= 0 && endObjOffset < 0) {
    604         // Correct the position of end obj.
    605         endObjOffset = endStreamOffset;
    606       } else if (endStreamOffset > endObjOffset) {
    607         endStreamOffset = endObjOffset;
    608       }
    609       len = endStreamOffset;
    610 
    611       int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
    612       if (numMarkers == 2) {
    613         len -= 2;
    614       } else {
    615         numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
    616         if (numMarkers == 1) {
    617           len -= 1;
    618         }
    619       }
    620       if (len < 0)
    621         return nullptr;
    622 
    623       pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
    624     }
    625     m_Pos = streamStartPos;
    626 
    627   // Read up to the end of the buffer. Note, we allow zero length streams as
    628   // we need to pass them through when we are importing pages into a new
    629   // document.
    630   len = std::min(len, m_FileLen - m_Pos - m_HeaderOffset);
    631   if (len < 0)
    632     return nullptr;
    633 
    634   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
    635   if (len > 0) {
    636     pData.reset(FX_Alloc(uint8_t, len));
    637     ReadBlock(pData.get(), len);
    638   }
    639   auto pStream =
    640       pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
    641   streamStartPos = m_Pos;
    642   memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
    643   GetNextWordInternal(nullptr);
    644 
    645   int numMarkers = ReadEOLMarkers(m_Pos);
    646   if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
    647       numMarkers != 0 &&
    648       memcmp(m_WordBuffer, kEndObjStr.raw_str(), kEndObjStr.GetLength()) == 0) {
    649     m_Pos = streamStartPos;
    650   }
    651   return pStream;
    652 }
    653 
    654 void CPDF_SyntaxParser::InitParser(
    655     const RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    656     uint32_t HeaderOffset) {
    657   ASSERT(pFileAccess);
    658   return InitParserWithValidator(
    659       pdfium::MakeRetain<CPDF_ReadValidator>(pFileAccess, nullptr),
    660       HeaderOffset);
    661 }
    662 
    663 void CPDF_SyntaxParser::InitParserWithValidator(
    664     const RetainPtr<CPDF_ReadValidator>& validator,
    665     uint32_t HeaderOffset) {
    666   ASSERT(validator);
    667   m_pFileBuf.clear();
    668   m_HeaderOffset = HeaderOffset;
    669   m_FileLen = validator->GetSize();
    670   m_Pos = 0;
    671   m_pFileAccess = validator;
    672   m_BufOffset = 0;
    673 }
    674 
    675 uint32_t CPDF_SyntaxParser::GetDirectNum() {
    676   bool bIsNumber;
    677   GetNextWordInternal(&bIsNumber);
    678   if (!bIsNumber)
    679     return 0;
    680 
    681   m_WordBuffer[m_WordSize] = 0;
    682   return FXSYS_atoui(reinterpret_cast<const char*>(m_WordBuffer));
    683 }
    684 
    685 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
    686                                     FX_FILESIZE limit,
    687                                     const ByteStringView& tag,
    688                                     bool checkKeyword) {
    689   const uint32_t taglen = tag.GetLength();
    690 
    691   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
    692   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
    693                      !PDFCharIsWhitespace(tag[taglen - 1]);
    694 
    695   uint8_t ch;
    696   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
    697       GetCharAt(startpos + (int32_t)taglen, ch)) {
    698     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
    699         (checkKeyword && PDFCharIsDelimiter(ch))) {
    700       return false;
    701     }
    702   }
    703 
    704   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
    705     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
    706         (checkKeyword && PDFCharIsDelimiter(ch))) {
    707       return false;
    708     }
    709   }
    710   return true;
    711 }
    712 
    713 bool CPDF_SyntaxParser::BackwardsSearchToWord(const ByteStringView& tag,
    714                                               FX_FILESIZE limit) {
    715   int32_t taglen = tag.GetLength();
    716   if (taglen == 0)
    717     return false;
    718 
    719   FX_FILESIZE pos = m_Pos;
    720   int32_t offset = taglen - 1;
    721   while (1) {
    722     if (limit && pos <= m_Pos - limit)
    723       return false;
    724 
    725     uint8_t byte;
    726     if (!GetCharAtBackward(pos, &byte))
    727       return false;
    728 
    729     if (byte == tag[offset]) {
    730       offset--;
    731       if (offset >= 0) {
    732         pos--;
    733         continue;
    734       }
    735       if (IsWholeWord(pos, limit, tag, false)) {
    736         m_Pos = pos;
    737         return true;
    738       }
    739     }
    740     offset = byte == tag[taglen - 1] ? taglen - 2 : taglen - 1;
    741     pos--;
    742     if (pos < 0)
    743       return false;
    744   }
    745 }
    746 
    747 FX_FILESIZE CPDF_SyntaxParser::FindTag(const ByteStringView& tag,
    748                                        FX_FILESIZE limit) {
    749   int32_t taglen = tag.GetLength();
    750   int32_t match = 0;
    751   limit += m_Pos;
    752   FX_FILESIZE startpos = m_Pos;
    753 
    754   while (1) {
    755     uint8_t ch;
    756     if (!GetNextChar(ch))
    757       return -1;
    758 
    759     if (ch == tag[match]) {
    760       match++;
    761       if (match == taglen)
    762         return m_Pos - startpos - taglen;
    763     } else {
    764       match = ch == tag[0] ? 1 : 0;
    765     }
    766 
    767     if (limit && m_Pos == limit)
    768       return -1;
    769   }
    770   return -1;
    771 }
    772 
    773 RetainPtr<IFX_SeekableReadStream> CPDF_SyntaxParser::GetFileAccess() const {
    774   return m_pFileAccess;
    775 }
    776 
    777 bool CPDF_SyntaxParser::IsPositionRead(FX_FILESIZE pos) const {
    778   return m_BufOffset <= pos &&
    779          pos < static_cast<FX_FILESIZE>(m_BufOffset + m_pFileBuf.size());
    780 }
    781