Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_syntax_parser.h"
      8 
      9 #include <algorithm>
     10 #include <utility>
     11 #include <vector>
     12 
     13 #include "core/fpdfapi/cpdf_modulemgr.h"
     14 #include "core/fpdfapi/parser/cpdf_array.h"
     15 #include "core/fpdfapi/parser/cpdf_boolean.h"
     16 #include "core/fpdfapi/parser/cpdf_crypto_handler.h"
     17 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     18 #include "core/fpdfapi/parser/cpdf_name.h"
     19 #include "core/fpdfapi/parser/cpdf_null.h"
     20 #include "core/fpdfapi/parser/cpdf_number.h"
     21 #include "core/fpdfapi/parser/cpdf_reference.h"
     22 #include "core/fpdfapi/parser/cpdf_stream.h"
     23 #include "core/fpdfapi/parser/cpdf_string.h"
     24 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     25 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     26 #include "core/fxcrt/fx_ext.h"
     27 #include "third_party/base/numerics/safe_math.h"
     28 #include "third_party/base/ptr_util.h"
     29 
     30 namespace {
     31 
     32 enum class ReadStatus { Normal, Backslash, Octal, FinishOctal, CarriageReturn };
     33 
     34 }  // namespace
     35 
     36 // static
     37 int CPDF_SyntaxParser::s_CurrentRecursionDepth = 0;
     38 
     39 CPDF_SyntaxParser::CPDF_SyntaxParser()
     40     : CPDF_SyntaxParser(CFX_WeakPtr<CFX_ByteStringPool>()) {}
     41 
     42 CPDF_SyntaxParser::CPDF_SyntaxParser(
     43     const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
     44     : m_MetadataObjnum(0),
     45       m_pFileAccess(nullptr),
     46       m_pFileBuf(nullptr),
     47       m_BufSize(CPDF_ModuleMgr::kFileBufSize),
     48       m_pPool(pPool) {}
     49 
     50 CPDF_SyntaxParser::~CPDF_SyntaxParser() {
     51   FX_Free(m_pFileBuf);
     52 }
     53 
     54 bool CPDF_SyntaxParser::GetCharAt(FX_FILESIZE pos, uint8_t& ch) {
     55   CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
     56   m_Pos = pos;
     57   return GetNextChar(ch);
     58 }
     59 
     60 bool CPDF_SyntaxParser::ReadChar(FX_FILESIZE read_pos, uint32_t read_size) {
     61   if (static_cast<FX_FILESIZE>(read_pos + read_size) > m_FileLen) {
     62     if (m_FileLen < static_cast<FX_FILESIZE>(read_size)) {
     63       read_pos = 0;
     64       read_size = static_cast<uint32_t>(m_FileLen);
     65     } else {
     66       read_pos = m_FileLen - read_size;
     67     }
     68   }
     69   if (!m_pFileAccess->ReadBlock(m_pFileBuf, read_pos, read_size))
     70     return false;
     71 
     72   m_BufOffset = read_pos;
     73   return true;
     74 }
     75 
     76 bool CPDF_SyntaxParser::GetNextChar(uint8_t& ch) {
     77   FX_FILESIZE pos = m_Pos + m_HeaderOffset;
     78   if (pos >= m_FileLen)
     79     return false;
     80 
     81   if (CheckPosition(pos)) {
     82     FX_FILESIZE read_pos = pos;
     83     uint32_t read_size = m_BufSize;
     84     read_size = std::min(read_size, static_cast<uint32_t>(m_FileLen));
     85     if (!ReadChar(read_pos, read_size))
     86       return false;
     87   }
     88   ch = m_pFileBuf[pos - m_BufOffset];
     89   m_Pos++;
     90   return true;
     91 }
     92 
     93 bool CPDF_SyntaxParser::GetCharAtBackward(FX_FILESIZE pos, uint8_t& ch) {
     94   pos += m_HeaderOffset;
     95   if (pos >= m_FileLen)
     96     return false;
     97 
     98   if (CheckPosition(pos)) {
     99     FX_FILESIZE read_pos;
    100     if (pos < static_cast<FX_FILESIZE>(m_BufSize))
    101       read_pos = 0;
    102     else
    103       read_pos = pos - m_BufSize + 1;
    104     uint32_t read_size = m_BufSize;
    105     if (!ReadChar(read_pos, read_size))
    106       return false;
    107   }
    108   ch = m_pFileBuf[pos - m_BufOffset];
    109   return true;
    110 }
    111 
    112 bool CPDF_SyntaxParser::ReadBlock(uint8_t* pBuf, uint32_t size) {
    113   if (!m_pFileAccess->ReadBlock(pBuf, m_Pos + m_HeaderOffset, size))
    114     return false;
    115   m_Pos += size;
    116   return true;
    117 }
    118 
    119 void CPDF_SyntaxParser::GetNextWordInternal(bool* bIsNumber) {
    120   m_WordSize = 0;
    121   if (bIsNumber)
    122     *bIsNumber = true;
    123 
    124   uint8_t ch;
    125   if (!GetNextChar(ch))
    126     return;
    127 
    128   while (1) {
    129     while (PDFCharIsWhitespace(ch)) {
    130       if (!GetNextChar(ch))
    131         return;
    132     }
    133 
    134     if (ch != '%')
    135       break;
    136 
    137     while (1) {
    138       if (!GetNextChar(ch))
    139         return;
    140       if (PDFCharIsLineEnding(ch))
    141         break;
    142     }
    143   }
    144 
    145   if (PDFCharIsDelimiter(ch)) {
    146     if (bIsNumber)
    147       *bIsNumber = false;
    148 
    149     m_WordBuffer[m_WordSize++] = ch;
    150     if (ch == '/') {
    151       while (1) {
    152         if (!GetNextChar(ch))
    153           return;
    154 
    155         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    156           m_Pos--;
    157           return;
    158         }
    159 
    160         if (m_WordSize < sizeof(m_WordBuffer) - 1)
    161           m_WordBuffer[m_WordSize++] = ch;
    162       }
    163     } else if (ch == '<') {
    164       if (!GetNextChar(ch))
    165         return;
    166 
    167       if (ch == '<')
    168         m_WordBuffer[m_WordSize++] = ch;
    169       else
    170         m_Pos--;
    171     } else if (ch == '>') {
    172       if (!GetNextChar(ch))
    173         return;
    174 
    175       if (ch == '>')
    176         m_WordBuffer[m_WordSize++] = ch;
    177       else
    178         m_Pos--;
    179     }
    180     return;
    181   }
    182 
    183   while (1) {
    184     if (m_WordSize < sizeof(m_WordBuffer) - 1)
    185       m_WordBuffer[m_WordSize++] = ch;
    186 
    187     if (!PDFCharIsNumeric(ch)) {
    188       if (bIsNumber)
    189         *bIsNumber = false;
    190     }
    191 
    192     if (!GetNextChar(ch))
    193       return;
    194 
    195     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    196       m_Pos--;
    197       break;
    198     }
    199   }
    200 }
    201 
    202 CFX_ByteString CPDF_SyntaxParser::ReadString() {
    203   uint8_t ch;
    204   if (!GetNextChar(ch))
    205     return CFX_ByteString();
    206 
    207   CFX_ByteTextBuf buf;
    208   int32_t parlevel = 0;
    209   ReadStatus status = ReadStatus::Normal;
    210   int32_t iEscCode = 0;
    211   while (1) {
    212     switch (status) {
    213       case ReadStatus::Normal:
    214         if (ch == ')') {
    215           if (parlevel == 0)
    216             return buf.MakeString();
    217           parlevel--;
    218         } else if (ch == '(') {
    219           parlevel++;
    220         }
    221         if (ch == '\\')
    222           status = ReadStatus::Backslash;
    223         else
    224           buf.AppendChar(ch);
    225         break;
    226       case ReadStatus::Backslash:
    227         if (ch >= '0' && ch <= '7') {
    228           iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    229           status = ReadStatus::Octal;
    230           break;
    231         }
    232 
    233         if (ch == 'n') {
    234           buf.AppendChar('\n');
    235         } else if (ch == 'r') {
    236           buf.AppendChar('\r');
    237         } else if (ch == 't') {
    238           buf.AppendChar('\t');
    239         } else if (ch == 'b') {
    240           buf.AppendChar('\b');
    241         } else if (ch == 'f') {
    242           buf.AppendChar('\f');
    243         } else if (ch == '\r') {
    244           status = ReadStatus::CarriageReturn;
    245           break;
    246         } else if (ch != '\n') {
    247           buf.AppendChar(ch);
    248         }
    249         status = ReadStatus::Normal;
    250         break;
    251       case ReadStatus::Octal:
    252         if (ch >= '0' && ch <= '7') {
    253           iEscCode =
    254               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    255           status = ReadStatus::FinishOctal;
    256         } else {
    257           buf.AppendChar(iEscCode);
    258           status = ReadStatus::Normal;
    259           continue;
    260         }
    261         break;
    262       case ReadStatus::FinishOctal:
    263         status = ReadStatus::Normal;
    264         if (ch >= '0' && ch <= '7') {
    265           iEscCode =
    266               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    267           buf.AppendChar(iEscCode);
    268         } else {
    269           buf.AppendChar(iEscCode);
    270           continue;
    271         }
    272         break;
    273       case ReadStatus::CarriageReturn:
    274         status = ReadStatus::Normal;
    275         if (ch != '\n')
    276           continue;
    277         break;
    278     }
    279 
    280     if (!GetNextChar(ch))
    281       break;
    282   }
    283 
    284   GetNextChar(ch);
    285   return buf.MakeString();
    286 }
    287 
    288 CFX_ByteString CPDF_SyntaxParser::ReadHexString() {
    289   uint8_t ch;
    290   if (!GetNextChar(ch))
    291     return CFX_ByteString();
    292 
    293   CFX_ByteTextBuf buf;
    294   bool bFirst = true;
    295   uint8_t code = 0;
    296   while (1) {
    297     if (ch == '>')
    298       break;
    299 
    300     if (std::isxdigit(ch)) {
    301       int val = FXSYS_toHexDigit(ch);
    302       if (bFirst) {
    303         code = val * 16;
    304       } else {
    305         code += val;
    306         buf.AppendByte(code);
    307       }
    308       bFirst = !bFirst;
    309     }
    310 
    311     if (!GetNextChar(ch))
    312       break;
    313   }
    314   if (!bFirst)
    315     buf.AppendByte(code);
    316 
    317   return buf.MakeString();
    318 }
    319 
    320 void CPDF_SyntaxParser::ToNextLine() {
    321   uint8_t ch;
    322   while (GetNextChar(ch)) {
    323     if (ch == '\n')
    324       break;
    325 
    326     if (ch == '\r') {
    327       GetNextChar(ch);
    328       if (ch != '\n')
    329         --m_Pos;
    330       break;
    331     }
    332   }
    333 }
    334 
    335 void CPDF_SyntaxParser::ToNextWord() {
    336   uint8_t ch;
    337   if (!GetNextChar(ch))
    338     return;
    339 
    340   while (1) {
    341     while (PDFCharIsWhitespace(ch)) {
    342       if (!GetNextChar(ch))
    343         return;
    344     }
    345 
    346     if (ch != '%')
    347       break;
    348 
    349     while (1) {
    350       if (!GetNextChar(ch))
    351         return;
    352       if (PDFCharIsLineEnding(ch))
    353         break;
    354     }
    355   }
    356   m_Pos--;
    357 }
    358 
    359 CFX_ByteString CPDF_SyntaxParser::GetNextWord(bool* bIsNumber) {
    360   GetNextWordInternal(bIsNumber);
    361   return CFX_ByteString((const FX_CHAR*)m_WordBuffer, m_WordSize);
    362 }
    363 
    364 CFX_ByteString CPDF_SyntaxParser::GetKeyword() {
    365   return GetNextWord(nullptr);
    366 }
    367 
    368 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObject(
    369     CPDF_IndirectObjectHolder* pObjList,
    370     uint32_t objnum,
    371     uint32_t gennum,
    372     bool bDecrypt) {
    373   CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
    374   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
    375     return nullptr;
    376 
    377   FX_FILESIZE SavedObjPos = m_Pos;
    378   bool bIsNumber;
    379   CFX_ByteString word = GetNextWord(&bIsNumber);
    380   if (word.GetLength() == 0)
    381     return nullptr;
    382 
    383   if (bIsNumber) {
    384     FX_FILESIZE SavedPos = m_Pos;
    385     CFX_ByteString nextword = GetNextWord(&bIsNumber);
    386     if (bIsNumber) {
    387       CFX_ByteString nextword2 = GetNextWord(nullptr);
    388       if (nextword2 == "R") {
    389         uint32_t objnum = FXSYS_atoui(word.c_str());
    390         if (objnum == CPDF_Object::kInvalidObjNum)
    391           return nullptr;
    392         return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
    393       }
    394     }
    395     m_Pos = SavedPos;
    396     return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
    397   }
    398 
    399   if (word == "true" || word == "false")
    400     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
    401 
    402   if (word == "null")
    403     return pdfium::MakeUnique<CPDF_Null>();
    404 
    405   if (word == "(") {
    406     CFX_ByteString str = ReadString();
    407     if (m_pCryptoHandler && bDecrypt)
    408       m_pCryptoHandler->Decrypt(objnum, gennum, str);
    409     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
    410   }
    411   if (word == "<") {
    412     CFX_ByteString str = ReadHexString();
    413     if (m_pCryptoHandler && bDecrypt)
    414       m_pCryptoHandler->Decrypt(objnum, gennum, str);
    415     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
    416   }
    417   if (word == "[") {
    418     std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
    419     while (std::unique_ptr<CPDF_Object> pObj =
    420                GetObject(pObjList, objnum, gennum, true)) {
    421       pArray->Add(std::move(pObj));
    422     }
    423     return std::move(pArray);
    424   }
    425   if (word[0] == '/') {
    426     return pdfium::MakeUnique<CPDF_Name>(
    427         m_pPool,
    428         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
    429   }
    430   if (word == "<<") {
    431     int32_t nKeys = 0;
    432     FX_FILESIZE dwSignValuePos = 0;
    433     std::unique_ptr<CPDF_Dictionary> pDict =
    434         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    435     while (1) {
    436       CFX_ByteString key = GetNextWord(nullptr);
    437       if (key.IsEmpty())
    438         return nullptr;
    439 
    440       FX_FILESIZE SavedPos = m_Pos - key.GetLength();
    441       if (key == ">>")
    442         break;
    443 
    444       if (key == "endobj") {
    445         m_Pos = SavedPos;
    446         break;
    447       }
    448       if (key[0] != '/')
    449         continue;
    450 
    451       ++nKeys;
    452       key = PDF_NameDecode(key);
    453       if (key.IsEmpty())
    454         continue;
    455 
    456       if (key == "/Contents")
    457         dwSignValuePos = m_Pos;
    458 
    459       std::unique_ptr<CPDF_Object> pObj =
    460           GetObject(pObjList, objnum, gennum, true);
    461       if (!pObj)
    462         continue;
    463 
    464       CFX_ByteString keyNoSlash(key.raw_str() + 1, key.GetLength() - 1);
    465       pDict->SetFor(keyNoSlash, std::move(pObj));
    466     }
    467 
    468     // Only when this is a signature dictionary and has contents, we reset the
    469     // contents to the un-decrypted form.
    470     if (pDict->IsSignatureDict() && dwSignValuePos) {
    471       CFX_AutoRestorer<FX_FILESIZE> save_pos(&m_Pos);
    472       m_Pos = dwSignValuePos;
    473       pDict->SetFor("Contents", GetObject(pObjList, objnum, gennum, false));
    474     }
    475 
    476     FX_FILESIZE SavedPos = m_Pos;
    477     CFX_ByteString nextword = GetNextWord(nullptr);
    478     if (nextword != "stream") {
    479       m_Pos = SavedPos;
    480       return std::move(pDict);
    481     }
    482     return ReadStream(std::move(pDict), objnum, gennum);
    483   }
    484   if (word == ">>")
    485     m_Pos = SavedObjPos;
    486 
    487   return nullptr;
    488 }
    489 
    490 std::unique_ptr<CPDF_Object> CPDF_SyntaxParser::GetObjectForStrict(
    491     CPDF_IndirectObjectHolder* pObjList,
    492     uint32_t objnum,
    493     uint32_t gennum) {
    494   CFX_AutoRestorer<int> restorer(&s_CurrentRecursionDepth);
    495   if (++s_CurrentRecursionDepth > kParserMaxRecursionDepth)
    496     return nullptr;
    497 
    498   FX_FILESIZE SavedObjPos = m_Pos;
    499   bool bIsNumber;
    500   CFX_ByteString word = GetNextWord(&bIsNumber);
    501   if (word.GetLength() == 0)
    502     return nullptr;
    503 
    504   if (bIsNumber) {
    505     FX_FILESIZE SavedPos = m_Pos;
    506     CFX_ByteString nextword = GetNextWord(&bIsNumber);
    507     if (bIsNumber) {
    508       CFX_ByteString nextword2 = GetNextWord(nullptr);
    509       if (nextword2 == "R") {
    510         uint32_t objnum = FXSYS_atoui(word.c_str());
    511         if (objnum == CPDF_Object::kInvalidObjNum)
    512           return nullptr;
    513         return pdfium::MakeUnique<CPDF_Reference>(pObjList, objnum);
    514       }
    515     }
    516     m_Pos = SavedPos;
    517     return pdfium::MakeUnique<CPDF_Number>(word.AsStringC());
    518   }
    519 
    520   if (word == "true" || word == "false")
    521     return pdfium::MakeUnique<CPDF_Boolean>(word == "true");
    522 
    523   if (word == "null")
    524     return pdfium::MakeUnique<CPDF_Null>();
    525 
    526   if (word == "(") {
    527     CFX_ByteString str = ReadString();
    528     if (m_pCryptoHandler)
    529       m_pCryptoHandler->Decrypt(objnum, gennum, str);
    530     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
    531   }
    532   if (word == "<") {
    533     CFX_ByteString str = ReadHexString();
    534     if (m_pCryptoHandler)
    535       m_pCryptoHandler->Decrypt(objnum, gennum, str);
    536     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, true);
    537   }
    538   if (word == "[") {
    539     std::unique_ptr<CPDF_Array> pArray = pdfium::MakeUnique<CPDF_Array>();
    540     while (std::unique_ptr<CPDF_Object> pObj =
    541                GetObject(pObjList, objnum, gennum, true)) {
    542       pArray->Add(std::move(pObj));
    543     }
    544     return m_WordBuffer[0] == ']' ? std::move(pArray) : nullptr;
    545   }
    546   if (word[0] == '/') {
    547     return pdfium::MakeUnique<CPDF_Name>(
    548         m_pPool,
    549         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
    550   }
    551   if (word == "<<") {
    552     std::unique_ptr<CPDF_Dictionary> pDict =
    553         pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    554     while (1) {
    555       FX_FILESIZE SavedPos = m_Pos;
    556       CFX_ByteString key = GetNextWord(nullptr);
    557       if (key.IsEmpty())
    558         return nullptr;
    559 
    560       if (key == ">>")
    561         break;
    562 
    563       if (key == "endobj") {
    564         m_Pos = SavedPos;
    565         break;
    566       }
    567       if (key[0] != '/')
    568         continue;
    569 
    570       key = PDF_NameDecode(key);
    571       std::unique_ptr<CPDF_Object> obj(
    572           GetObject(pObjList, objnum, gennum, true));
    573       if (!obj) {
    574         uint8_t ch;
    575         while (GetNextChar(ch) && ch != 0x0A && ch != 0x0D) {
    576           continue;
    577         }
    578         return nullptr;
    579       }
    580 
    581       if (key.GetLength() > 1) {
    582         pDict->SetFor(CFX_ByteString(key.c_str() + 1, key.GetLength() - 1),
    583                       std::move(obj));
    584       }
    585     }
    586 
    587     FX_FILESIZE SavedPos = m_Pos;
    588     CFX_ByteString nextword = GetNextWord(nullptr);
    589     if (nextword != "stream") {
    590       m_Pos = SavedPos;
    591       return std::move(pDict);
    592     }
    593     return ReadStream(std::move(pDict), objnum, gennum);
    594   }
    595   if (word == ">>")
    596     m_Pos = SavedObjPos;
    597 
    598   return nullptr;
    599 }
    600 
    601 unsigned int CPDF_SyntaxParser::ReadEOLMarkers(FX_FILESIZE pos) {
    602   unsigned char byte1 = 0;
    603   unsigned char byte2 = 0;
    604 
    605   GetCharAt(pos, byte1);
    606   GetCharAt(pos + 1, byte2);
    607 
    608   if (byte1 == '\r' && byte2 == '\n')
    609     return 2;
    610 
    611   if (byte1 == '\r' || byte1 == '\n')
    612     return 1;
    613 
    614   return 0;
    615 }
    616 
    617 std::unique_ptr<CPDF_Stream> CPDF_SyntaxParser::ReadStream(
    618     std::unique_ptr<CPDF_Dictionary> pDict,
    619     uint32_t objnum,
    620     uint32_t gennum) {
    621   CPDF_Object* pLenObj = pDict->GetObjectFor("Length");
    622   FX_FILESIZE len = -1;
    623   CPDF_Reference* pLenObjRef = ToReference(pLenObj);
    624 
    625   bool differingObjNum = !pLenObjRef || (pLenObjRef->GetObjList() &&
    626                                          pLenObjRef->GetRefObjNum() != objnum);
    627   if (pLenObj && differingObjNum)
    628     len = pLenObj->GetInteger();
    629 
    630   // Locate the start of stream.
    631   ToNextLine();
    632   FX_FILESIZE streamStartPos = m_Pos;
    633 
    634   const CFX_ByteStringC kEndStreamStr("endstream");
    635   const CFX_ByteStringC kEndObjStr("endobj");
    636 
    637   CPDF_CryptoHandler* pCryptoHandler =
    638       objnum == m_MetadataObjnum ? nullptr : m_pCryptoHandler.get();
    639   if (!pCryptoHandler) {
    640     bool bSearchForKeyword = true;
    641     if (len >= 0) {
    642       pdfium::base::CheckedNumeric<FX_FILESIZE> pos = m_Pos;
    643       pos += len;
    644       if (pos.IsValid() && pos.ValueOrDie() < m_FileLen)
    645         m_Pos = pos.ValueOrDie();
    646 
    647       m_Pos += ReadEOLMarkers(m_Pos);
    648       FXSYS_memset(m_WordBuffer, 0, kEndStreamStr.GetLength() + 1);
    649       GetNextWordInternal(nullptr);
    650       // Earlier version of PDF specification doesn't require EOL marker before
    651       // 'endstream' keyword. If keyword 'endstream' follows the bytes in
    652       // specified length, it signals the end of stream.
    653       if (FXSYS_memcmp(m_WordBuffer, kEndStreamStr.raw_str(),
    654                        kEndStreamStr.GetLength()) == 0) {
    655         bSearchForKeyword = false;
    656       }
    657     }
    658 
    659     if (bSearchForKeyword) {
    660       // If len is not available, len needs to be calculated
    661       // by searching the keywords "endstream" or "endobj".
    662       m_Pos = streamStartPos;
    663       FX_FILESIZE endStreamOffset = 0;
    664       while (endStreamOffset >= 0) {
    665         endStreamOffset = FindTag(kEndStreamStr, 0);
    666 
    667         // Can't find "endstream".
    668         if (endStreamOffset < 0)
    669           break;
    670 
    671         // Stop searching when "endstream" is found.
    672         if (IsWholeWord(m_Pos - kEndStreamStr.GetLength(), m_FileLen,
    673                         kEndStreamStr, true)) {
    674           endStreamOffset = m_Pos - streamStartPos - kEndStreamStr.GetLength();
    675           break;
    676         }
    677       }
    678 
    679       m_Pos = streamStartPos;
    680       FX_FILESIZE endObjOffset = 0;
    681       while (endObjOffset >= 0) {
    682         endObjOffset = FindTag(kEndObjStr, 0);
    683 
    684         // Can't find "endobj".
    685         if (endObjOffset < 0)
    686           break;
    687 
    688         // Stop searching when "endobj" is found.
    689         if (IsWholeWord(m_Pos - kEndObjStr.GetLength(), m_FileLen, kEndObjStr,
    690                         true)) {
    691           endObjOffset = m_Pos - streamStartPos - kEndObjStr.GetLength();
    692           break;
    693         }
    694       }
    695 
    696       // Can't find "endstream" or "endobj".
    697       if (endStreamOffset < 0 && endObjOffset < 0)
    698         return nullptr;
    699 
    700       if (endStreamOffset < 0 && endObjOffset >= 0) {
    701         // Correct the position of end stream.
    702         endStreamOffset = endObjOffset;
    703       } else if (endStreamOffset >= 0 && endObjOffset < 0) {
    704         // Correct the position of end obj.
    705         endObjOffset = endStreamOffset;
    706       } else if (endStreamOffset > endObjOffset) {
    707         endStreamOffset = endObjOffset;
    708       }
    709       len = endStreamOffset;
    710 
    711       int numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 2);
    712       if (numMarkers == 2) {
    713         len -= 2;
    714       } else {
    715         numMarkers = ReadEOLMarkers(streamStartPos + endStreamOffset - 1);
    716         if (numMarkers == 1) {
    717           len -= 1;
    718         }
    719       }
    720       if (len < 0)
    721         return nullptr;
    722 
    723       pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(len));
    724     }
    725     m_Pos = streamStartPos;
    726   }
    727   if (len < 0)
    728     return nullptr;
    729 
    730   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
    731   if (len > 0) {
    732     pData.reset(FX_Alloc(uint8_t, len));
    733     ReadBlock(pData.get(), len);
    734     if (pCryptoHandler) {
    735       CFX_BinaryBuf dest_buf;
    736       dest_buf.EstimateSize(pCryptoHandler->DecryptGetSize(len));
    737 
    738       void* context = pCryptoHandler->DecryptStart(objnum, gennum);
    739       pCryptoHandler->DecryptStream(context, pData.get(), len, dest_buf);
    740       pCryptoHandler->DecryptFinish(context, dest_buf);
    741       len = dest_buf.GetSize();
    742       pData = dest_buf.DetachBuffer();
    743     }
    744   }
    745 
    746   auto pStream =
    747       pdfium::MakeUnique<CPDF_Stream>(std::move(pData), len, std::move(pDict));
    748   streamStartPos = m_Pos;
    749   FXSYS_memset(m_WordBuffer, 0, kEndObjStr.GetLength() + 1);
    750   GetNextWordInternal(nullptr);
    751 
    752   int numMarkers = ReadEOLMarkers(m_Pos);
    753   if (m_WordSize == static_cast<unsigned int>(kEndObjStr.GetLength()) &&
    754       numMarkers != 0 &&
    755       FXSYS_memcmp(m_WordBuffer, kEndObjStr.raw_str(),
    756                    kEndObjStr.GetLength()) == 0) {
    757     m_Pos = streamStartPos;
    758   }
    759   return pStream;
    760 }
    761 
    762 void CPDF_SyntaxParser::InitParser(
    763     const CFX_RetainPtr<IFX_SeekableReadStream>& pFileAccess,
    764     uint32_t HeaderOffset) {
    765   FX_Free(m_pFileBuf);
    766 
    767   m_pFileBuf = FX_Alloc(uint8_t, m_BufSize);
    768   m_HeaderOffset = HeaderOffset;
    769   m_FileLen = pFileAccess->GetSize();
    770   m_Pos = 0;
    771   m_pFileAccess = pFileAccess;
    772   m_BufOffset = 0;
    773   pFileAccess->ReadBlock(m_pFileBuf, 0,
    774                          std::min(m_BufSize, static_cast<uint32_t>(m_FileLen)));
    775 }
    776 
    777 uint32_t CPDF_SyntaxParser::GetDirectNum() {
    778   bool bIsNumber;
    779   GetNextWordInternal(&bIsNumber);
    780   if (!bIsNumber)
    781     return 0;
    782 
    783   m_WordBuffer[m_WordSize] = 0;
    784   return FXSYS_atoui(reinterpret_cast<const FX_CHAR*>(m_WordBuffer));
    785 }
    786 
    787 bool CPDF_SyntaxParser::IsWholeWord(FX_FILESIZE startpos,
    788                                     FX_FILESIZE limit,
    789                                     const CFX_ByteStringC& tag,
    790                                     bool checkKeyword) {
    791   const uint32_t taglen = tag.GetLength();
    792 
    793   bool bCheckLeft = !PDFCharIsDelimiter(tag[0]) && !PDFCharIsWhitespace(tag[0]);
    794   bool bCheckRight = !PDFCharIsDelimiter(tag[taglen - 1]) &&
    795                      !PDFCharIsWhitespace(tag[taglen - 1]);
    796 
    797   uint8_t ch;
    798   if (bCheckRight && startpos + (int32_t)taglen <= limit &&
    799       GetCharAt(startpos + (int32_t)taglen, ch)) {
    800     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
    801         (checkKeyword && PDFCharIsDelimiter(ch))) {
    802       return false;
    803     }
    804   }
    805 
    806   if (bCheckLeft && startpos > 0 && GetCharAt(startpos - 1, ch)) {
    807     if (PDFCharIsNumeric(ch) || PDFCharIsOther(ch) ||
    808         (checkKeyword && PDFCharIsDelimiter(ch))) {
    809       return false;
    810     }
    811   }
    812   return true;
    813 }
    814 
    815 // TODO(dsinclair): Split into a SearchWordForward and SearchWordBackwards
    816 // and drop the bool.
    817 bool CPDF_SyntaxParser::SearchWord(const CFX_ByteStringC& tag,
    818                                    bool bWholeWord,
    819                                    bool bForward,
    820                                    FX_FILESIZE limit) {
    821   int32_t taglen = tag.GetLength();
    822   if (taglen == 0)
    823     return false;
    824 
    825   FX_FILESIZE pos = m_Pos;
    826   int32_t offset = 0;
    827   if (!bForward)
    828     offset = taglen - 1;
    829 
    830   const uint8_t* tag_data = tag.raw_str();
    831   uint8_t byte;
    832   while (1) {
    833     if (bForward) {
    834       if (limit && pos >= m_Pos + limit)
    835         return false;
    836 
    837       if (!GetCharAt(pos, byte))
    838         return false;
    839 
    840     } else {
    841       if (limit && pos <= m_Pos - limit)
    842         return false;
    843 
    844       if (!GetCharAtBackward(pos, byte))
    845         return false;
    846     }
    847 
    848     if (byte == tag_data[offset]) {
    849       if (bForward) {
    850         offset++;
    851         if (offset < taglen) {
    852           pos++;
    853           continue;
    854         }
    855       } else {
    856         offset--;
    857         if (offset >= 0) {
    858           pos--;
    859           continue;
    860         }
    861       }
    862 
    863       FX_FILESIZE startpos = bForward ? pos - taglen + 1 : pos;
    864       if (!bWholeWord || IsWholeWord(startpos, limit, tag, false)) {
    865         m_Pos = startpos;
    866         return true;
    867       }
    868     }
    869 
    870     if (bForward) {
    871       offset = byte == tag_data[0] ? 1 : 0;
    872       pos++;
    873     } else {
    874       offset = byte == tag_data[taglen - 1] ? taglen - 2 : taglen - 1;
    875       pos--;
    876     }
    877 
    878     if (pos < 0)
    879       return false;
    880   }
    881 
    882   return false;
    883 }
    884 
    885 FX_FILESIZE CPDF_SyntaxParser::FindTag(const CFX_ByteStringC& tag,
    886                                        FX_FILESIZE limit) {
    887   int32_t taglen = tag.GetLength();
    888   int32_t match = 0;
    889   limit += m_Pos;
    890   FX_FILESIZE startpos = m_Pos;
    891 
    892   while (1) {
    893     uint8_t ch;
    894     if (!GetNextChar(ch))
    895       return -1;
    896 
    897     if (ch == tag[match]) {
    898       match++;
    899       if (match == taglen)
    900         return m_Pos - startpos - taglen;
    901     } else {
    902       match = ch == tag[0] ? 1 : 0;
    903     }
    904 
    905     if (limit && m_Pos == limit)
    906       return -1;
    907   }
    908   return -1;
    909 }
    910 
    911 void CPDF_SyntaxParser::SetEncrypt(
    912     std::unique_ptr<CPDF_CryptoHandler> pCryptoHandler) {
    913   m_pCryptoHandler = std::move(pCryptoHandler);
    914 }
    915