Home | History | Annotate | Download | only in page
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/page/cpdf_streamparser.h"
      8 
      9 #include <limits.h>
     10 
     11 #include <memory>
     12 #include <utility>
     13 
     14 #include "core/fpdfapi/cpdf_modulemgr.h"
     15 #include "core/fpdfapi/page/cpdf_docpagedata.h"
     16 #include "core/fpdfapi/parser/cpdf_array.h"
     17 #include "core/fpdfapi/parser/cpdf_boolean.h"
     18 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     19 #include "core/fpdfapi/parser/cpdf_document.h"
     20 #include "core/fpdfapi/parser/cpdf_name.h"
     21 #include "core/fpdfapi/parser/cpdf_null.h"
     22 #include "core/fpdfapi/parser/cpdf_number.h"
     23 #include "core/fpdfapi/parser/cpdf_stream.h"
     24 #include "core/fpdfapi/parser/cpdf_string.h"
     25 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     26 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     27 #include "core/fxcodec/fx_codec.h"
     28 #include "core/fxcrt/fx_ext.h"
     29 
     30 namespace {
     31 
     32 const uint32_t kMaxNestedParsingLevel = 512;
     33 const uint32_t kMaxWordBuffer = 256;
     34 const FX_STRSIZE kMaxStringLength = 32767;
     35 
     36 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
     37                             uint8_t*& dest_buf,
     38                             uint32_t& dest_size) {
     39   if (!pDecoder)
     40     return FX_INVALID_OFFSET;
     41   int ncomps = pDecoder->CountComps();
     42   int bpc = pDecoder->GetBPC();
     43   int width = pDecoder->GetWidth();
     44   int height = pDecoder->GetHeight();
     45   int pitch = (width * ncomps * bpc + 7) / 8;
     46   if (height == 0 || pitch > (1 << 30) / height)
     47     return FX_INVALID_OFFSET;
     48 
     49   dest_buf = FX_Alloc2D(uint8_t, pitch, height);
     50   dest_size = pitch * height;  // Safe since checked alloc returned.
     51   for (int row = 0; row < height; row++) {
     52     const uint8_t* pLine = pDecoder->GetScanline(row);
     53     if (!pLine)
     54       break;
     55 
     56     FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
     57   }
     58   return pDecoder->GetSrcOffset();
     59 }
     60 
     61 uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf,
     62                                 uint32_t limit,
     63                                 int width,
     64                                 int height,
     65                                 CFX_ByteString& decoder,
     66                                 CPDF_Dictionary* pParam,
     67                                 uint8_t*& dest_buf,
     68                                 uint32_t& dest_size) {
     69   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
     70     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
     71         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
     72     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
     73   }
     74   if (decoder == "ASCII85Decode" || decoder == "A85")
     75     return A85Decode(src_buf, limit, dest_buf, dest_size);
     76   if (decoder == "ASCIIHexDecode" || decoder == "AHx")
     77     return HexDecode(src_buf, limit, dest_buf, dest_size);
     78   if (decoder == "FlateDecode" || decoder == "Fl") {
     79     return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size,
     80                                     dest_buf, dest_size);
     81   }
     82   if (decoder == "LZWDecode" || decoder == "LZW") {
     83     return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
     84                                     dest_size);
     85   }
     86   if (decoder == "DCTDecode" || decoder == "DCT") {
     87     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
     88         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
     89             src_buf, limit, width, height, 0,
     90             !pParam || pParam->GetIntegerFor("ColorTransform", 1));
     91     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
     92   }
     93   if (decoder == "RunLengthDecode" || decoder == "RL")
     94     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
     95   dest_size = 0;
     96   dest_buf = 0;
     97   return (uint32_t)-1;
     98 }
     99 
    100 }  // namespace
    101 
    102 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
    103     : m_pBuf(pData),
    104       m_Size(dwSize),
    105       m_Pos(0),
    106       m_pPool(nullptr) {}
    107 
    108 CPDF_StreamParser::CPDF_StreamParser(
    109     const uint8_t* pData,
    110     uint32_t dwSize,
    111     const CFX_WeakPtr<CFX_ByteStringPool>& pPool)
    112     : m_pBuf(pData),
    113       m_Size(dwSize),
    114       m_Pos(0),
    115       m_pPool(pPool) {}
    116 
    117 CPDF_StreamParser::~CPDF_StreamParser() {}
    118 
    119 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
    120     CPDF_Document* pDoc,
    121     std::unique_ptr<CPDF_Dictionary> pDict,
    122     CPDF_Object* pCSObj) {
    123   if (m_Pos == m_Size)
    124     return nullptr;
    125 
    126   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
    127     m_Pos++;
    128 
    129   CFX_ByteString Decoder;
    130   CPDF_Dictionary* pParam = nullptr;
    131   CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
    132   if (pFilter) {
    133     if (CPDF_Array* pArray = pFilter->AsArray()) {
    134       Decoder = pArray->GetStringAt(0);
    135       CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
    136       if (pParams)
    137         pParam = pParams->GetDictAt(0);
    138     } else {
    139       Decoder = pFilter->GetString();
    140       pParam = pDict->GetDictFor("DecodeParms");
    141     }
    142   }
    143   uint32_t width = pDict->GetIntegerFor("Width");
    144   uint32_t height = pDict->GetIntegerFor("Height");
    145   uint32_t OrigSize = 0;
    146   if (pCSObj) {
    147     uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
    148     uint32_t nComponents = 1;
    149     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
    150     if (pCS) {
    151       nComponents = pCS->CountComponents();
    152       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
    153     } else {
    154       nComponents = 3;
    155     }
    156     uint32_t pitch = width;
    157     if (bpc && pitch > INT_MAX / bpc)
    158       return nullptr;
    159 
    160     pitch *= bpc;
    161     if (nComponents && pitch > INT_MAX / nComponents)
    162       return nullptr;
    163 
    164     pitch *= nComponents;
    165     if (pitch > INT_MAX - 7)
    166       return nullptr;
    167 
    168     pitch += 7;
    169     pitch /= 8;
    170     OrigSize = pitch;
    171   } else {
    172     if (width > INT_MAX - 7)
    173       return nullptr;
    174 
    175     OrigSize = ((width + 7) / 8);
    176   }
    177   if (height && OrigSize > INT_MAX / height)
    178     return nullptr;
    179 
    180   OrigSize *= height;
    181   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
    182   uint32_t dwStreamSize;
    183   if (Decoder.IsEmpty()) {
    184     if (OrigSize > m_Size - m_Pos)
    185       OrigSize = m_Size - m_Pos;
    186     pData.reset(FX_Alloc(uint8_t, OrigSize));
    187     FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
    188     dwStreamSize = OrigSize;
    189     m_Pos += OrigSize;
    190   } else {
    191     uint8_t* pIgnore = nullptr;
    192     uint32_t dwDestSize = OrigSize;
    193     dwStreamSize =
    194         PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
    195                                Decoder, pParam, pIgnore, dwDestSize);
    196     FX_Free(pIgnore);
    197     if (static_cast<int>(dwStreamSize) < 0)
    198       return nullptr;
    199 
    200     uint32_t dwSavePos = m_Pos;
    201     m_Pos += dwStreamSize;
    202     while (1) {
    203       uint32_t dwPrevPos = m_Pos;
    204       CPDF_StreamParser::SyntaxType type = ParseNextElement();
    205       if (type == CPDF_StreamParser::EndOfData)
    206         break;
    207 
    208       if (type != CPDF_StreamParser::Keyword) {
    209         dwStreamSize += m_Pos - dwPrevPos;
    210         continue;
    211       }
    212       if (GetWord() == "EI") {
    213         m_Pos = dwPrevPos;
    214         break;
    215       }
    216       dwStreamSize += m_Pos - dwPrevPos;
    217     }
    218     m_Pos = dwSavePos;
    219     pData.reset(FX_Alloc(uint8_t, dwStreamSize));
    220     FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
    221     m_Pos += dwStreamSize;
    222   }
    223   pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize);
    224   return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
    225                                          std::move(pDict));
    226 }
    227 
    228 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
    229   m_pLastObj.reset();
    230   m_WordSize = 0;
    231   if (!PositionIsInBounds())
    232     return EndOfData;
    233 
    234   int ch = m_pBuf[m_Pos++];
    235   while (1) {
    236     while (PDFCharIsWhitespace(ch)) {
    237       if (!PositionIsInBounds())
    238         return EndOfData;
    239 
    240       ch = m_pBuf[m_Pos++];
    241     }
    242 
    243     if (ch != '%')
    244       break;
    245 
    246     while (1) {
    247       if (!PositionIsInBounds())
    248         return EndOfData;
    249 
    250       ch = m_pBuf[m_Pos++];
    251       if (PDFCharIsLineEnding(ch))
    252         break;
    253     }
    254   }
    255 
    256   if (PDFCharIsDelimiter(ch) && ch != '/') {
    257     m_Pos--;
    258     m_pLastObj = ReadNextObject(false, false, 0);
    259     return Others;
    260   }
    261 
    262   bool bIsNumber = true;
    263   while (1) {
    264     if (m_WordSize < kMaxWordBuffer)
    265       m_WordBuffer[m_WordSize++] = ch;
    266 
    267     if (!PDFCharIsNumeric(ch))
    268       bIsNumber = false;
    269 
    270     if (!PositionIsInBounds())
    271       break;
    272 
    273     ch = m_pBuf[m_Pos++];
    274 
    275     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    276       m_Pos--;
    277       break;
    278     }
    279   }
    280 
    281   m_WordBuffer[m_WordSize] = 0;
    282   if (bIsNumber)
    283     return Number;
    284 
    285   if (m_WordBuffer[0] == '/')
    286     return Name;
    287 
    288   if (m_WordSize == 4) {
    289     if (memcmp(m_WordBuffer, "true", 4) == 0) {
    290       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
    291       return Others;
    292     }
    293     if (memcmp(m_WordBuffer, "null", 4) == 0) {
    294       m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
    295       return Others;
    296     }
    297   } else if (m_WordSize == 5) {
    298     if (memcmp(m_WordBuffer, "false", 5) == 0) {
    299       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
    300       return Others;
    301     }
    302   }
    303   return Keyword;
    304 }
    305 
    306 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
    307     bool bAllowNestedArray,
    308     bool bInArray,
    309     uint32_t dwRecursionLevel) {
    310   bool bIsNumber;
    311   // Must get the next word before returning to avoid infinite loops.
    312   GetNextWord(bIsNumber);
    313   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
    314     return nullptr;
    315 
    316   if (bIsNumber) {
    317     m_WordBuffer[m_WordSize] = 0;
    318     return pdfium::MakeUnique<CPDF_Number>(
    319         CFX_ByteStringC(m_WordBuffer, m_WordSize));
    320   }
    321 
    322   int first_char = m_WordBuffer[0];
    323   if (first_char == '/') {
    324     CFX_ByteString name =
    325         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
    326     return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
    327   }
    328 
    329   if (first_char == '(') {
    330     CFX_ByteString str = ReadString();
    331     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
    332   }
    333 
    334   if (first_char == '<') {
    335     if (m_WordSize == 1)
    336       return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
    337 
    338     auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    339     while (1) {
    340       GetNextWord(bIsNumber);
    341       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
    342         break;
    343 
    344       if (!m_WordSize || m_WordBuffer[0] != '/')
    345         return nullptr;
    346 
    347       CFX_ByteString key =
    348           PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
    349       std::unique_ptr<CPDF_Object> pObj =
    350           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
    351       if (!pObj)
    352         return nullptr;
    353 
    354       if (!key.IsEmpty())
    355         pDict->SetFor(key, std::move(pObj));
    356     }
    357     return std::move(pDict);
    358   }
    359 
    360   if (first_char == '[') {
    361     if ((!bAllowNestedArray && bInArray))
    362       return nullptr;
    363 
    364     auto pArray = pdfium::MakeUnique<CPDF_Array>();
    365     while (1) {
    366       std::unique_ptr<CPDF_Object> pObj =
    367           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
    368       if (pObj) {
    369         pArray->Add(std::move(pObj));
    370         continue;
    371       }
    372       if (!m_WordSize || m_WordBuffer[0] == ']')
    373         break;
    374     }
    375     return std::move(pArray);
    376   }
    377 
    378   if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
    379     return pdfium::MakeUnique<CPDF_Boolean>(false);
    380 
    381   if (m_WordSize == 4) {
    382     if (memcmp(m_WordBuffer, "true", 4) == 0)
    383       return pdfium::MakeUnique<CPDF_Boolean>(true);
    384     if (memcmp(m_WordBuffer, "null", 4) == 0)
    385       return pdfium::MakeUnique<CPDF_Null>();
    386   }
    387 
    388   return nullptr;
    389 }
    390 
    391 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
    392 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
    393   m_WordSize = 0;
    394   bIsNumber = true;
    395   if (!PositionIsInBounds())
    396     return;
    397 
    398   int ch = m_pBuf[m_Pos++];
    399   while (1) {
    400     while (PDFCharIsWhitespace(ch)) {
    401       if (!PositionIsInBounds()) {
    402         return;
    403       }
    404       ch = m_pBuf[m_Pos++];
    405     }
    406 
    407     if (ch != '%')
    408       break;
    409 
    410     while (1) {
    411       if (!PositionIsInBounds())
    412         return;
    413       ch = m_pBuf[m_Pos++];
    414       if (PDFCharIsLineEnding(ch))
    415         break;
    416     }
    417   }
    418 
    419   if (PDFCharIsDelimiter(ch)) {
    420     bIsNumber = false;
    421     m_WordBuffer[m_WordSize++] = ch;
    422     if (ch == '/') {
    423       while (1) {
    424         if (!PositionIsInBounds())
    425           return;
    426         ch = m_pBuf[m_Pos++];
    427         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    428           m_Pos--;
    429           return;
    430         }
    431 
    432         if (m_WordSize < kMaxWordBuffer)
    433           m_WordBuffer[m_WordSize++] = ch;
    434       }
    435     } else if (ch == '<') {
    436       if (!PositionIsInBounds())
    437         return;
    438       ch = m_pBuf[m_Pos++];
    439       if (ch == '<')
    440         m_WordBuffer[m_WordSize++] = ch;
    441       else
    442         m_Pos--;
    443     } else if (ch == '>') {
    444       if (!PositionIsInBounds())
    445         return;
    446       ch = m_pBuf[m_Pos++];
    447       if (ch == '>')
    448         m_WordBuffer[m_WordSize++] = ch;
    449       else
    450         m_Pos--;
    451     }
    452     return;
    453   }
    454 
    455   while (1) {
    456     if (m_WordSize < kMaxWordBuffer)
    457       m_WordBuffer[m_WordSize++] = ch;
    458     if (!PDFCharIsNumeric(ch))
    459       bIsNumber = false;
    460 
    461     if (!PositionIsInBounds())
    462       return;
    463     ch = m_pBuf[m_Pos++];
    464     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    465       m_Pos--;
    466       break;
    467     }
    468   }
    469 }
    470 
    471 CFX_ByteString CPDF_StreamParser::ReadString() {
    472   if (!PositionIsInBounds())
    473     return CFX_ByteString();
    474 
    475   uint8_t ch = m_pBuf[m_Pos++];
    476   CFX_ByteTextBuf buf;
    477   int parlevel = 0;
    478   int status = 0;
    479   int iEscCode = 0;
    480   while (1) {
    481     switch (status) {
    482       case 0:
    483         if (ch == ')') {
    484           if (parlevel == 0) {
    485             if (buf.GetLength() > kMaxStringLength) {
    486               return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
    487             }
    488             return buf.MakeString();
    489           }
    490           parlevel--;
    491           buf.AppendChar(')');
    492         } else if (ch == '(') {
    493           parlevel++;
    494           buf.AppendChar('(');
    495         } else if (ch == '\\') {
    496           status = 1;
    497         } else {
    498           buf.AppendChar((char)ch);
    499         }
    500         break;
    501       case 1:
    502         if (ch >= '0' && ch <= '7') {
    503           iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    504           status = 2;
    505           break;
    506         }
    507         if (ch == 'n') {
    508           buf.AppendChar('\n');
    509         } else if (ch == 'r') {
    510           buf.AppendChar('\r');
    511         } else if (ch == 't') {
    512           buf.AppendChar('\t');
    513         } else if (ch == 'b') {
    514           buf.AppendChar('\b');
    515         } else if (ch == 'f') {
    516           buf.AppendChar('\f');
    517         } else if (ch == '\r') {
    518           status = 4;
    519           break;
    520         } else if (ch == '\n') {
    521         } else {
    522           buf.AppendChar(ch);
    523         }
    524         status = 0;
    525         break;
    526       case 2:
    527         if (ch >= '0' && ch <= '7') {
    528           iEscCode =
    529               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    530           status = 3;
    531         } else {
    532           buf.AppendChar(iEscCode);
    533           status = 0;
    534           continue;
    535         }
    536         break;
    537       case 3:
    538         if (ch >= '0' && ch <= '7') {
    539           iEscCode =
    540               iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch));
    541           buf.AppendChar(iEscCode);
    542           status = 0;
    543         } else {
    544           buf.AppendChar(iEscCode);
    545           status = 0;
    546           continue;
    547         }
    548         break;
    549       case 4:
    550         status = 0;
    551         if (ch != '\n') {
    552           continue;
    553         }
    554         break;
    555     }
    556     if (!PositionIsInBounds())
    557       break;
    558 
    559     ch = m_pBuf[m_Pos++];
    560   }
    561   if (PositionIsInBounds())
    562     ++m_Pos;
    563 
    564   if (buf.GetLength() > kMaxStringLength) {
    565     return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
    566   }
    567   return buf.MakeString();
    568 }
    569 
    570 CFX_ByteString CPDF_StreamParser::ReadHexString() {
    571   if (!PositionIsInBounds())
    572     return CFX_ByteString();
    573 
    574   CFX_ByteTextBuf buf;
    575   bool bFirst = true;
    576   int code = 0;
    577   while (PositionIsInBounds()) {
    578     int ch = m_pBuf[m_Pos++];
    579 
    580     if (ch == '>')
    581       break;
    582 
    583     if (!std::isxdigit(ch))
    584       continue;
    585 
    586     int val = FXSYS_toHexDigit(ch);
    587     if (bFirst) {
    588       code = val * 16;
    589     } else {
    590       code += val;
    591       buf.AppendByte((uint8_t)code);
    592     }
    593     bFirst = !bFirst;
    594   }
    595   if (!bFirst)
    596     buf.AppendChar((char)code);
    597 
    598   if (buf.GetLength() > kMaxStringLength)
    599     return CFX_ByteString(buf.GetBuffer(), kMaxStringLength);
    600 
    601   return buf.MakeString();
    602 }
    603 
    604 bool CPDF_StreamParser::PositionIsInBounds() const {
    605   return m_Pos < m_Size;
    606 }
    607