Home | History | Annotate | Download | only in page
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/page/cpdf_streamparser.h"
      8 
      9 #include <limits.h>
     10 
     11 #include <algorithm>
     12 #include <memory>
     13 #include <sstream>
     14 #include <utility>
     15 
     16 #include "core/fpdfapi/cpdf_modulemgr.h"
     17 #include "core/fpdfapi/page/cpdf_docpagedata.h"
     18 #include "core/fpdfapi/parser/cpdf_array.h"
     19 #include "core/fpdfapi/parser/cpdf_boolean.h"
     20 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     21 #include "core/fpdfapi/parser/cpdf_document.h"
     22 #include "core/fpdfapi/parser/cpdf_name.h"
     23 #include "core/fpdfapi/parser/cpdf_null.h"
     24 #include "core/fpdfapi/parser/cpdf_number.h"
     25 #include "core/fpdfapi/parser/cpdf_stream.h"
     26 #include "core/fpdfapi/parser/cpdf_string.h"
     27 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     28 #include "core/fpdfapi/parser/fpdf_parser_utility.h"
     29 #include "core/fxcodec/codec/ccodec_jpegmodule.h"
     30 #include "core/fxcodec/codec/ccodec_scanlinedecoder.h"
     31 #include "core/fxcrt/fx_extension.h"
     32 
     33 namespace {
     34 
     35 const uint32_t kMaxNestedParsingLevel = 512;
     36 const uint32_t kMaxWordBuffer = 256;
     37 const size_t kMaxStringLength = 32767;
     38 
     39 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder,
     40                             uint8_t** dest_buf,
     41                             uint32_t* dest_size) {
     42   if (!pDecoder)
     43     return FX_INVALID_OFFSET;
     44   int ncomps = pDecoder->CountComps();
     45   int bpc = pDecoder->GetBPC();
     46   int width = pDecoder->GetWidth();
     47   int height = pDecoder->GetHeight();
     48   int pitch = (width * ncomps * bpc + 7) / 8;
     49   if (height == 0 || pitch > (1 << 30) / height)
     50     return FX_INVALID_OFFSET;
     51 
     52   *dest_buf = FX_Alloc2D(uint8_t, pitch, height);
     53   *dest_size = pitch * height;  // Safe since checked alloc returned.
     54   for (int row = 0; row < height; ++row) {
     55     const uint8_t* pLine = pDecoder->GetScanline(row);
     56     if (!pLine)
     57       break;
     58 
     59     memcpy(*dest_buf + row * pitch, pLine, pitch);
     60   }
     61   return pDecoder->GetSrcOffset();
     62 }
     63 
     64 uint32_t DecodeInlineStream(const uint8_t* src_buf,
     65                             uint32_t limit,
     66                             int width,
     67                             int height,
     68                             const ByteString& decoder,
     69                             CPDF_Dictionary* pParam,
     70                             uint8_t** dest_buf,
     71                             uint32_t* dest_size) {
     72   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
     73     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
     74         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
     75     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
     76   }
     77   if (decoder == "ASCII85Decode" || decoder == "A85")
     78     return A85Decode(src_buf, limit, dest_buf, dest_size);
     79   if (decoder == "ASCIIHexDecode" || decoder == "AHx")
     80     return HexDecode(src_buf, limit, dest_buf, dest_size);
     81   if (decoder == "FlateDecode" || decoder == "Fl") {
     82     return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, *dest_size,
     83                                     dest_buf, dest_size);
     84   }
     85   if (decoder == "LZWDecode" || decoder == "LZW") {
     86     return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf,
     87                                     dest_size);
     88   }
     89   if (decoder == "DCTDecode" || decoder == "DCT") {
     90     std::unique_ptr<CCodec_ScanlineDecoder> pDecoder =
     91         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
     92             src_buf, limit, width, height, 0,
     93             !pParam || pParam->GetIntegerFor("ColorTransform", 1));
     94     return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size);
     95   }
     96   if (decoder == "RunLengthDecode" || decoder == "RL")
     97     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
     98   *dest_size = 0;
     99   *dest_buf = 0;
    100   return 0xFFFFFFFF;
    101 }
    102 
    103 }  // namespace
    104 
    105 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize)
    106     : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {}
    107 
    108 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData,
    109                                      uint32_t dwSize,
    110                                      const WeakPtr<ByteStringPool>& pPool)
    111     : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(pPool) {}
    112 
    113 CPDF_StreamParser::~CPDF_StreamParser() {}
    114 
    115 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream(
    116     CPDF_Document* pDoc,
    117     std::unique_ptr<CPDF_Dictionary> pDict,
    118     CPDF_Object* pCSObj) {
    119   if (m_Pos == m_Size)
    120     return nullptr;
    121 
    122   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
    123     m_Pos++;
    124 
    125   ByteString Decoder;
    126   CPDF_Dictionary* pParam = nullptr;
    127   CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter");
    128   if (pFilter) {
    129     if (CPDF_Array* pArray = pFilter->AsArray()) {
    130       Decoder = pArray->GetStringAt(0);
    131       CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms");
    132       if (pParams)
    133         pParam = pParams->GetDictAt(0);
    134     } else {
    135       Decoder = pFilter->GetString();
    136       pParam = pDict->GetDictFor("DecodeParms");
    137     }
    138   }
    139   uint32_t width = pDict->GetIntegerFor("Width");
    140   uint32_t height = pDict->GetIntegerFor("Height");
    141   uint32_t OrigSize = 0;
    142   if (pCSObj) {
    143     uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent");
    144     uint32_t nComponents = 1;
    145     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
    146     if (pCS) {
    147       nComponents = pCS->CountComponents();
    148       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
    149     } else {
    150       nComponents = 3;
    151     }
    152     uint32_t pitch = width;
    153     if (bpc && pitch > INT_MAX / bpc)
    154       return nullptr;
    155 
    156     pitch *= bpc;
    157     if (nComponents && pitch > INT_MAX / nComponents)
    158       return nullptr;
    159 
    160     pitch *= nComponents;
    161     if (pitch > INT_MAX - 7)
    162       return nullptr;
    163 
    164     pitch += 7;
    165     pitch /= 8;
    166     OrigSize = pitch;
    167   } else {
    168     if (width > INT_MAX - 7)
    169       return nullptr;
    170 
    171     OrigSize = ((width + 7) / 8);
    172   }
    173   if (height && OrigSize > INT_MAX / height)
    174     return nullptr;
    175 
    176   OrigSize *= height;
    177   std::unique_ptr<uint8_t, FxFreeDeleter> pData;
    178   uint32_t dwStreamSize;
    179   if (Decoder.IsEmpty()) {
    180     if (OrigSize > m_Size - m_Pos)
    181       OrigSize = m_Size - m_Pos;
    182     pData.reset(FX_Alloc(uint8_t, OrigSize));
    183     memcpy(pData.get(), m_pBuf + m_Pos, OrigSize);
    184     dwStreamSize = OrigSize;
    185     m_Pos += OrigSize;
    186   } else {
    187     uint8_t* pIgnore = nullptr;
    188     uint32_t dwDestSize = OrigSize;
    189     dwStreamSize =
    190         DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
    191                            Decoder, pParam, &pIgnore, &dwDestSize);
    192     FX_Free(pIgnore);
    193     if (static_cast<int>(dwStreamSize) < 0)
    194       return nullptr;
    195 
    196     uint32_t dwSavePos = m_Pos;
    197     m_Pos += dwStreamSize;
    198     while (1) {
    199       uint32_t dwPrevPos = m_Pos;
    200       CPDF_StreamParser::SyntaxType type = ParseNextElement();
    201       if (type == CPDF_StreamParser::EndOfData)
    202         break;
    203 
    204       if (type != CPDF_StreamParser::Keyword) {
    205         dwStreamSize += m_Pos - dwPrevPos;
    206         continue;
    207       }
    208       if (GetWord() == "EI") {
    209         m_Pos = dwPrevPos;
    210         break;
    211       }
    212       dwStreamSize += m_Pos - dwPrevPos;
    213     }
    214     m_Pos = dwSavePos;
    215     pData.reset(FX_Alloc(uint8_t, dwStreamSize));
    216     memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize);
    217     m_Pos += dwStreamSize;
    218   }
    219   pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize));
    220   return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize,
    221                                          std::move(pDict));
    222 }
    223 
    224 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
    225   m_pLastObj.reset();
    226   m_WordSize = 0;
    227   if (!PositionIsInBounds())
    228     return EndOfData;
    229 
    230   int ch = m_pBuf[m_Pos++];
    231   while (1) {
    232     while (PDFCharIsWhitespace(ch)) {
    233       if (!PositionIsInBounds())
    234         return EndOfData;
    235 
    236       ch = m_pBuf[m_Pos++];
    237     }
    238 
    239     if (ch != '%')
    240       break;
    241 
    242     while (1) {
    243       if (!PositionIsInBounds())
    244         return EndOfData;
    245 
    246       ch = m_pBuf[m_Pos++];
    247       if (PDFCharIsLineEnding(ch))
    248         break;
    249     }
    250   }
    251 
    252   if (PDFCharIsDelimiter(ch) && ch != '/') {
    253     m_Pos--;
    254     m_pLastObj = ReadNextObject(false, false, 0);
    255     return Others;
    256   }
    257 
    258   bool bIsNumber = true;
    259   while (1) {
    260     if (m_WordSize < kMaxWordBuffer)
    261       m_WordBuffer[m_WordSize++] = ch;
    262 
    263     if (!PDFCharIsNumeric(ch))
    264       bIsNumber = false;
    265 
    266     if (!PositionIsInBounds())
    267       break;
    268 
    269     ch = m_pBuf[m_Pos++];
    270 
    271     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    272       m_Pos--;
    273       break;
    274     }
    275   }
    276 
    277   m_WordBuffer[m_WordSize] = 0;
    278   if (bIsNumber)
    279     return Number;
    280 
    281   if (m_WordBuffer[0] == '/')
    282     return Name;
    283 
    284   if (m_WordSize == 4) {
    285     if (memcmp(m_WordBuffer, "true", 4) == 0) {
    286       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true);
    287       return Others;
    288     }
    289     if (memcmp(m_WordBuffer, "null", 4) == 0) {
    290       m_pLastObj = pdfium::MakeUnique<CPDF_Null>();
    291       return Others;
    292     }
    293   } else if (m_WordSize == 5) {
    294     if (memcmp(m_WordBuffer, "false", 5) == 0) {
    295       m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false);
    296       return Others;
    297     }
    298   }
    299   return Keyword;
    300 }
    301 
    302 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject(
    303     bool bAllowNestedArray,
    304     bool bInArray,
    305     uint32_t dwRecursionLevel) {
    306   bool bIsNumber;
    307   // Must get the next word before returning to avoid infinite loops.
    308   GetNextWord(bIsNumber);
    309   if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel)
    310     return nullptr;
    311 
    312   if (bIsNumber) {
    313     m_WordBuffer[m_WordSize] = 0;
    314     return pdfium::MakeUnique<CPDF_Number>(
    315         ByteStringView(m_WordBuffer, m_WordSize));
    316   }
    317 
    318   int first_char = m_WordBuffer[0];
    319   if (first_char == '/') {
    320     ByteString name =
    321         PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
    322     return pdfium::MakeUnique<CPDF_Name>(m_pPool, name);
    323   }
    324 
    325   if (first_char == '(') {
    326     ByteString str = ReadString();
    327     return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false);
    328   }
    329 
    330   if (first_char == '<') {
    331     if (m_WordSize == 1)
    332       return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true);
    333 
    334     auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool);
    335     while (1) {
    336       GetNextWord(bIsNumber);
    337       if (m_WordSize == 2 && m_WordBuffer[0] == '>')
    338         break;
    339 
    340       if (!m_WordSize || m_WordBuffer[0] != '/')
    341         return nullptr;
    342 
    343       ByteString key =
    344           PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1));
    345       std::unique_ptr<CPDF_Object> pObj =
    346           ReadNextObject(true, bInArray, dwRecursionLevel + 1);
    347       if (!pObj)
    348         return nullptr;
    349 
    350       if (!key.IsEmpty())
    351         pDict->SetFor(key, std::move(pObj));
    352     }
    353     return std::move(pDict);
    354   }
    355 
    356   if (first_char == '[') {
    357     if ((!bAllowNestedArray && bInArray))
    358       return nullptr;
    359 
    360     auto pArray = pdfium::MakeUnique<CPDF_Array>();
    361     while (1) {
    362       std::unique_ptr<CPDF_Object> pObj =
    363           ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1);
    364       if (pObj) {
    365         pArray->Add(std::move(pObj));
    366         continue;
    367       }
    368       if (!m_WordSize || m_WordBuffer[0] == ']')
    369         break;
    370     }
    371     return std::move(pArray);
    372   }
    373 
    374   if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5))
    375     return pdfium::MakeUnique<CPDF_Boolean>(false);
    376 
    377   if (m_WordSize == 4) {
    378     if (memcmp(m_WordBuffer, "true", 4) == 0)
    379       return pdfium::MakeUnique<CPDF_Boolean>(true);
    380     if (memcmp(m_WordBuffer, "null", 4) == 0)
    381       return pdfium::MakeUnique<CPDF_Null>();
    382   }
    383 
    384   return nullptr;
    385 }
    386 
    387 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser
    388 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) {
    389   m_WordSize = 0;
    390   bIsNumber = true;
    391   if (!PositionIsInBounds())
    392     return;
    393 
    394   int ch = m_pBuf[m_Pos++];
    395   while (1) {
    396     while (PDFCharIsWhitespace(ch)) {
    397       if (!PositionIsInBounds()) {
    398         return;
    399       }
    400       ch = m_pBuf[m_Pos++];
    401     }
    402 
    403     if (ch != '%')
    404       break;
    405 
    406     while (1) {
    407       if (!PositionIsInBounds())
    408         return;
    409       ch = m_pBuf[m_Pos++];
    410       if (PDFCharIsLineEnding(ch))
    411         break;
    412     }
    413   }
    414 
    415   if (PDFCharIsDelimiter(ch)) {
    416     bIsNumber = false;
    417     m_WordBuffer[m_WordSize++] = ch;
    418     if (ch == '/') {
    419       while (1) {
    420         if (!PositionIsInBounds())
    421           return;
    422         ch = m_pBuf[m_Pos++];
    423         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    424           m_Pos--;
    425           return;
    426         }
    427 
    428         if (m_WordSize < kMaxWordBuffer)
    429           m_WordBuffer[m_WordSize++] = ch;
    430       }
    431     } else if (ch == '<') {
    432       if (!PositionIsInBounds())
    433         return;
    434       ch = m_pBuf[m_Pos++];
    435       if (ch == '<')
    436         m_WordBuffer[m_WordSize++] = ch;
    437       else
    438         m_Pos--;
    439     } else if (ch == '>') {
    440       if (!PositionIsInBounds())
    441         return;
    442       ch = m_pBuf[m_Pos++];
    443       if (ch == '>')
    444         m_WordBuffer[m_WordSize++] = ch;
    445       else
    446         m_Pos--;
    447     }
    448     return;
    449   }
    450 
    451   while (1) {
    452     if (m_WordSize < kMaxWordBuffer)
    453       m_WordBuffer[m_WordSize++] = ch;
    454     if (!PDFCharIsNumeric(ch))
    455       bIsNumber = false;
    456 
    457     if (!PositionIsInBounds())
    458       return;
    459     ch = m_pBuf[m_Pos++];
    460     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    461       m_Pos--;
    462       break;
    463     }
    464   }
    465 }
    466 
    467 ByteString CPDF_StreamParser::ReadString() {
    468   if (!PositionIsInBounds())
    469     return ByteString();
    470 
    471   uint8_t ch = m_pBuf[m_Pos++];
    472   std::ostringstream buf;
    473   int parlevel = 0;
    474   int status = 0;
    475   int iEscCode = 0;
    476   while (1) {
    477     switch (status) {
    478       case 0:
    479         if (ch == ')') {
    480           if (parlevel == 0) {
    481             return ByteString(
    482                 buf.str().c_str(),
    483                 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
    484           }
    485           parlevel--;
    486           buf << ')';
    487         } else if (ch == '(') {
    488           parlevel++;
    489           buf << '(';
    490         } else if (ch == '\\') {
    491           status = 1;
    492         } else {
    493           buf << static_cast<char>(ch);
    494         }
    495         break;
    496       case 1:
    497         if (ch >= '0' && ch <= '7') {
    498           iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch));
    499           status = 2;
    500           break;
    501         }
    502         if (ch == '\r') {
    503           status = 4;
    504           break;
    505         }
    506         if (ch == '\n') {
    507           // Do nothing.
    508         } else if (ch == 'n') {
    509           buf << '\n';
    510         } else if (ch == 'r') {
    511           buf << '\r';
    512         } else if (ch == 't') {
    513           buf << '\t';
    514         } else if (ch == 'b') {
    515           buf << '\b';
    516         } else if (ch == 'f') {
    517           buf << '\f';
    518         } else {
    519           buf << static_cast<char>(ch);
    520         }
    521         status = 0;
    522         break;
    523       case 2:
    524         if (ch >= '0' && ch <= '7') {
    525           iEscCode =
    526               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
    527           status = 3;
    528         } else {
    529           buf << static_cast<char>(iEscCode);
    530           status = 0;
    531           continue;
    532         }
    533         break;
    534       case 3:
    535         if (ch >= '0' && ch <= '7') {
    536           iEscCode =
    537               iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch));
    538           buf << static_cast<char>(iEscCode);
    539           status = 0;
    540         } else {
    541           buf << static_cast<char>(iEscCode);
    542           status = 0;
    543           continue;
    544         }
    545         break;
    546       case 4:
    547         status = 0;
    548         if (ch != '\n')
    549           continue;
    550         break;
    551     }
    552     if (!PositionIsInBounds())
    553       break;
    554 
    555     ch = m_pBuf[m_Pos++];
    556   }
    557   if (PositionIsInBounds())
    558     ++m_Pos;
    559 
    560   return ByteString(
    561       buf.str().c_str(),
    562       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
    563 }
    564 
    565 ByteString CPDF_StreamParser::ReadHexString() {
    566   if (!PositionIsInBounds())
    567     return ByteString();
    568 
    569   std::ostringstream buf;
    570   bool bFirst = true;
    571   int code = 0;
    572   while (PositionIsInBounds()) {
    573     int ch = m_pBuf[m_Pos++];
    574 
    575     if (ch == '>')
    576       break;
    577 
    578     if (!std::isxdigit(ch))
    579       continue;
    580 
    581     int val = FXSYS_HexCharToInt(ch);
    582     if (bFirst) {
    583       code = val * 16;
    584     } else {
    585       code += val;
    586       buf << static_cast<uint8_t>(code);
    587     }
    588     bFirst = !bFirst;
    589   }
    590   if (!bFirst)
    591     buf << static_cast<char>(code);
    592 
    593   return ByteString(
    594       buf.str().c_str(),
    595       std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength));
    596 }
    597 
    598 bool CPDF_StreamParser::PositionIsInBounds() const {
    599   return m_Pos < m_Size;
    600 }
    601