1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/page/cpdf_streamparser.h" 8 9 #include <limits.h> 10 11 #include <algorithm> 12 #include <memory> 13 #include <sstream> 14 #include <utility> 15 16 #include "core/fpdfapi/cpdf_modulemgr.h" 17 #include "core/fpdfapi/page/cpdf_docpagedata.h" 18 #include "core/fpdfapi/parser/cpdf_array.h" 19 #include "core/fpdfapi/parser/cpdf_boolean.h" 20 #include "core/fpdfapi/parser/cpdf_dictionary.h" 21 #include "core/fpdfapi/parser/cpdf_document.h" 22 #include "core/fpdfapi/parser/cpdf_name.h" 23 #include "core/fpdfapi/parser/cpdf_null.h" 24 #include "core/fpdfapi/parser/cpdf_number.h" 25 #include "core/fpdfapi/parser/cpdf_stream.h" 26 #include "core/fpdfapi/parser/cpdf_string.h" 27 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 28 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 29 #include "core/fxcodec/codec/ccodec_jpegmodule.h" 30 #include "core/fxcodec/codec/ccodec_scanlinedecoder.h" 31 #include "core/fxcrt/fx_extension.h" 32 33 namespace { 34 35 const uint32_t kMaxNestedParsingLevel = 512; 36 const uint32_t kMaxWordBuffer = 256; 37 const size_t kMaxStringLength = 32767; 38 39 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder, 40 uint8_t** dest_buf, 41 uint32_t* dest_size) { 42 if (!pDecoder) 43 return FX_INVALID_OFFSET; 44 int ncomps = pDecoder->CountComps(); 45 int bpc = pDecoder->GetBPC(); 46 int width = pDecoder->GetWidth(); 47 int height = pDecoder->GetHeight(); 48 int pitch = (width * ncomps * bpc + 7) / 8; 49 if (height == 0 || pitch > (1 << 30) / height) 50 return FX_INVALID_OFFSET; 51 52 *dest_buf = FX_Alloc2D(uint8_t, pitch, height); 53 *dest_size = pitch * height; // Safe since checked alloc returned. 54 for (int row = 0; row < height; ++row) { 55 const uint8_t* pLine = pDecoder->GetScanline(row); 56 if (!pLine) 57 break; 58 59 memcpy(*dest_buf + row * pitch, pLine, pitch); 60 } 61 return pDecoder->GetSrcOffset(); 62 } 63 64 uint32_t DecodeInlineStream(const uint8_t* src_buf, 65 uint32_t limit, 66 int width, 67 int height, 68 const ByteString& decoder, 69 CPDF_Dictionary* pParam, 70 uint8_t** dest_buf, 71 uint32_t* dest_size) { 72 if (decoder == "CCITTFaxDecode" || decoder == "CCF") { 73 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder = 74 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); 75 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size); 76 } 77 if (decoder == "ASCII85Decode" || decoder == "A85") 78 return A85Decode(src_buf, limit, dest_buf, dest_size); 79 if (decoder == "ASCIIHexDecode" || decoder == "AHx") 80 return HexDecode(src_buf, limit, dest_buf, dest_size); 81 if (decoder == "FlateDecode" || decoder == "Fl") { 82 return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, *dest_size, 83 dest_buf, dest_size); 84 } 85 if (decoder == "LZWDecode" || decoder == "LZW") { 86 return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf, 87 dest_size); 88 } 89 if (decoder == "DCTDecode" || decoder == "DCT") { 90 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder = 91 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( 92 src_buf, limit, width, height, 0, 93 !pParam || pParam->GetIntegerFor("ColorTransform", 1)); 94 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size); 95 } 96 if (decoder == "RunLengthDecode" || decoder == "RL") 97 return RunLengthDecode(src_buf, limit, dest_buf, dest_size); 98 *dest_size = 0; 99 *dest_buf = 0; 100 return 0xFFFFFFFF; 101 } 102 103 } // namespace 104 105 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) 106 : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(nullptr) {} 107 108 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, 109 uint32_t dwSize, 110 const WeakPtr<ByteStringPool>& pPool) 111 : m_pBuf(pData), m_Size(dwSize), m_Pos(0), m_pPool(pPool) {} 112 113 CPDF_StreamParser::~CPDF_StreamParser() {} 114 115 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream( 116 CPDF_Document* pDoc, 117 std::unique_ptr<CPDF_Dictionary> pDict, 118 CPDF_Object* pCSObj) { 119 if (m_Pos == m_Size) 120 return nullptr; 121 122 if (PDFCharIsWhitespace(m_pBuf[m_Pos])) 123 m_Pos++; 124 125 ByteString Decoder; 126 CPDF_Dictionary* pParam = nullptr; 127 CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); 128 if (pFilter) { 129 if (CPDF_Array* pArray = pFilter->AsArray()) { 130 Decoder = pArray->GetStringAt(0); 131 CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms"); 132 if (pParams) 133 pParam = pParams->GetDictAt(0); 134 } else { 135 Decoder = pFilter->GetString(); 136 pParam = pDict->GetDictFor("DecodeParms"); 137 } 138 } 139 uint32_t width = pDict->GetIntegerFor("Width"); 140 uint32_t height = pDict->GetIntegerFor("Height"); 141 uint32_t OrigSize = 0; 142 if (pCSObj) { 143 uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent"); 144 uint32_t nComponents = 1; 145 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); 146 if (pCS) { 147 nComponents = pCS->CountComponents(); 148 pDoc->GetPageData()->ReleaseColorSpace(pCSObj); 149 } else { 150 nComponents = 3; 151 } 152 uint32_t pitch = width; 153 if (bpc && pitch > INT_MAX / bpc) 154 return nullptr; 155 156 pitch *= bpc; 157 if (nComponents && pitch > INT_MAX / nComponents) 158 return nullptr; 159 160 pitch *= nComponents; 161 if (pitch > INT_MAX - 7) 162 return nullptr; 163 164 pitch += 7; 165 pitch /= 8; 166 OrigSize = pitch; 167 } else { 168 if (width > INT_MAX - 7) 169 return nullptr; 170 171 OrigSize = ((width + 7) / 8); 172 } 173 if (height && OrigSize > INT_MAX / height) 174 return nullptr; 175 176 OrigSize *= height; 177 std::unique_ptr<uint8_t, FxFreeDeleter> pData; 178 uint32_t dwStreamSize; 179 if (Decoder.IsEmpty()) { 180 if (OrigSize > m_Size - m_Pos) 181 OrigSize = m_Size - m_Pos; 182 pData.reset(FX_Alloc(uint8_t, OrigSize)); 183 memcpy(pData.get(), m_pBuf + m_Pos, OrigSize); 184 dwStreamSize = OrigSize; 185 m_Pos += OrigSize; 186 } else { 187 uint8_t* pIgnore = nullptr; 188 uint32_t dwDestSize = OrigSize; 189 dwStreamSize = 190 DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, 191 Decoder, pParam, &pIgnore, &dwDestSize); 192 FX_Free(pIgnore); 193 if (static_cast<int>(dwStreamSize) < 0) 194 return nullptr; 195 196 uint32_t dwSavePos = m_Pos; 197 m_Pos += dwStreamSize; 198 while (1) { 199 uint32_t dwPrevPos = m_Pos; 200 CPDF_StreamParser::SyntaxType type = ParseNextElement(); 201 if (type == CPDF_StreamParser::EndOfData) 202 break; 203 204 if (type != CPDF_StreamParser::Keyword) { 205 dwStreamSize += m_Pos - dwPrevPos; 206 continue; 207 } 208 if (GetWord() == "EI") { 209 m_Pos = dwPrevPos; 210 break; 211 } 212 dwStreamSize += m_Pos - dwPrevPos; 213 } 214 m_Pos = dwSavePos; 215 pData.reset(FX_Alloc(uint8_t, dwStreamSize)); 216 memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize); 217 m_Pos += dwStreamSize; 218 } 219 pDict->SetNewFor<CPDF_Number>("Length", static_cast<int>(dwStreamSize)); 220 return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize, 221 std::move(pDict)); 222 } 223 224 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { 225 m_pLastObj.reset(); 226 m_WordSize = 0; 227 if (!PositionIsInBounds()) 228 return EndOfData; 229 230 int ch = m_pBuf[m_Pos++]; 231 while (1) { 232 while (PDFCharIsWhitespace(ch)) { 233 if (!PositionIsInBounds()) 234 return EndOfData; 235 236 ch = m_pBuf[m_Pos++]; 237 } 238 239 if (ch != '%') 240 break; 241 242 while (1) { 243 if (!PositionIsInBounds()) 244 return EndOfData; 245 246 ch = m_pBuf[m_Pos++]; 247 if (PDFCharIsLineEnding(ch)) 248 break; 249 } 250 } 251 252 if (PDFCharIsDelimiter(ch) && ch != '/') { 253 m_Pos--; 254 m_pLastObj = ReadNextObject(false, false, 0); 255 return Others; 256 } 257 258 bool bIsNumber = true; 259 while (1) { 260 if (m_WordSize < kMaxWordBuffer) 261 m_WordBuffer[m_WordSize++] = ch; 262 263 if (!PDFCharIsNumeric(ch)) 264 bIsNumber = false; 265 266 if (!PositionIsInBounds()) 267 break; 268 269 ch = m_pBuf[m_Pos++]; 270 271 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 272 m_Pos--; 273 break; 274 } 275 } 276 277 m_WordBuffer[m_WordSize] = 0; 278 if (bIsNumber) 279 return Number; 280 281 if (m_WordBuffer[0] == '/') 282 return Name; 283 284 if (m_WordSize == 4) { 285 if (memcmp(m_WordBuffer, "true", 4) == 0) { 286 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true); 287 return Others; 288 } 289 if (memcmp(m_WordBuffer, "null", 4) == 0) { 290 m_pLastObj = pdfium::MakeUnique<CPDF_Null>(); 291 return Others; 292 } 293 } else if (m_WordSize == 5) { 294 if (memcmp(m_WordBuffer, "false", 5) == 0) { 295 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false); 296 return Others; 297 } 298 } 299 return Keyword; 300 } 301 302 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject( 303 bool bAllowNestedArray, 304 bool bInArray, 305 uint32_t dwRecursionLevel) { 306 bool bIsNumber; 307 // Must get the next word before returning to avoid infinite loops. 308 GetNextWord(bIsNumber); 309 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel) 310 return nullptr; 311 312 if (bIsNumber) { 313 m_WordBuffer[m_WordSize] = 0; 314 return pdfium::MakeUnique<CPDF_Number>( 315 ByteStringView(m_WordBuffer, m_WordSize)); 316 } 317 318 int first_char = m_WordBuffer[0]; 319 if (first_char == '/') { 320 ByteString name = 321 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)); 322 return pdfium::MakeUnique<CPDF_Name>(m_pPool, name); 323 } 324 325 if (first_char == '(') { 326 ByteString str = ReadString(); 327 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false); 328 } 329 330 if (first_char == '<') { 331 if (m_WordSize == 1) 332 return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true); 333 334 auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool); 335 while (1) { 336 GetNextWord(bIsNumber); 337 if (m_WordSize == 2 && m_WordBuffer[0] == '>') 338 break; 339 340 if (!m_WordSize || m_WordBuffer[0] != '/') 341 return nullptr; 342 343 ByteString key = 344 PDF_NameDecode(ByteStringView(m_WordBuffer + 1, m_WordSize - 1)); 345 std::unique_ptr<CPDF_Object> pObj = 346 ReadNextObject(true, bInArray, dwRecursionLevel + 1); 347 if (!pObj) 348 return nullptr; 349 350 if (!key.IsEmpty()) 351 pDict->SetFor(key, std::move(pObj)); 352 } 353 return std::move(pDict); 354 } 355 356 if (first_char == '[') { 357 if ((!bAllowNestedArray && bInArray)) 358 return nullptr; 359 360 auto pArray = pdfium::MakeUnique<CPDF_Array>(); 361 while (1) { 362 std::unique_ptr<CPDF_Object> pObj = 363 ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1); 364 if (pObj) { 365 pArray->Add(std::move(pObj)); 366 continue; 367 } 368 if (!m_WordSize || m_WordBuffer[0] == ']') 369 break; 370 } 371 return std::move(pArray); 372 } 373 374 if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5)) 375 return pdfium::MakeUnique<CPDF_Boolean>(false); 376 377 if (m_WordSize == 4) { 378 if (memcmp(m_WordBuffer, "true", 4) == 0) 379 return pdfium::MakeUnique<CPDF_Boolean>(true); 380 if (memcmp(m_WordBuffer, "null", 4) == 0) 381 return pdfium::MakeUnique<CPDF_Null>(); 382 } 383 384 return nullptr; 385 } 386 387 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser 388 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { 389 m_WordSize = 0; 390 bIsNumber = true; 391 if (!PositionIsInBounds()) 392 return; 393 394 int ch = m_pBuf[m_Pos++]; 395 while (1) { 396 while (PDFCharIsWhitespace(ch)) { 397 if (!PositionIsInBounds()) { 398 return; 399 } 400 ch = m_pBuf[m_Pos++]; 401 } 402 403 if (ch != '%') 404 break; 405 406 while (1) { 407 if (!PositionIsInBounds()) 408 return; 409 ch = m_pBuf[m_Pos++]; 410 if (PDFCharIsLineEnding(ch)) 411 break; 412 } 413 } 414 415 if (PDFCharIsDelimiter(ch)) { 416 bIsNumber = false; 417 m_WordBuffer[m_WordSize++] = ch; 418 if (ch == '/') { 419 while (1) { 420 if (!PositionIsInBounds()) 421 return; 422 ch = m_pBuf[m_Pos++]; 423 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 424 m_Pos--; 425 return; 426 } 427 428 if (m_WordSize < kMaxWordBuffer) 429 m_WordBuffer[m_WordSize++] = ch; 430 } 431 } else if (ch == '<') { 432 if (!PositionIsInBounds()) 433 return; 434 ch = m_pBuf[m_Pos++]; 435 if (ch == '<') 436 m_WordBuffer[m_WordSize++] = ch; 437 else 438 m_Pos--; 439 } else if (ch == '>') { 440 if (!PositionIsInBounds()) 441 return; 442 ch = m_pBuf[m_Pos++]; 443 if (ch == '>') 444 m_WordBuffer[m_WordSize++] = ch; 445 else 446 m_Pos--; 447 } 448 return; 449 } 450 451 while (1) { 452 if (m_WordSize < kMaxWordBuffer) 453 m_WordBuffer[m_WordSize++] = ch; 454 if (!PDFCharIsNumeric(ch)) 455 bIsNumber = false; 456 457 if (!PositionIsInBounds()) 458 return; 459 ch = m_pBuf[m_Pos++]; 460 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 461 m_Pos--; 462 break; 463 } 464 } 465 } 466 467 ByteString CPDF_StreamParser::ReadString() { 468 if (!PositionIsInBounds()) 469 return ByteString(); 470 471 uint8_t ch = m_pBuf[m_Pos++]; 472 std::ostringstream buf; 473 int parlevel = 0; 474 int status = 0; 475 int iEscCode = 0; 476 while (1) { 477 switch (status) { 478 case 0: 479 if (ch == ')') { 480 if (parlevel == 0) { 481 return ByteString( 482 buf.str().c_str(), 483 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); 484 } 485 parlevel--; 486 buf << ')'; 487 } else if (ch == '(') { 488 parlevel++; 489 buf << '('; 490 } else if (ch == '\\') { 491 status = 1; 492 } else { 493 buf << static_cast<char>(ch); 494 } 495 break; 496 case 1: 497 if (ch >= '0' && ch <= '7') { 498 iEscCode = FXSYS_DecimalCharToInt(static_cast<char>(ch)); 499 status = 2; 500 break; 501 } 502 if (ch == '\r') { 503 status = 4; 504 break; 505 } 506 if (ch == '\n') { 507 // Do nothing. 508 } else if (ch == 'n') { 509 buf << '\n'; 510 } else if (ch == 'r') { 511 buf << '\r'; 512 } else if (ch == 't') { 513 buf << '\t'; 514 } else if (ch == 'b') { 515 buf << '\b'; 516 } else if (ch == 'f') { 517 buf << '\f'; 518 } else { 519 buf << static_cast<char>(ch); 520 } 521 status = 0; 522 break; 523 case 2: 524 if (ch >= '0' && ch <= '7') { 525 iEscCode = 526 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); 527 status = 3; 528 } else { 529 buf << static_cast<char>(iEscCode); 530 status = 0; 531 continue; 532 } 533 break; 534 case 3: 535 if (ch >= '0' && ch <= '7') { 536 iEscCode = 537 iEscCode * 8 + FXSYS_DecimalCharToInt(static_cast<char>(ch)); 538 buf << static_cast<char>(iEscCode); 539 status = 0; 540 } else { 541 buf << static_cast<char>(iEscCode); 542 status = 0; 543 continue; 544 } 545 break; 546 case 4: 547 status = 0; 548 if (ch != '\n') 549 continue; 550 break; 551 } 552 if (!PositionIsInBounds()) 553 break; 554 555 ch = m_pBuf[m_Pos++]; 556 } 557 if (PositionIsInBounds()) 558 ++m_Pos; 559 560 return ByteString( 561 buf.str().c_str(), 562 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); 563 } 564 565 ByteString CPDF_StreamParser::ReadHexString() { 566 if (!PositionIsInBounds()) 567 return ByteString(); 568 569 std::ostringstream buf; 570 bool bFirst = true; 571 int code = 0; 572 while (PositionIsInBounds()) { 573 int ch = m_pBuf[m_Pos++]; 574 575 if (ch == '>') 576 break; 577 578 if (!std::isxdigit(ch)) 579 continue; 580 581 int val = FXSYS_HexCharToInt(ch); 582 if (bFirst) { 583 code = val * 16; 584 } else { 585 code += val; 586 buf << static_cast<uint8_t>(code); 587 } 588 bFirst = !bFirst; 589 } 590 if (!bFirst) 591 buf << static_cast<char>(code); 592 593 return ByteString( 594 buf.str().c_str(), 595 std::min(static_cast<size_t>(buf.tellp()), kMaxStringLength)); 596 } 597 598 bool CPDF_StreamParser::PositionIsInBounds() const { 599 return m_Pos < m_Size; 600 } 601