1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/page/cpdf_streamparser.h" 8 9 #include <limits.h> 10 11 #include <memory> 12 #include <utility> 13 14 #include "core/fpdfapi/cpdf_modulemgr.h" 15 #include "core/fpdfapi/page/cpdf_docpagedata.h" 16 #include "core/fpdfapi/parser/cpdf_array.h" 17 #include "core/fpdfapi/parser/cpdf_boolean.h" 18 #include "core/fpdfapi/parser/cpdf_dictionary.h" 19 #include "core/fpdfapi/parser/cpdf_document.h" 20 #include "core/fpdfapi/parser/cpdf_name.h" 21 #include "core/fpdfapi/parser/cpdf_null.h" 22 #include "core/fpdfapi/parser/cpdf_number.h" 23 #include "core/fpdfapi/parser/cpdf_stream.h" 24 #include "core/fpdfapi/parser/cpdf_string.h" 25 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 26 #include "core/fpdfapi/parser/fpdf_parser_utility.h" 27 #include "core/fxcodec/fx_codec.h" 28 #include "core/fxcrt/fx_ext.h" 29 30 namespace { 31 32 const uint32_t kMaxNestedParsingLevel = 512; 33 const uint32_t kMaxWordBuffer = 256; 34 const FX_STRSIZE kMaxStringLength = 32767; 35 36 uint32_t DecodeAllScanlines(std::unique_ptr<CCodec_ScanlineDecoder> pDecoder, 37 uint8_t*& dest_buf, 38 uint32_t& dest_size) { 39 if (!pDecoder) 40 return FX_INVALID_OFFSET; 41 int ncomps = pDecoder->CountComps(); 42 int bpc = pDecoder->GetBPC(); 43 int width = pDecoder->GetWidth(); 44 int height = pDecoder->GetHeight(); 45 int pitch = (width * ncomps * bpc + 7) / 8; 46 if (height == 0 || pitch > (1 << 30) / height) 47 return FX_INVALID_OFFSET; 48 49 dest_buf = FX_Alloc2D(uint8_t, pitch, height); 50 dest_size = pitch * height; // Safe since checked alloc returned. 51 for (int row = 0; row < height; row++) { 52 const uint8_t* pLine = pDecoder->GetScanline(row); 53 if (!pLine) 54 break; 55 56 FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch); 57 } 58 return pDecoder->GetSrcOffset(); 59 } 60 61 uint32_t PDF_DecodeInlineStream(const uint8_t* src_buf, 62 uint32_t limit, 63 int width, 64 int height, 65 CFX_ByteString& decoder, 66 CPDF_Dictionary* pParam, 67 uint8_t*& dest_buf, 68 uint32_t& dest_size) { 69 if (decoder == "CCITTFaxDecode" || decoder == "CCF") { 70 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder = 71 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); 72 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size); 73 } 74 if (decoder == "ASCII85Decode" || decoder == "A85") 75 return A85Decode(src_buf, limit, dest_buf, dest_size); 76 if (decoder == "ASCIIHexDecode" || decoder == "AHx") 77 return HexDecode(src_buf, limit, dest_buf, dest_size); 78 if (decoder == "FlateDecode" || decoder == "Fl") { 79 return FPDFAPI_FlateOrLZWDecode(false, src_buf, limit, pParam, dest_size, 80 dest_buf, dest_size); 81 } 82 if (decoder == "LZWDecode" || decoder == "LZW") { 83 return FPDFAPI_FlateOrLZWDecode(true, src_buf, limit, pParam, 0, dest_buf, 84 dest_size); 85 } 86 if (decoder == "DCTDecode" || decoder == "DCT") { 87 std::unique_ptr<CCodec_ScanlineDecoder> pDecoder = 88 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( 89 src_buf, limit, width, height, 0, 90 !pParam || pParam->GetIntegerFor("ColorTransform", 1)); 91 return DecodeAllScanlines(std::move(pDecoder), dest_buf, dest_size); 92 } 93 if (decoder == "RunLengthDecode" || decoder == "RL") 94 return RunLengthDecode(src_buf, limit, dest_buf, dest_size); 95 dest_size = 0; 96 dest_buf = 0; 97 return (uint32_t)-1; 98 } 99 100 } // namespace 101 102 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, uint32_t dwSize) 103 : m_pBuf(pData), 104 m_Size(dwSize), 105 m_Pos(0), 106 m_pPool(nullptr) {} 107 108 CPDF_StreamParser::CPDF_StreamParser( 109 const uint8_t* pData, 110 uint32_t dwSize, 111 const CFX_WeakPtr<CFX_ByteStringPool>& pPool) 112 : m_pBuf(pData), 113 m_Size(dwSize), 114 m_Pos(0), 115 m_pPool(pPool) {} 116 117 CPDF_StreamParser::~CPDF_StreamParser() {} 118 119 std::unique_ptr<CPDF_Stream> CPDF_StreamParser::ReadInlineStream( 120 CPDF_Document* pDoc, 121 std::unique_ptr<CPDF_Dictionary> pDict, 122 CPDF_Object* pCSObj) { 123 if (m_Pos == m_Size) 124 return nullptr; 125 126 if (PDFCharIsWhitespace(m_pBuf[m_Pos])) 127 m_Pos++; 128 129 CFX_ByteString Decoder; 130 CPDF_Dictionary* pParam = nullptr; 131 CPDF_Object* pFilter = pDict->GetDirectObjectFor("Filter"); 132 if (pFilter) { 133 if (CPDF_Array* pArray = pFilter->AsArray()) { 134 Decoder = pArray->GetStringAt(0); 135 CPDF_Array* pParams = pDict->GetArrayFor("DecodeParms"); 136 if (pParams) 137 pParam = pParams->GetDictAt(0); 138 } else { 139 Decoder = pFilter->GetString(); 140 pParam = pDict->GetDictFor("DecodeParms"); 141 } 142 } 143 uint32_t width = pDict->GetIntegerFor("Width"); 144 uint32_t height = pDict->GetIntegerFor("Height"); 145 uint32_t OrigSize = 0; 146 if (pCSObj) { 147 uint32_t bpc = pDict->GetIntegerFor("BitsPerComponent"); 148 uint32_t nComponents = 1; 149 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); 150 if (pCS) { 151 nComponents = pCS->CountComponents(); 152 pDoc->GetPageData()->ReleaseColorSpace(pCSObj); 153 } else { 154 nComponents = 3; 155 } 156 uint32_t pitch = width; 157 if (bpc && pitch > INT_MAX / bpc) 158 return nullptr; 159 160 pitch *= bpc; 161 if (nComponents && pitch > INT_MAX / nComponents) 162 return nullptr; 163 164 pitch *= nComponents; 165 if (pitch > INT_MAX - 7) 166 return nullptr; 167 168 pitch += 7; 169 pitch /= 8; 170 OrigSize = pitch; 171 } else { 172 if (width > INT_MAX - 7) 173 return nullptr; 174 175 OrigSize = ((width + 7) / 8); 176 } 177 if (height && OrigSize > INT_MAX / height) 178 return nullptr; 179 180 OrigSize *= height; 181 std::unique_ptr<uint8_t, FxFreeDeleter> pData; 182 uint32_t dwStreamSize; 183 if (Decoder.IsEmpty()) { 184 if (OrigSize > m_Size - m_Pos) 185 OrigSize = m_Size - m_Pos; 186 pData.reset(FX_Alloc(uint8_t, OrigSize)); 187 FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, OrigSize); 188 dwStreamSize = OrigSize; 189 m_Pos += OrigSize; 190 } else { 191 uint8_t* pIgnore = nullptr; 192 uint32_t dwDestSize = OrigSize; 193 dwStreamSize = 194 PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, 195 Decoder, pParam, pIgnore, dwDestSize); 196 FX_Free(pIgnore); 197 if (static_cast<int>(dwStreamSize) < 0) 198 return nullptr; 199 200 uint32_t dwSavePos = m_Pos; 201 m_Pos += dwStreamSize; 202 while (1) { 203 uint32_t dwPrevPos = m_Pos; 204 CPDF_StreamParser::SyntaxType type = ParseNextElement(); 205 if (type == CPDF_StreamParser::EndOfData) 206 break; 207 208 if (type != CPDF_StreamParser::Keyword) { 209 dwStreamSize += m_Pos - dwPrevPos; 210 continue; 211 } 212 if (GetWord() == "EI") { 213 m_Pos = dwPrevPos; 214 break; 215 } 216 dwStreamSize += m_Pos - dwPrevPos; 217 } 218 m_Pos = dwSavePos; 219 pData.reset(FX_Alloc(uint8_t, dwStreamSize)); 220 FXSYS_memcpy(pData.get(), m_pBuf + m_Pos, dwStreamSize); 221 m_Pos += dwStreamSize; 222 } 223 pDict->SetNewFor<CPDF_Number>("Length", (int)dwStreamSize); 224 return pdfium::MakeUnique<CPDF_Stream>(std::move(pData), dwStreamSize, 225 std::move(pDict)); 226 } 227 228 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { 229 m_pLastObj.reset(); 230 m_WordSize = 0; 231 if (!PositionIsInBounds()) 232 return EndOfData; 233 234 int ch = m_pBuf[m_Pos++]; 235 while (1) { 236 while (PDFCharIsWhitespace(ch)) { 237 if (!PositionIsInBounds()) 238 return EndOfData; 239 240 ch = m_pBuf[m_Pos++]; 241 } 242 243 if (ch != '%') 244 break; 245 246 while (1) { 247 if (!PositionIsInBounds()) 248 return EndOfData; 249 250 ch = m_pBuf[m_Pos++]; 251 if (PDFCharIsLineEnding(ch)) 252 break; 253 } 254 } 255 256 if (PDFCharIsDelimiter(ch) && ch != '/') { 257 m_Pos--; 258 m_pLastObj = ReadNextObject(false, false, 0); 259 return Others; 260 } 261 262 bool bIsNumber = true; 263 while (1) { 264 if (m_WordSize < kMaxWordBuffer) 265 m_WordBuffer[m_WordSize++] = ch; 266 267 if (!PDFCharIsNumeric(ch)) 268 bIsNumber = false; 269 270 if (!PositionIsInBounds()) 271 break; 272 273 ch = m_pBuf[m_Pos++]; 274 275 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 276 m_Pos--; 277 break; 278 } 279 } 280 281 m_WordBuffer[m_WordSize] = 0; 282 if (bIsNumber) 283 return Number; 284 285 if (m_WordBuffer[0] == '/') 286 return Name; 287 288 if (m_WordSize == 4) { 289 if (memcmp(m_WordBuffer, "true", 4) == 0) { 290 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(true); 291 return Others; 292 } 293 if (memcmp(m_WordBuffer, "null", 4) == 0) { 294 m_pLastObj = pdfium::MakeUnique<CPDF_Null>(); 295 return Others; 296 } 297 } else if (m_WordSize == 5) { 298 if (memcmp(m_WordBuffer, "false", 5) == 0) { 299 m_pLastObj = pdfium::MakeUnique<CPDF_Boolean>(false); 300 return Others; 301 } 302 } 303 return Keyword; 304 } 305 306 std::unique_ptr<CPDF_Object> CPDF_StreamParser::ReadNextObject( 307 bool bAllowNestedArray, 308 bool bInArray, 309 uint32_t dwRecursionLevel) { 310 bool bIsNumber; 311 // Must get the next word before returning to avoid infinite loops. 312 GetNextWord(bIsNumber); 313 if (!m_WordSize || dwRecursionLevel > kMaxNestedParsingLevel) 314 return nullptr; 315 316 if (bIsNumber) { 317 m_WordBuffer[m_WordSize] = 0; 318 return pdfium::MakeUnique<CPDF_Number>( 319 CFX_ByteStringC(m_WordBuffer, m_WordSize)); 320 } 321 322 int first_char = m_WordBuffer[0]; 323 if (first_char == '/') { 324 CFX_ByteString name = 325 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); 326 return pdfium::MakeUnique<CPDF_Name>(m_pPool, name); 327 } 328 329 if (first_char == '(') { 330 CFX_ByteString str = ReadString(); 331 return pdfium::MakeUnique<CPDF_String>(m_pPool, str, false); 332 } 333 334 if (first_char == '<') { 335 if (m_WordSize == 1) 336 return pdfium::MakeUnique<CPDF_String>(m_pPool, ReadHexString(), true); 337 338 auto pDict = pdfium::MakeUnique<CPDF_Dictionary>(m_pPool); 339 while (1) { 340 GetNextWord(bIsNumber); 341 if (m_WordSize == 2 && m_WordBuffer[0] == '>') 342 break; 343 344 if (!m_WordSize || m_WordBuffer[0] != '/') 345 return nullptr; 346 347 CFX_ByteString key = 348 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); 349 std::unique_ptr<CPDF_Object> pObj = 350 ReadNextObject(true, bInArray, dwRecursionLevel + 1); 351 if (!pObj) 352 return nullptr; 353 354 if (!key.IsEmpty()) 355 pDict->SetFor(key, std::move(pObj)); 356 } 357 return std::move(pDict); 358 } 359 360 if (first_char == '[') { 361 if ((!bAllowNestedArray && bInArray)) 362 return nullptr; 363 364 auto pArray = pdfium::MakeUnique<CPDF_Array>(); 365 while (1) { 366 std::unique_ptr<CPDF_Object> pObj = 367 ReadNextObject(bAllowNestedArray, true, dwRecursionLevel + 1); 368 if (pObj) { 369 pArray->Add(std::move(pObj)); 370 continue; 371 } 372 if (!m_WordSize || m_WordBuffer[0] == ']') 373 break; 374 } 375 return std::move(pArray); 376 } 377 378 if (m_WordSize == 5 && !memcmp(m_WordBuffer, "false", 5)) 379 return pdfium::MakeUnique<CPDF_Boolean>(false); 380 381 if (m_WordSize == 4) { 382 if (memcmp(m_WordBuffer, "true", 4) == 0) 383 return pdfium::MakeUnique<CPDF_Boolean>(true); 384 if (memcmp(m_WordBuffer, "null", 4) == 0) 385 return pdfium::MakeUnique<CPDF_Null>(); 386 } 387 388 return nullptr; 389 } 390 391 // TODO(npm): the following methods are almost identical in cpdf_syntaxparser 392 void CPDF_StreamParser::GetNextWord(bool& bIsNumber) { 393 m_WordSize = 0; 394 bIsNumber = true; 395 if (!PositionIsInBounds()) 396 return; 397 398 int ch = m_pBuf[m_Pos++]; 399 while (1) { 400 while (PDFCharIsWhitespace(ch)) { 401 if (!PositionIsInBounds()) { 402 return; 403 } 404 ch = m_pBuf[m_Pos++]; 405 } 406 407 if (ch != '%') 408 break; 409 410 while (1) { 411 if (!PositionIsInBounds()) 412 return; 413 ch = m_pBuf[m_Pos++]; 414 if (PDFCharIsLineEnding(ch)) 415 break; 416 } 417 } 418 419 if (PDFCharIsDelimiter(ch)) { 420 bIsNumber = false; 421 m_WordBuffer[m_WordSize++] = ch; 422 if (ch == '/') { 423 while (1) { 424 if (!PositionIsInBounds()) 425 return; 426 ch = m_pBuf[m_Pos++]; 427 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 428 m_Pos--; 429 return; 430 } 431 432 if (m_WordSize < kMaxWordBuffer) 433 m_WordBuffer[m_WordSize++] = ch; 434 } 435 } else if (ch == '<') { 436 if (!PositionIsInBounds()) 437 return; 438 ch = m_pBuf[m_Pos++]; 439 if (ch == '<') 440 m_WordBuffer[m_WordSize++] = ch; 441 else 442 m_Pos--; 443 } else if (ch == '>') { 444 if (!PositionIsInBounds()) 445 return; 446 ch = m_pBuf[m_Pos++]; 447 if (ch == '>') 448 m_WordBuffer[m_WordSize++] = ch; 449 else 450 m_Pos--; 451 } 452 return; 453 } 454 455 while (1) { 456 if (m_WordSize < kMaxWordBuffer) 457 m_WordBuffer[m_WordSize++] = ch; 458 if (!PDFCharIsNumeric(ch)) 459 bIsNumber = false; 460 461 if (!PositionIsInBounds()) 462 return; 463 ch = m_pBuf[m_Pos++]; 464 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 465 m_Pos--; 466 break; 467 } 468 } 469 } 470 471 CFX_ByteString CPDF_StreamParser::ReadString() { 472 if (!PositionIsInBounds()) 473 return CFX_ByteString(); 474 475 uint8_t ch = m_pBuf[m_Pos++]; 476 CFX_ByteTextBuf buf; 477 int parlevel = 0; 478 int status = 0; 479 int iEscCode = 0; 480 while (1) { 481 switch (status) { 482 case 0: 483 if (ch == ')') { 484 if (parlevel == 0) { 485 if (buf.GetLength() > kMaxStringLength) { 486 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); 487 } 488 return buf.MakeString(); 489 } 490 parlevel--; 491 buf.AppendChar(')'); 492 } else if (ch == '(') { 493 parlevel++; 494 buf.AppendChar('('); 495 } else if (ch == '\\') { 496 status = 1; 497 } else { 498 buf.AppendChar((char)ch); 499 } 500 break; 501 case 1: 502 if (ch >= '0' && ch <= '7') { 503 iEscCode = FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 504 status = 2; 505 break; 506 } 507 if (ch == 'n') { 508 buf.AppendChar('\n'); 509 } else if (ch == 'r') { 510 buf.AppendChar('\r'); 511 } else if (ch == 't') { 512 buf.AppendChar('\t'); 513 } else if (ch == 'b') { 514 buf.AppendChar('\b'); 515 } else if (ch == 'f') { 516 buf.AppendChar('\f'); 517 } else if (ch == '\r') { 518 status = 4; 519 break; 520 } else if (ch == '\n') { 521 } else { 522 buf.AppendChar(ch); 523 } 524 status = 0; 525 break; 526 case 2: 527 if (ch >= '0' && ch <= '7') { 528 iEscCode = 529 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 530 status = 3; 531 } else { 532 buf.AppendChar(iEscCode); 533 status = 0; 534 continue; 535 } 536 break; 537 case 3: 538 if (ch >= '0' && ch <= '7') { 539 iEscCode = 540 iEscCode * 8 + FXSYS_toDecimalDigit(static_cast<FX_WCHAR>(ch)); 541 buf.AppendChar(iEscCode); 542 status = 0; 543 } else { 544 buf.AppendChar(iEscCode); 545 status = 0; 546 continue; 547 } 548 break; 549 case 4: 550 status = 0; 551 if (ch != '\n') { 552 continue; 553 } 554 break; 555 } 556 if (!PositionIsInBounds()) 557 break; 558 559 ch = m_pBuf[m_Pos++]; 560 } 561 if (PositionIsInBounds()) 562 ++m_Pos; 563 564 if (buf.GetLength() > kMaxStringLength) { 565 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); 566 } 567 return buf.MakeString(); 568 } 569 570 CFX_ByteString CPDF_StreamParser::ReadHexString() { 571 if (!PositionIsInBounds()) 572 return CFX_ByteString(); 573 574 CFX_ByteTextBuf buf; 575 bool bFirst = true; 576 int code = 0; 577 while (PositionIsInBounds()) { 578 int ch = m_pBuf[m_Pos++]; 579 580 if (ch == '>') 581 break; 582 583 if (!std::isxdigit(ch)) 584 continue; 585 586 int val = FXSYS_toHexDigit(ch); 587 if (bFirst) { 588 code = val * 16; 589 } else { 590 code += val; 591 buf.AppendByte((uint8_t)code); 592 } 593 bFirst = !bFirst; 594 } 595 if (!bFirst) 596 buf.AppendChar((char)code); 597 598 if (buf.GetLength() > kMaxStringLength) 599 return CFX_ByteString(buf.GetBuffer(), kMaxStringLength); 600 601 return buf.MakeString(); 602 } 603 604 bool CPDF_StreamParser::PositionIsInBounds() const { 605 return m_Pos < m_Size; 606 } 607