1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/src/fpdfapi/fpdf_page/pageint.h" 8 9 #include <limits.h> 10 11 #include "core/include/fpdfapi/fpdf_module.h" 12 #include "core/include/fpdfapi/fpdf_page.h" 13 #include "core/include/fxcodec/fx_codec.h" 14 #include "core/include/fxcrt/fx_ext.h" 15 #include "core/include/fxcrt/fx_safe_types.h" 16 17 namespace { 18 19 const char kPathOperatorSubpath = 'm'; 20 const char kPathOperatorLine = 'l'; 21 const char kPathOperatorCubicBezier1 = 'c'; 22 const char kPathOperatorCubicBezier2 = 'v'; 23 const char kPathOperatorCubicBezier3 = 'y'; 24 const char kPathOperatorClosePath = 'h'; 25 const char kPathOperatorRectangle[] = "re"; 26 27 } // namespace 28 29 class CPDF_StreamParserAutoClearer { 30 public: 31 CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, 32 CPDF_StreamParser* new_parser) 33 : scoped_variable_(scoped_variable) { 34 *scoped_variable_ = new_parser; 35 } 36 ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; } 37 38 private: 39 CPDF_StreamParser** scoped_variable_; 40 }; 41 FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData, 42 FX_DWORD dwSize, 43 FX_DWORD max_cost) { 44 if (m_Level > _FPDF_MAX_FORM_LEVEL_) { 45 return dwSize; 46 } 47 FX_DWORD InitObjCount = m_pObjectList->CountObjects(); 48 CPDF_StreamParser syntax(pData, dwSize); 49 CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax); 50 m_CompatCount = 0; 51 while (1) { 52 FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount; 53 if (max_cost && cost >= max_cost) { 54 break; 55 } 56 switch (syntax.ParseNextElement()) { 57 case CPDF_StreamParser::EndOfData: 58 return m_pSyntax->GetPos(); 59 case CPDF_StreamParser::Keyword: 60 OnOperator((char*)syntax.GetWordBuf()); 61 ClearAllParams(); 62 break; 63 case CPDF_StreamParser::Number: 64 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize()); 65 break; 66 case CPDF_StreamParser::Name: 67 AddNameParam((const FX_CHAR*)syntax.GetWordBuf() + 1, 68 syntax.GetWordSize() - 1); 69 break; 70 default: 71 AddObjectParam(syntax.GetObject()); 72 } 73 } 74 return m_pSyntax->GetPos(); 75 } 76 77 void CPDF_StreamContentParser::Handle_BeginImage() { 78 FX_FILESIZE savePos = m_pSyntax->GetPos(); 79 CPDF_Dictionary* pDict = new CPDF_Dictionary; 80 while (1) { 81 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 82 if (type == CPDF_StreamParser::Keyword) { 83 CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), 84 m_pSyntax->GetWordSize()); 85 if (bsKeyword != "ID") { 86 m_pSyntax->SetPos(savePos); 87 pDict->Release(); 88 return; 89 } 90 } 91 if (type != CPDF_StreamParser::Name) { 92 break; 93 } 94 CFX_ByteString key((const FX_CHAR*)m_pSyntax->GetWordBuf() + 1, 95 m_pSyntax->GetWordSize() - 1); 96 std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj( 97 m_pSyntax->ReadNextObject()); 98 if (!key.IsEmpty()) { 99 FX_DWORD dwObjNum = pObj ? pObj->GetObjNum() : 0; 100 if (dwObjNum) 101 pDict->SetAtReference(key, m_pDocument, dwObjNum); 102 else 103 pDict->SetAt(key, pObj.release()); 104 } 105 } 106 PDF_ReplaceAbbr(pDict); 107 CPDF_Object* pCSObj = NULL; 108 if (pDict->KeyExist("ColorSpace")) { 109 pCSObj = pDict->GetElementValue("ColorSpace"); 110 if (pCSObj->IsName()) { 111 CFX_ByteString name = pCSObj->GetString(); 112 if (name != "DeviceRGB" && name != "DeviceGray" && name != "DeviceCMYK") { 113 pCSObj = FindResourceObj("ColorSpace", name); 114 if (pCSObj && !pCSObj->GetObjNum()) { 115 pCSObj = pCSObj->Clone(); 116 pDict->SetAt("ColorSpace", pCSObj); 117 } 118 } 119 } 120 } 121 CPDF_Stream* pStream = m_pSyntax->ReadInlineStream( 122 m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage); 123 while (1) { 124 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 125 if (type == CPDF_StreamParser::EndOfData) { 126 break; 127 } 128 if (type != CPDF_StreamParser::Keyword) { 129 continue; 130 } 131 if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' && 132 m_pSyntax->GetWordBuf()[1] == 'I') { 133 break; 134 } 135 } 136 if (m_Options.m_bTextOnly) { 137 if (pStream) { 138 pStream->Release(); 139 } else { 140 pDict->Release(); 141 } 142 return; 143 } 144 pDict->SetAtName("Subtype", "Image"); 145 CPDF_ImageObject* pImgObj = AddImage(pStream, NULL, TRUE); 146 if (!pImgObj) { 147 if (pStream) { 148 pStream->Release(); 149 } else { 150 pDict->Release(); 151 } 152 } 153 } 154 void CPDF_StreamContentParser::ParsePathObject() { 155 FX_FLOAT params[6] = {}; 156 int nParams = 0; 157 int last_pos = m_pSyntax->GetPos(); 158 while (1) { 159 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 160 FX_BOOL bProcessed = TRUE; 161 switch (type) { 162 case CPDF_StreamParser::EndOfData: 163 return; 164 case CPDF_StreamParser::Keyword: { 165 int len = m_pSyntax->GetWordSize(); 166 if (len == 1) { 167 switch (m_pSyntax->GetWordBuf()[0]) { 168 case kPathOperatorSubpath: 169 AddPathPoint(params[0], params[1], FXPT_MOVETO); 170 nParams = 0; 171 break; 172 case kPathOperatorLine: 173 AddPathPoint(params[0], params[1], FXPT_LINETO); 174 nParams = 0; 175 break; 176 case kPathOperatorCubicBezier1: 177 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 178 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 179 AddPathPoint(params[4], params[5], FXPT_BEZIERTO); 180 nParams = 0; 181 break; 182 case kPathOperatorCubicBezier2: 183 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO); 184 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 185 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 186 nParams = 0; 187 break; 188 case kPathOperatorCubicBezier3: 189 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 190 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 191 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 192 nParams = 0; 193 break; 194 case kPathOperatorClosePath: 195 Handle_ClosePath(); 196 nParams = 0; 197 break; 198 default: 199 bProcessed = FALSE; 200 break; 201 } 202 } else if (len == 2) { 203 if (m_pSyntax->GetWordBuf()[0] == kPathOperatorRectangle[0] && 204 m_pSyntax->GetWordBuf()[1] == kPathOperatorRectangle[1]) { 205 AddPathRect(params[0], params[1], params[2], params[3]); 206 nParams = 0; 207 } else { 208 bProcessed = FALSE; 209 } 210 } else { 211 bProcessed = FALSE; 212 } 213 if (bProcessed) { 214 last_pos = m_pSyntax->GetPos(); 215 } 216 break; 217 } 218 case CPDF_StreamParser::Number: { 219 if (nParams == 6) { 220 break; 221 } 222 FX_BOOL bInteger; 223 int value; 224 FX_atonum( 225 CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), 226 bInteger, &value); 227 params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value; 228 break; 229 } 230 default: 231 bProcessed = FALSE; 232 } 233 if (!bProcessed) { 234 m_pSyntax->SetPos(last_pos); 235 return; 236 } 237 } 238 } 239 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, FX_DWORD dwSize) { 240 m_pBuf = pData; 241 m_Size = dwSize; 242 m_Pos = 0; 243 m_pLastObj = NULL; 244 } 245 CPDF_StreamParser::~CPDF_StreamParser() { 246 if (m_pLastObj) { 247 m_pLastObj->Release(); 248 } 249 } 250 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, 251 uint8_t*& dest_buf, 252 FX_DWORD& dest_size) { 253 if (!pDecoder) { 254 return (FX_DWORD)-1; 255 } 256 int ncomps = pDecoder->CountComps(); 257 int bpc = pDecoder->GetBPC(); 258 int width = pDecoder->GetWidth(); 259 int height = pDecoder->GetHeight(); 260 int pitch = (width * ncomps * bpc + 7) / 8; 261 if (height == 0 || pitch > (1 << 30) / height) { 262 delete pDecoder; 263 return -1; 264 } 265 dest_buf = FX_Alloc2D(uint8_t, pitch, height); 266 dest_size = pitch * height; // Safe since checked alloc returned. 267 for (int row = 0; row < height; row++) { 268 const uint8_t* pLine = pDecoder->GetScanline(row); 269 if (!pLine) 270 break; 271 272 FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch); 273 } 274 FX_DWORD srcoff = pDecoder->GetSrcOffset(); 275 delete pDecoder; 276 return srcoff; 277 } 278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder( 279 const uint8_t* src_buf, 280 FX_DWORD src_size, 281 int width, 282 int height, 283 const CPDF_Dictionary* pParams); 284 285 FX_DWORD PDF_DecodeInlineStream(const uint8_t* src_buf, 286 FX_DWORD limit, 287 int width, 288 int height, 289 CFX_ByteString& decoder, 290 CPDF_Dictionary* pParam, 291 uint8_t*& dest_buf, 292 FX_DWORD& dest_size) { 293 if (decoder == "CCITTFaxDecode" || decoder == "CCF") { 294 ICodec_ScanlineDecoder* pDecoder = 295 FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); 296 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 297 } 298 if (decoder == "ASCII85Decode" || decoder == "A85") { 299 return A85Decode(src_buf, limit, dest_buf, dest_size); 300 } 301 if (decoder == "ASCIIHexDecode" || decoder == "AHx") { 302 return HexDecode(src_buf, limit, dest_buf, dest_size); 303 } 304 if (decoder == "FlateDecode" || decoder == "Fl") { 305 return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, 306 dest_buf, dest_size); 307 } 308 if (decoder == "LZWDecode" || decoder == "LZW") { 309 return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, 310 dest_size); 311 } 312 if (decoder == "DCTDecode" || decoder == "DCT") { 313 ICodec_ScanlineDecoder* pDecoder = 314 CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( 315 src_buf, limit, width, height, 0, 316 pParam ? pParam->GetInteger("ColorTransform", 1) : 1); 317 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 318 } 319 if (decoder == "RunLengthDecode" || decoder == "RL") { 320 return RunLengthDecode(src_buf, limit, dest_buf, dest_size); 321 } 322 dest_size = 0; 323 dest_buf = 0; 324 return (FX_DWORD)-1; 325 } 326 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, 327 CPDF_Dictionary* pDict, 328 CPDF_Object* pCSObj, 329 FX_BOOL bDecode) { 330 if (m_Pos == m_Size) 331 return nullptr; 332 333 if (PDFCharIsWhitespace(m_pBuf[m_Pos])) 334 m_Pos++; 335 336 CFX_ByteString Decoder; 337 CPDF_Dictionary* pParam = nullptr; 338 CPDF_Object* pFilter = pDict->GetElementValue("Filter"); 339 if (pFilter) { 340 if (CPDF_Array* pArray = pFilter->AsArray()) { 341 Decoder = pArray->GetString(0); 342 CPDF_Array* pParams = pDict->GetArray("DecodeParms"); 343 if (pParams) 344 pParam = pParams->GetDict(0); 345 } else { 346 Decoder = pFilter->GetString(); 347 pParam = pDict->GetDict("DecodeParms"); 348 } 349 } 350 FX_DWORD width = pDict->GetInteger("Width"); 351 FX_DWORD height = pDict->GetInteger("Height"); 352 FX_DWORD OrigSize = 0; 353 if (pCSObj) { 354 FX_DWORD bpc = pDict->GetInteger("BitsPerComponent"); 355 FX_DWORD nComponents = 1; 356 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); 357 if (!pCS) { 358 nComponents = 3; 359 } else { 360 nComponents = pCS->CountComponents(); 361 pDoc->GetPageData()->ReleaseColorSpace(pCSObj); 362 } 363 FX_DWORD pitch = width; 364 if (bpc && pitch > INT_MAX / bpc) { 365 return NULL; 366 } 367 pitch *= bpc; 368 if (nComponents && pitch > INT_MAX / nComponents) { 369 return NULL; 370 } 371 pitch *= nComponents; 372 if (pitch > INT_MAX - 7) { 373 return NULL; 374 } 375 pitch += 7; 376 pitch /= 8; 377 OrigSize = pitch; 378 } else { 379 if (width > INT_MAX - 7) { 380 return NULL; 381 } 382 OrigSize = ((width + 7) / 8); 383 } 384 if (height && OrigSize > INT_MAX / height) { 385 return NULL; 386 } 387 OrigSize *= height; 388 uint8_t* pData = NULL; 389 FX_DWORD dwStreamSize; 390 if (Decoder.IsEmpty()) { 391 if (OrigSize > m_Size - m_Pos) { 392 OrigSize = m_Size - m_Pos; 393 } 394 pData = FX_Alloc(uint8_t, OrigSize); 395 FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize); 396 dwStreamSize = OrigSize; 397 m_Pos += OrigSize; 398 } else { 399 FX_DWORD dwDestSize = OrigSize; 400 dwStreamSize = 401 PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, 402 Decoder, pParam, pData, dwDestSize); 403 if ((int)dwStreamSize < 0) { 404 FX_Free(pData); 405 return NULL; 406 } 407 if (bDecode) { 408 m_Pos += dwStreamSize; 409 dwStreamSize = dwDestSize; 410 if (CPDF_Array* pArray = pFilter->AsArray()) { 411 pArray->RemoveAt(0); 412 CPDF_Array* pParams = pDict->GetArray("DecodeParms"); 413 if (pParams) 414 pParams->RemoveAt(0); 415 } else { 416 pDict->RemoveAt("Filter"); 417 pDict->RemoveAt("DecodeParms"); 418 } 419 } else { 420 FX_Free(pData); 421 FX_DWORD dwSavePos = m_Pos; 422 m_Pos += dwStreamSize; 423 while (1) { 424 FX_DWORD dwPrevPos = m_Pos; 425 CPDF_StreamParser::SyntaxType type = ParseNextElement(); 426 if (type == CPDF_StreamParser::EndOfData) { 427 break; 428 } 429 if (type != CPDF_StreamParser::Keyword) { 430 dwStreamSize += m_Pos - dwPrevPos; 431 continue; 432 } 433 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && 434 GetWordBuf()[1] == 'I') { 435 m_Pos = dwPrevPos; 436 break; 437 } 438 dwStreamSize += m_Pos - dwPrevPos; 439 } 440 m_Pos = dwSavePos; 441 pData = FX_Alloc(uint8_t, dwStreamSize); 442 FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize); 443 m_Pos += dwStreamSize; 444 } 445 } 446 pDict->SetAtInteger("Length", (int)dwStreamSize); 447 return new CPDF_Stream(pData, dwStreamSize, pDict); 448 } 449 450 #define MAX_WORD_BUFFER 256 451 #define MAX_STRING_LENGTH 32767 452 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274) 453 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e) 454 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166) 455 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() { 456 if (m_pLastObj) { 457 m_pLastObj->Release(); 458 m_pLastObj = nullptr; 459 } 460 461 m_WordSize = 0; 462 FX_BOOL bIsNumber = TRUE; 463 if (!PositionIsInBounds()) 464 return EndOfData; 465 466 int ch = m_pBuf[m_Pos++]; 467 while (1) { 468 while (PDFCharIsWhitespace(ch)) { 469 if (!PositionIsInBounds()) 470 return EndOfData; 471 472 ch = m_pBuf[m_Pos++]; 473 } 474 475 if (ch != '%') 476 break; 477 478 while (1) { 479 if (!PositionIsInBounds()) 480 return EndOfData; 481 482 ch = m_pBuf[m_Pos++]; 483 if (PDFCharIsLineEnding(ch)) 484 break; 485 } 486 } 487 488 if (PDFCharIsDelimiter(ch) && ch != '/') { 489 m_Pos--; 490 m_pLastObj = ReadNextObject(); 491 return Others; 492 } 493 494 while (1) { 495 if (m_WordSize < MAX_WORD_BUFFER) 496 m_WordBuffer[m_WordSize++] = ch; 497 498 if (!PDFCharIsNumeric(ch)) 499 bIsNumber = FALSE; 500 501 if (!PositionIsInBounds()) 502 break; 503 504 ch = m_pBuf[m_Pos++]; 505 506 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 507 m_Pos--; 508 break; 509 } 510 } 511 512 m_WordBuffer[m_WordSize] = 0; 513 if (bIsNumber) 514 return Number; 515 if (m_WordBuffer[0] == '/') 516 return Name; 517 518 if (m_WordSize == 4) { 519 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 520 m_pLastObj = new CPDF_Boolean(TRUE); 521 return Others; 522 } 523 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 524 m_pLastObj = new CPDF_Null; 525 return Others; 526 } 527 } else if (m_WordSize == 5) { 528 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 529 m_pLastObj = new CPDF_Boolean(FALSE); 530 return Others; 531 } 532 } 533 return Keyword; 534 } 535 536 void CPDF_StreamParser::SkipPathObject() { 537 FX_DWORD command_startpos = m_Pos; 538 if (!PositionIsInBounds()) 539 return; 540 541 int ch = m_pBuf[m_Pos++]; 542 while (1) { 543 while (PDFCharIsWhitespace(ch)) { 544 if (!PositionIsInBounds()) 545 return; 546 ch = m_pBuf[m_Pos++]; 547 } 548 549 if (!PDFCharIsNumeric(ch)) { 550 m_Pos = command_startpos; 551 return; 552 } 553 554 while (1) { 555 while (!PDFCharIsWhitespace(ch)) { 556 if (!PositionIsInBounds()) 557 return; 558 ch = m_pBuf[m_Pos++]; 559 } 560 561 while (PDFCharIsWhitespace(ch)) { 562 if (!PositionIsInBounds()) 563 return; 564 ch = m_pBuf[m_Pos++]; 565 } 566 567 if (PDFCharIsNumeric(ch)) 568 continue; 569 570 FX_DWORD op_startpos = m_Pos - 1; 571 while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) { 572 if (!PositionIsInBounds()) 573 return; 574 ch = m_pBuf[m_Pos++]; 575 } 576 577 if (m_Pos - op_startpos == 2) { 578 int op = m_pBuf[op_startpos]; 579 if (op == kPathOperatorSubpath || op == kPathOperatorLine || 580 op == kPathOperatorCubicBezier1 || 581 op == kPathOperatorCubicBezier2 || 582 op == kPathOperatorCubicBezier3) { 583 command_startpos = m_Pos; 584 break; 585 } 586 } else if (m_Pos - op_startpos == 3) { 587 if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] && 588 m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) { 589 command_startpos = m_Pos; 590 break; 591 } 592 } 593 m_Pos = command_startpos; 594 return; 595 } 596 } 597 } 598 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, 599 FX_BOOL bInArray) { 600 FX_BOOL bIsNumber; 601 GetNextWord(bIsNumber); 602 if (m_WordSize == 0) { 603 return NULL; 604 } 605 if (bIsNumber) { 606 m_WordBuffer[m_WordSize] = 0; 607 return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize)); 608 } 609 int first_char = m_WordBuffer[0]; 610 if (first_char == '/') { 611 return new CPDF_Name( 612 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 613 } 614 if (first_char == '(') { 615 return new CPDF_String(ReadString(), FALSE); 616 } 617 if (first_char == '<') { 618 if (m_WordSize == 1) { 619 return new CPDF_String(ReadHexString(), TRUE); 620 } 621 CPDF_Dictionary* pDict = new CPDF_Dictionary; 622 while (1) { 623 GetNextWord(bIsNumber); 624 if (m_WordSize == 0) { 625 pDict->Release(); 626 return nullptr; 627 } 628 if (m_WordSize == 2 && m_WordBuffer[0] == '>') { 629 break; 630 } 631 if (m_WordBuffer[0] != '/') { 632 pDict->Release(); 633 return nullptr; 634 } 635 CFX_ByteString key = 636 PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); 637 CPDF_Object* pObj = ReadNextObject(TRUE); 638 if (!pObj) { 639 pDict->Release(); 640 return nullptr; 641 } 642 if (!key.IsEmpty()) { 643 pDict->SetAt(key, pObj); 644 } else { 645 pObj->Release(); 646 } 647 } 648 return pDict; 649 } 650 if (first_char == '[') { 651 if (!bAllowNestedArray && bInArray) { 652 return NULL; 653 } 654 CPDF_Array* pArray = new CPDF_Array; 655 while (1) { 656 CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE); 657 if (pObj) { 658 pArray->Add(pObj); 659 continue; 660 } 661 662 if (m_WordSize == 0 || m_WordBuffer[0] == ']') 663 break; 664 } 665 return pArray; 666 } 667 if (m_WordSize == 4) { 668 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 669 return new CPDF_Boolean(TRUE); 670 } 671 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 672 return new CPDF_Null; 673 } 674 } else if (m_WordSize == 5) { 675 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 676 return new CPDF_Boolean(FALSE); 677 } 678 } 679 return NULL; 680 } 681 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) { 682 m_WordSize = 0; 683 bIsNumber = TRUE; 684 if (!PositionIsInBounds()) 685 return; 686 687 int ch = m_pBuf[m_Pos++]; 688 while (1) { 689 while (PDFCharIsWhitespace(ch)) { 690 if (!PositionIsInBounds()) { 691 return; 692 } 693 ch = m_pBuf[m_Pos++]; 694 } 695 696 if (ch != '%') 697 break; 698 699 while (1) { 700 if (!PositionIsInBounds()) 701 return; 702 ch = m_pBuf[m_Pos++]; 703 if (PDFCharIsLineEnding(ch)) 704 break; 705 } 706 } 707 708 if (PDFCharIsDelimiter(ch)) { 709 bIsNumber = FALSE; 710 m_WordBuffer[m_WordSize++] = ch; 711 if (ch == '/') { 712 while (1) { 713 if (!PositionIsInBounds()) 714 return; 715 ch = m_pBuf[m_Pos++]; 716 if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) { 717 m_Pos--; 718 return; 719 } 720 721 if (m_WordSize < MAX_WORD_BUFFER) 722 m_WordBuffer[m_WordSize++] = ch; 723 } 724 } else if (ch == '<') { 725 if (!PositionIsInBounds()) 726 return; 727 ch = m_pBuf[m_Pos++]; 728 if (ch == '<') 729 m_WordBuffer[m_WordSize++] = ch; 730 else 731 m_Pos--; 732 } else if (ch == '>') { 733 if (!PositionIsInBounds()) 734 return; 735 ch = m_pBuf[m_Pos++]; 736 if (ch == '>') 737 m_WordBuffer[m_WordSize++] = ch; 738 else 739 m_Pos--; 740 } 741 return; 742 } 743 744 while (1) { 745 if (m_WordSize < MAX_WORD_BUFFER) 746 m_WordBuffer[m_WordSize++] = ch; 747 if (!PDFCharIsNumeric(ch)) 748 bIsNumber = FALSE; 749 750 if (!PositionIsInBounds()) 751 return; 752 ch = m_pBuf[m_Pos++]; 753 if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) { 754 m_Pos--; 755 break; 756 } 757 } 758 } 759 760 CFX_ByteString CPDF_StreamParser::ReadString() { 761 if (!PositionIsInBounds()) 762 return CFX_ByteString(); 763 764 int ch = m_pBuf[m_Pos++]; 765 CFX_ByteTextBuf buf; 766 int parlevel = 0; 767 int status = 0, iEscCode = 0; 768 while (1) { 769 switch (status) { 770 case 0: 771 if (ch == ')') { 772 if (parlevel == 0) { 773 if (buf.GetLength() > MAX_STRING_LENGTH) { 774 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 775 } 776 return buf.GetByteString(); 777 } 778 parlevel--; 779 buf.AppendChar(')'); 780 } else if (ch == '(') { 781 parlevel++; 782 buf.AppendChar('('); 783 } else if (ch == '\\') { 784 status = 1; 785 } else { 786 buf.AppendChar((char)ch); 787 } 788 break; 789 case 1: 790 if (ch >= '0' && ch <= '7') { 791 iEscCode = FXSYS_toDecimalDigit(ch); 792 status = 2; 793 break; 794 } 795 if (ch == 'n') { 796 buf.AppendChar('\n'); 797 } else if (ch == 'r') { 798 buf.AppendChar('\r'); 799 } else if (ch == 't') { 800 buf.AppendChar('\t'); 801 } else if (ch == 'b') { 802 buf.AppendChar('\b'); 803 } else if (ch == 'f') { 804 buf.AppendChar('\f'); 805 } else if (ch == '\r') { 806 status = 4; 807 break; 808 } else if (ch == '\n') { 809 } else { 810 buf.AppendChar(ch); 811 } 812 status = 0; 813 break; 814 case 2: 815 if (ch >= '0' && ch <= '7') { 816 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch); 817 status = 3; 818 } else { 819 buf.AppendChar(iEscCode); 820 status = 0; 821 continue; 822 } 823 break; 824 case 3: 825 if (ch >= '0' && ch <= '7') { 826 iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch); 827 buf.AppendChar(iEscCode); 828 status = 0; 829 } else { 830 buf.AppendChar(iEscCode); 831 status = 0; 832 continue; 833 } 834 break; 835 case 4: 836 status = 0; 837 if (ch != '\n') { 838 continue; 839 } 840 break; 841 } 842 if (!PositionIsInBounds()) 843 break; 844 845 ch = m_pBuf[m_Pos++]; 846 } 847 if (PositionIsInBounds()) 848 ch = m_pBuf[m_Pos++]; 849 850 if (buf.GetLength() > MAX_STRING_LENGTH) { 851 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 852 } 853 return buf.GetByteString(); 854 } 855 CFX_ByteString CPDF_StreamParser::ReadHexString() { 856 if (!PositionIsInBounds()) 857 return CFX_ByteString(); 858 859 CFX_ByteTextBuf buf; 860 bool bFirst = true; 861 int code = 0; 862 while (PositionIsInBounds()) { 863 int ch = m_pBuf[m_Pos++]; 864 865 if (ch == '>') 866 break; 867 868 if (!std::isxdigit(ch)) 869 continue; 870 871 int val = FXSYS_toHexDigit(ch); 872 if (bFirst) { 873 code = val * 16; 874 } else { 875 code += val; 876 buf.AppendByte((uint8_t)code); 877 } 878 bFirst = !bFirst; 879 } 880 if (!bFirst) 881 buf.AppendChar((char)code); 882 883 if (buf.GetLength() > MAX_STRING_LENGTH) 884 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 885 886 return buf.GetByteString(); 887 } 888 889 bool CPDF_StreamParser::PositionIsInBounds() const { 890 return m_Pos < m_Size; 891 } 892 893 CPDF_ContentParser::CPDF_ContentParser() 894 : m_Status(Ready), 895 m_InternalStage(STAGE_GETCONTENT), 896 m_pObjects(nullptr), 897 m_bForm(false), 898 m_pType3Char(nullptr), 899 m_pData(nullptr), 900 m_Size(0), 901 m_CurrentOffset(0) {} 902 CPDF_ContentParser::~CPDF_ContentParser() { 903 if (!m_pSingleStream) 904 FX_Free(m_pData); 905 } 906 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) { 907 if (m_Status != Ready || !pPage || !pPage->m_pDocument || 908 !pPage->m_pFormDict) { 909 m_Status = Done; 910 return; 911 } 912 m_pObjects = pPage; 913 m_bForm = FALSE; 914 if (pOptions) { 915 m_Options = *pOptions; 916 } 917 m_Status = ToBeContinued; 918 m_InternalStage = STAGE_GETCONTENT; 919 m_CurrentOffset = 0; 920 921 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue("Contents"); 922 if (!pContent) { 923 m_Status = Done; 924 return; 925 } 926 if (CPDF_Stream* pStream = pContent->AsStream()) { 927 m_nStreams = 0; 928 m_pSingleStream.reset(new CPDF_StreamAcc); 929 m_pSingleStream->LoadAllData(pStream, FALSE); 930 } else if (CPDF_Array* pArray = pContent->AsArray()) { 931 m_nStreams = pArray->GetCount(); 932 if (m_nStreams) 933 m_StreamArray.resize(m_nStreams); 934 else 935 m_Status = Done; 936 } else { 937 m_Status = Done; 938 } 939 } 940 void CPDF_ContentParser::Start(CPDF_Form* pForm, 941 CPDF_AllStates* pGraphicStates, 942 CFX_Matrix* pParentMatrix, 943 CPDF_Type3Char* pType3Char, 944 CPDF_ParseOptions* pOptions, 945 int level) { 946 m_pType3Char = pType3Char; 947 m_pObjects = pForm; 948 m_bForm = TRUE; 949 CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrix("Matrix"); 950 if (pGraphicStates) { 951 form_matrix.Concat(pGraphicStates->m_CTM); 952 } 953 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray("BBox"); 954 CFX_FloatRect form_bbox; 955 CPDF_Path ClipPath; 956 if (pBBox) { 957 form_bbox = pBBox->GetRect(); 958 ClipPath.New(); 959 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, 960 form_bbox.top); 961 ClipPath.Transform(&form_matrix); 962 if (pParentMatrix) { 963 ClipPath.Transform(pParentMatrix); 964 } 965 form_bbox.Transform(&form_matrix); 966 if (pParentMatrix) { 967 form_bbox.Transform(pParentMatrix); 968 } 969 } 970 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict("Resources"); 971 m_pParser.reset(new CPDF_StreamContentParser( 972 pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, 973 pParentMatrix, pForm, pResources, &form_bbox, pOptions, pGraphicStates, 974 level)); 975 m_pParser->GetCurStates()->m_CTM = form_matrix; 976 m_pParser->GetCurStates()->m_ParentMatrix = form_matrix; 977 if (ClipPath.NotNull()) { 978 m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, 979 TRUE); 980 } 981 if (pForm->m_Transparency & PDFTRANS_GROUP) { 982 CPDF_GeneralStateData* pData = 983 m_pParser->GetCurStates()->m_GeneralState.GetModify(); 984 pData->m_BlendType = FXDIB_BLEND_NORMAL; 985 pData->m_StrokeAlpha = 1.0f; 986 pData->m_FillAlpha = 1.0f; 987 pData->m_pSoftMask = NULL; 988 } 989 m_nStreams = 0; 990 m_pSingleStream.reset(new CPDF_StreamAcc); 991 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE); 992 m_pData = (uint8_t*)m_pSingleStream->GetData(); 993 m_Size = m_pSingleStream->GetSize(); 994 m_Status = ToBeContinued; 995 m_InternalStage = STAGE_PARSE; 996 m_CurrentOffset = 0; 997 } 998 void CPDF_ContentParser::Continue(IFX_Pause* pPause) { 999 int steps = 0; 1000 while (m_Status == ToBeContinued) { 1001 if (m_InternalStage == STAGE_GETCONTENT) { 1002 if (m_CurrentOffset == m_nStreams) { 1003 if (!m_StreamArray.empty()) { 1004 FX_SAFE_DWORD safeSize = 0; 1005 for (const auto& stream : m_StreamArray) { 1006 safeSize += stream->GetSize(); 1007 safeSize += 1; 1008 } 1009 if (!safeSize.IsValid()) { 1010 m_Status = Done; 1011 return; 1012 } 1013 m_Size = safeSize.ValueOrDie(); 1014 m_pData = FX_Alloc(uint8_t, m_Size); 1015 FX_DWORD pos = 0; 1016 for (const auto& stream : m_StreamArray) { 1017 FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize()); 1018 pos += stream->GetSize(); 1019 m_pData[pos++] = ' '; 1020 } 1021 m_StreamArray.clear(); 1022 } else { 1023 m_pData = (uint8_t*)m_pSingleStream->GetData(); 1024 m_Size = m_pSingleStream->GetSize(); 1025 } 1026 m_InternalStage = STAGE_PARSE; 1027 m_CurrentOffset = 0; 1028 } else { 1029 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray("Contents"); 1030 m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc); 1031 CPDF_Stream* pStreamObj = ToStream( 1032 pContent ? pContent->GetElementValue(m_CurrentOffset) : nullptr); 1033 m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE); 1034 m_CurrentOffset++; 1035 } 1036 } 1037 if (m_InternalStage == STAGE_PARSE) { 1038 if (!m_pParser) { 1039 m_pParser.reset(new CPDF_StreamContentParser( 1040 m_pObjects->m_pDocument, m_pObjects->m_pPageResources, nullptr, 1041 nullptr, m_pObjects, m_pObjects->m_pResources, &m_pObjects->m_BBox, 1042 &m_Options, nullptr, 0)); 1043 m_pParser->GetCurStates()->m_ColorState.GetModify()->Default(); 1044 } 1045 if (m_CurrentOffset >= m_Size) { 1046 m_InternalStage = STAGE_CHECKCLIP; 1047 } else { 1048 m_CurrentOffset += 1049 m_pParser->Parse(m_pData + m_CurrentOffset, 1050 m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); 1051 } 1052 } 1053 if (m_InternalStage == STAGE_CHECKCLIP) { 1054 if (m_pType3Char) { 1055 m_pType3Char->m_bColored = m_pParser->IsColored(); 1056 m_pType3Char->m_Width = 1057 FXSYS_round(m_pParser->GetType3Data()[0] * 1000); 1058 m_pType3Char->m_BBox.left = 1059 FXSYS_round(m_pParser->GetType3Data()[2] * 1000); 1060 m_pType3Char->m_BBox.bottom = 1061 FXSYS_round(m_pParser->GetType3Data()[3] * 1000); 1062 m_pType3Char->m_BBox.right = 1063 FXSYS_round(m_pParser->GetType3Data()[4] * 1000); 1064 m_pType3Char->m_BBox.top = 1065 FXSYS_round(m_pParser->GetType3Data()[5] * 1000); 1066 } 1067 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition(); 1068 while (pos) { 1069 CPDF_PageObject* pObj = 1070 (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos); 1071 if (pObj->m_ClipPath.IsNull()) { 1072 continue; 1073 } 1074 if (pObj->m_ClipPath.GetPathCount() != 1) { 1075 continue; 1076 } 1077 if (pObj->m_ClipPath.GetTextCount()) { 1078 continue; 1079 } 1080 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); 1081 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) { 1082 continue; 1083 } 1084 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), 1085 ClipPath.GetPointX(2), ClipPath.GetPointY(2)); 1086 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, 1087 pObj->m_Top); 1088 if (old_rect.Contains(obj_rect)) { 1089 pObj->m_ClipPath.SetNull(); 1090 } 1091 } 1092 m_Status = Done; 1093 return; 1094 } 1095 steps++; 1096 if (pPause && pPause->NeedToPauseNow()) { 1097 break; 1098 } 1099 } 1100 } 1101