1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "../../../include/fpdfapi/fpdf_page.h" 8 #include "../../../include/fpdfapi/fpdf_module.h" 9 #include "../../../include/fxcodec/fx_codec.h" 10 #include "pageint.h" 11 #include <limits.h> 12 extern const FX_LPCSTR _PDF_OpCharType = 13 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 14 "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII" 15 "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII" 16 "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII" 17 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 18 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 19 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 20 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"; 21 FX_BOOL _PDF_HasInvalidOpChar(FX_LPCSTR op) 22 { 23 if(!op) { 24 return FALSE; 25 } 26 FX_BYTE ch; 27 while((ch = *op++)) { 28 if(_PDF_OpCharType[ch] == 'I') { 29 return TRUE; 30 } 31 } 32 return FALSE; 33 } 34 class CPDF_StreamParserAutoClearer { 35 public: 36 CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable, CPDF_StreamParser* new_parser) 37 : scoped_variable_(scoped_variable) { 38 *scoped_variable_ = new_parser; 39 } 40 ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; } 41 private: 42 CPDF_StreamParser** scoped_variable_; 43 }; 44 FX_DWORD CPDF_StreamContentParser::Parse(FX_LPCBYTE pData, FX_DWORD dwSize, FX_DWORD max_cost) 45 { 46 if (m_Level > _FPDF_MAX_FORM_LEVEL_) { 47 return dwSize; 48 } 49 FX_DWORD InitObjCount = m_pObjectList->CountObjects(); 50 CPDF_StreamParser syntax(pData, dwSize); 51 CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax); 52 m_CompatCount = 0; 53 while (1) { 54 FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount; 55 if (max_cost && cost >= max_cost) { 56 break; 57 } 58 switch (syntax.ParseNextElement()) { 59 case CPDF_StreamParser::EndOfData: 60 return m_pSyntax->GetPos(); 61 case CPDF_StreamParser::Keyword: 62 if(!OnOperator((char*)syntax.GetWordBuf()) && _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) { 63 m_bAbort = TRUE; 64 } 65 if (m_bAbort) { 66 return m_pSyntax->GetPos(); 67 } 68 ClearAllParams(); 69 break; 70 case CPDF_StreamParser::Number: 71 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize()); 72 break; 73 case CPDF_StreamParser::Name: 74 AddNameParam((FX_LPCSTR)syntax.GetWordBuf() + 1, syntax.GetWordSize() - 1); 75 break; 76 default: 77 AddObjectParam(syntax.GetObject()); 78 } 79 } 80 return m_pSyntax->GetPos(); 81 } 82 void _PDF_ReplaceAbbr(CPDF_Object* pObj); 83 void CPDF_StreamContentParser::Handle_BeginImage() 84 { 85 FX_FILESIZE savePos = m_pSyntax->GetPos(); 86 CPDF_Dictionary* pDict = CPDF_Dictionary::Create(); 87 while (1) { 88 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 89 if (type == CPDF_StreamParser::Keyword) { 90 CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()); 91 if (bsKeyword != FX_BSTRC("ID")) { 92 m_pSyntax->SetPos(savePos); 93 pDict->Release(); 94 return; 95 } 96 } 97 if (type != CPDF_StreamParser::Name) { 98 break; 99 } 100 CFX_ByteString key((FX_LPCSTR)m_pSyntax->GetWordBuf() + 1, m_pSyntax->GetWordSize() - 1); 101 CPDF_Object* pObj = m_pSyntax->ReadNextObject(); 102 if (!key.IsEmpty()) { 103 pDict->SetAt(key, pObj, m_pDocument); 104 } else if (pObj) { 105 pObj->Release(); 106 } 107 } 108 _PDF_ReplaceAbbr(pDict); 109 CPDF_Object* pCSObj = NULL; 110 if (pDict->KeyExist(FX_BSTRC("ColorSpace"))) { 111 pCSObj = pDict->GetElementValue(FX_BSTRC("ColorSpace")); 112 if (pCSObj->GetType() == PDFOBJ_NAME) { 113 CFX_ByteString name = pCSObj->GetString(); 114 if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) { 115 pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name); 116 if (pCSObj && !pCSObj->GetObjNum()) { 117 pCSObj = pCSObj->Clone(); 118 pDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument); 119 } 120 } 121 } 122 } 123 CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage); 124 while (1) { 125 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 126 if (type == CPDF_StreamParser::EndOfData) { 127 break; 128 } 129 if (type != CPDF_StreamParser::Keyword) { 130 continue; 131 } 132 if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' && 133 m_pSyntax->GetWordBuf()[1] == 'I') { 134 break; 135 } 136 } 137 if (m_Options.m_bTextOnly) { 138 if (pStream) { 139 pStream->Release(); 140 } else { 141 pDict->Release(); 142 } 143 return; 144 } 145 pDict->SetAtName(FX_BSTRC("Subtype"), FX_BSTRC("Image")); 146 CPDF_ImageObject *pImgObj = AddImage(pStream, NULL, TRUE); 147 if (!pImgObj) { 148 if (pStream) { 149 pStream->Release(); 150 } else { 151 pDict->Release(); 152 } 153 } 154 } 155 void CPDF_StreamContentParser::ParsePathObject() 156 { 157 FX_FLOAT params[6] = {0}; 158 int nParams = 0; 159 int last_pos = m_pSyntax->GetPos(); 160 while (1) { 161 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 162 FX_BOOL bProcessed = TRUE; 163 switch (type) { 164 case CPDF_StreamParser::EndOfData: 165 return; 166 case CPDF_StreamParser::Keyword: { 167 int len = m_pSyntax->GetWordSize(); 168 if (len == 1) { 169 switch (m_pSyntax->GetWordBuf()[0]) { 170 case 'm': 171 AddPathPoint(params[0], params[1], FXPT_MOVETO); 172 nParams = 0; 173 break; 174 case 'l': 175 AddPathPoint(params[0], params[1], FXPT_LINETO); 176 nParams = 0; 177 break; 178 case 'c': 179 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 180 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 181 AddPathPoint(params[4], params[5], FXPT_BEZIERTO); 182 nParams = 0; 183 break; 184 case 'v': 185 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO); 186 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 187 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 188 nParams = 0; 189 break; 190 case 'y': 191 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 192 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 193 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 194 nParams = 0; 195 break; 196 case 'h': 197 Handle_ClosePath(); 198 nParams = 0; 199 break; 200 default: 201 bProcessed = FALSE; 202 break; 203 } 204 } else if (len == 2) { 205 if (m_pSyntax->GetWordBuf()[0] == 'r' && m_pSyntax->GetWordBuf()[1] == 'e') { 206 AddPathRect(params[0], params[1], params[2], params[3]); 207 nParams = 0; 208 } else { 209 bProcessed = FALSE; 210 } 211 } else { 212 bProcessed = FALSE; 213 } 214 if (bProcessed) { 215 last_pos = m_pSyntax->GetPos(); 216 } 217 break; 218 } 219 case CPDF_StreamParser::Number: { 220 if (nParams == 6) { 221 break; 222 } 223 FX_BOOL bInteger; 224 int value; 225 FX_atonum(CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), bInteger, &value); 226 params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value; 227 break; 228 } 229 default: 230 bProcessed = FALSE; 231 } 232 if (!bProcessed) { 233 m_pSyntax->SetPos(last_pos); 234 return; 235 } 236 } 237 } 238 CPDF_StreamParser::CPDF_StreamParser(const FX_BYTE* pData, FX_DWORD dwSize) 239 { 240 m_pBuf = pData; 241 m_Size = dwSize; 242 m_Pos = 0; 243 m_pLastObj = NULL; 244 } 245 CPDF_StreamParser::~CPDF_StreamParser() 246 { 247 if (m_pLastObj) { 248 m_pLastObj->Release(); 249 } 250 } 251 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, FX_LPBYTE& dest_buf, FX_DWORD& dest_size) 252 { 253 if (pDecoder == NULL) { 254 return (FX_DWORD) - 1; 255 } 256 int ncomps = pDecoder->CountComps(); 257 int bpc = pDecoder->GetBPC(); 258 int width = pDecoder->GetWidth(); 259 int height = pDecoder->GetHeight(); 260 int pitch = (width * ncomps * bpc + 7) / 8; 261 if (height == 0 || pitch > (1 << 30) / height) { 262 delete pDecoder; 263 return -1; 264 } 265 dest_buf = FX_Alloc2D(FX_BYTE, pitch, height); 266 dest_size = pitch * height; // Safe since checked alloc returned. 267 for (int row = 0; row < height; row ++) { 268 FX_LPBYTE pLine = pDecoder->GetScanline(row); 269 if (pLine == NULL) { 270 break; 271 } 272 FXSYS_memcpy32(dest_buf + row * pitch, pLine, pitch); 273 } 274 FX_DWORD srcoff = pDecoder->GetSrcOffset(); 275 delete pDecoder; 276 return srcoff; 277 } 278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(FX_LPCBYTE src_buf, FX_DWORD src_size, int width, int height, 279 const CPDF_Dictionary* pParams); 280 FX_DWORD _A85Decode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 281 FX_DWORD _HexDecode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 282 FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const FX_BYTE* src_buf, FX_DWORD src_size, CPDF_Dictionary* pParams, 283 FX_DWORD estimated_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 284 FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit, 285 int width, int height, CFX_ByteString& decoder, 286 CPDF_Dictionary* pParam, FX_LPBYTE& dest_buf, FX_DWORD& dest_size) 287 { 288 if (decoder == FX_BSTRC("CCITTFaxDecode") || decoder == FX_BSTRC("CCF")) { 289 ICodec_ScanlineDecoder* pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); 290 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 291 } else if (decoder == FX_BSTRC("ASCII85Decode") || decoder == FX_BSTRC("A85")) { 292 return _A85Decode(src_buf, limit, dest_buf, dest_size); 293 } else if (decoder == FX_BSTRC("ASCIIHexDecode") || decoder == FX_BSTRC("AHx")) { 294 return _HexDecode(src_buf, limit, dest_buf, dest_size); 295 } else if (decoder == FX_BSTRC("FlateDecode") || decoder == FX_BSTRC("Fl")) { 296 return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, dest_buf, dest_size); 297 } else if (decoder == FX_BSTRC("LZWDecode") || decoder == FX_BSTRC("LZW")) { 298 return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, dest_size); 299 } else if (decoder == FX_BSTRC("DCTDecode") || decoder == FX_BSTRC("DCT")) { 300 ICodec_ScanlineDecoder* pDecoder = CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( 301 src_buf, limit, width, height, 0, pParam ? pParam->GetInteger(FX_BSTRC("ColorTransform"), 1) : 1); 302 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 303 } else if (decoder == FX_BSTRC("RunLengthDecode") || decoder == FX_BSTRC("RL")) { 304 return RunLengthDecode(src_buf, limit, dest_buf, dest_size); 305 } 306 dest_size = 0; 307 dest_buf = 0; 308 return (FX_DWORD) - 1; 309 } 310 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode) 311 { 312 if (m_Pos == m_Size) { 313 return NULL; 314 } 315 if (PDF_CharType[m_pBuf[m_Pos]] == 'W') { 316 m_Pos ++; 317 } 318 CFX_ByteString Decoder; 319 CPDF_Dictionary* pParam = NULL; 320 CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter")); 321 if (pFilter == NULL) { 322 } else if (pFilter->GetType() == PDFOBJ_ARRAY) { 323 Decoder = ((CPDF_Array*)pFilter)->GetString(0); 324 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); 325 if (pParams) { 326 pParam = pParams->GetDict(0); 327 } 328 } else { 329 Decoder = pFilter->GetString(); 330 pParam = pDict->GetDict(FX_BSTRC("DecodeParms")); 331 } 332 FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width")); 333 FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height")); 334 FX_DWORD OrigSize = 0; 335 if (pCSObj != NULL) { 336 FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent")); 337 FX_DWORD nComponents = 1; 338 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); 339 if (pCS == NULL) { 340 nComponents = 3; 341 } else { 342 nComponents = pCS->CountComponents(); 343 pDoc->GetPageData()->ReleaseColorSpace(pCSObj); 344 } 345 FX_DWORD pitch = width; 346 if (bpc && pitch > INT_MAX / bpc) { 347 return NULL; 348 } 349 pitch *= bpc; 350 if (nComponents && pitch > INT_MAX / nComponents) { 351 return NULL; 352 } 353 pitch *= nComponents; 354 if (pitch > INT_MAX - 7) { 355 return NULL; 356 } 357 pitch += 7; 358 pitch /= 8; 359 OrigSize = pitch; 360 } else { 361 if (width > INT_MAX - 7) { 362 return NULL; 363 } 364 OrigSize = ((width + 7) / 8); 365 } 366 if (height && OrigSize > INT_MAX / height) { 367 return NULL; 368 } 369 OrigSize *= height; 370 FX_LPBYTE pData = NULL; 371 FX_DWORD dwStreamSize; 372 if (Decoder.IsEmpty()) { 373 if (OrigSize > m_Size - m_Pos) { 374 OrigSize = m_Size - m_Pos; 375 } 376 pData = FX_Alloc(FX_BYTE, OrigSize); 377 FXSYS_memcpy32(pData, m_pBuf + m_Pos, OrigSize); 378 dwStreamSize = OrigSize; 379 m_Pos += OrigSize; 380 } else { 381 FX_DWORD dwDestSize = OrigSize; 382 dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam, 383 pData, dwDestSize); 384 if ((int)dwStreamSize < 0) { 385 return NULL; 386 } 387 if (bDecode) { 388 m_Pos += dwStreamSize; 389 dwStreamSize = dwDestSize; 390 if (pFilter->GetType() == PDFOBJ_ARRAY) { 391 ((CPDF_Array*)pFilter)->RemoveAt(0); 392 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); 393 if (pParams) { 394 pParams->RemoveAt(0); 395 } 396 } else { 397 pDict->RemoveAt(FX_BSTRC("Filter")); 398 pDict->RemoveAt(FX_BSTRC("DecodeParms")); 399 } 400 } else { 401 if (pData) { 402 FX_Free(pData); 403 } 404 FX_DWORD dwSavePos = m_Pos; 405 m_Pos += dwStreamSize; 406 while (1) { 407 FX_DWORD dwPrevPos = m_Pos; 408 CPDF_StreamParser::SyntaxType type = ParseNextElement(); 409 if (type == CPDF_StreamParser::EndOfData) { 410 break; 411 } 412 if (type != CPDF_StreamParser::Keyword) { 413 dwStreamSize += m_Pos - dwPrevPos; 414 continue; 415 } 416 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && 417 GetWordBuf()[1] == 'I') { 418 m_Pos = dwPrevPos; 419 break; 420 } 421 dwStreamSize += m_Pos - dwPrevPos; 422 } 423 m_Pos = dwSavePos; 424 pData = FX_Alloc(FX_BYTE, dwStreamSize); 425 FXSYS_memcpy32(pData, m_pBuf + m_Pos, dwStreamSize); 426 m_Pos += dwStreamSize; 427 } 428 } 429 pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize); 430 return CPDF_Stream::Create(pData, dwStreamSize, pDict); 431 } 432 #define MAX_WORD_BUFFER 256 433 #define MAX_STRING_LENGTH 32767 434 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274) 435 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e) 436 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166) 437 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() 438 { 439 if (m_pLastObj) { 440 m_pLastObj->Release(); 441 m_pLastObj = NULL; 442 } 443 m_WordSize = 0; 444 FX_BOOL bIsNumber = TRUE; 445 if (m_Pos >= m_Size) { 446 return EndOfData; 447 } 448 int ch = m_pBuf[m_Pos++]; 449 int type = PDF_CharType[ch]; 450 while (1) { 451 while (type == 'W') { 452 if (m_Size <= m_Pos) { 453 return EndOfData; 454 } 455 ch = m_pBuf[m_Pos++]; 456 type = PDF_CharType[ch]; 457 } 458 if (ch != '%') { 459 break; 460 } 461 while (1) { 462 if (m_Size <= m_Pos) { 463 return EndOfData; 464 } 465 ch = m_pBuf[m_Pos++]; 466 if (ch == '\r' || ch == '\n') { 467 break; 468 } 469 } 470 type = PDF_CharType[ch]; 471 } 472 if (type == 'D' && ch != '/') { 473 m_Pos --; 474 m_pLastObj = ReadNextObject(); 475 return Others; 476 } 477 while (1) { 478 if (m_WordSize < MAX_WORD_BUFFER) { 479 m_WordBuffer[m_WordSize++] = ch; 480 } 481 if (type != 'N') { 482 bIsNumber = FALSE; 483 } 484 if (m_Size <= m_Pos) { 485 break; 486 } 487 ch = m_pBuf[m_Pos++]; 488 type = PDF_CharType[ch]; 489 if (type == 'D' || type == 'W') { 490 m_Pos --; 491 break; 492 } 493 } 494 m_WordBuffer[m_WordSize] = 0; 495 if (bIsNumber) { 496 return Number; 497 } 498 if (m_WordBuffer[0] == '/') { 499 return Name; 500 } 501 if (m_WordSize == 4) { 502 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 503 m_pLastObj = CPDF_Boolean::Create(TRUE); 504 return Others; 505 } 506 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 507 m_pLastObj = CPDF_Null::Create(); 508 return Others; 509 } 510 } else if (m_WordSize == 5) { 511 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 512 m_pLastObj = CPDF_Boolean::Create(FALSE); 513 return Others; 514 } 515 } 516 return Keyword; 517 } 518 void CPDF_StreamParser::SkipPathObject() 519 { 520 FX_DWORD command_startpos = m_Pos; 521 if (m_Pos >= m_Size) { 522 return; 523 } 524 int ch = m_pBuf[m_Pos++]; 525 int type = PDF_CharType[ch]; 526 while (1) { 527 while (type == 'W') { 528 if (m_Pos >= m_Size) { 529 return; 530 } 531 ch = m_pBuf[m_Pos++]; 532 type = PDF_CharType[ch]; 533 } 534 if (type != 'N') { 535 m_Pos = command_startpos; 536 return; 537 } 538 while (1) { 539 while (type != 'W') { 540 if (m_Pos >= m_Size) { 541 return; 542 } 543 ch = m_pBuf[m_Pos++]; 544 type = PDF_CharType[ch]; 545 } 546 while (type == 'W') { 547 if (m_Pos >= m_Size) { 548 return; 549 } 550 ch = m_pBuf[m_Pos++]; 551 type = PDF_CharType[ch]; 552 } 553 if (type == 'N') { 554 continue; 555 } 556 FX_DWORD op_startpos = m_Pos - 1; 557 while (type != 'W' && type != 'D') { 558 if (m_Pos >= m_Size) { 559 return; 560 } 561 ch = m_pBuf[m_Pos++]; 562 type = PDF_CharType[ch]; 563 } 564 if (m_Pos - op_startpos == 2) { 565 int op = m_pBuf[op_startpos]; 566 if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') { 567 command_startpos = m_Pos; 568 break; 569 } 570 } else if (m_Pos - op_startpos == 3) { 571 if (m_pBuf[op_startpos] == 'r' && m_pBuf[op_startpos + 1] == 'e') { 572 command_startpos = m_Pos; 573 break; 574 } 575 } 576 m_Pos = command_startpos; 577 return; 578 } 579 } 580 } 581 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, FX_BOOL bInArray) 582 { 583 FX_BOOL bIsNumber; 584 GetNextWord(bIsNumber); 585 if (m_WordSize == 0) { 586 return NULL; 587 } 588 if (bIsNumber) { 589 m_WordBuffer[m_WordSize] = 0; 590 return CPDF_Number::Create(CFX_ByteStringC(m_WordBuffer, m_WordSize)); 591 } 592 int first_char = m_WordBuffer[0]; 593 if (first_char == '/') { 594 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 595 } 596 if (first_char == '(') { 597 return CPDF_String::Create(ReadString()); 598 } 599 if (first_char == '<') { 600 if (m_WordSize == 1) { 601 return CPDF_String::Create(ReadHexString(), TRUE); 602 } 603 CPDF_Dictionary* pDict = CPDF_Dictionary::Create(); 604 while (1) { 605 GetNextWord(bIsNumber); 606 if (m_WordSize == 0) { 607 pDict->Release(); 608 return NULL; 609 } 610 if (m_WordSize == 2 && m_WordBuffer[0] == '>') { 611 break; 612 } 613 if (m_WordBuffer[0] != '/') { 614 pDict->Release(); 615 return NULL; 616 } 617 CFX_ByteString key = PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); 618 CPDF_Object* pObj = ReadNextObject(TRUE); 619 if (pObj == NULL) { 620 if (pDict) { 621 pDict->Release(); 622 } 623 return NULL; 624 } 625 if (!key.IsEmpty()) { 626 pDict->SetAt(key, pObj); 627 } else { 628 pObj->Release(); 629 } 630 } 631 return pDict; 632 } 633 if (first_char == '[') { 634 if (!bAllowNestedArray && bInArray) { 635 return NULL; 636 } 637 CPDF_Array* pArray = CPDF_Array::Create(); 638 while (1) { 639 CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE); 640 if (pObj == NULL) { 641 if (m_WordSize == 0 || m_WordBuffer[0] == ']') { 642 return pArray; 643 } 644 if (m_WordBuffer[0] == '[') { 645 continue; 646 } 647 } else { 648 pArray->Add(pObj); 649 } 650 } 651 } 652 if (m_WordSize == 4) { 653 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 654 return CPDF_Boolean::Create(TRUE); 655 } 656 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 657 return CPDF_Null::Create(); 658 } 659 } else if (m_WordSize == 5) { 660 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 661 return CPDF_Boolean::Create(FALSE); 662 } 663 } 664 return NULL; 665 } 666 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) 667 { 668 m_WordSize = 0; 669 bIsNumber = TRUE; 670 if (m_Size <= m_Pos) { 671 return; 672 } 673 int ch = m_pBuf[m_Pos++]; 674 int type = PDF_CharType[ch]; 675 while (1) { 676 while (type == 'W') { 677 if (m_Size <= m_Pos) { 678 return; 679 } 680 ch = m_pBuf[m_Pos++]; 681 type = PDF_CharType[ch]; 682 } 683 if (ch != '%') { 684 break; 685 } 686 while (1) { 687 if (m_Size <= m_Pos) { 688 return; 689 } 690 ch = m_pBuf[m_Pos++]; 691 if (ch == '\r' || ch == '\n') { 692 break; 693 } 694 } 695 type = PDF_CharType[ch]; 696 } 697 if (type == 'D') { 698 bIsNumber = FALSE; 699 m_WordBuffer[m_WordSize++] = ch; 700 if (ch == '/') { 701 while (1) { 702 if (m_Size <= m_Pos) { 703 return; 704 } 705 ch = m_pBuf[m_Pos++]; 706 type = PDF_CharType[ch]; 707 if (type != 'R' && type != 'N') { 708 m_Pos --; 709 return; 710 } 711 if (m_WordSize < MAX_WORD_BUFFER) { 712 m_WordBuffer[m_WordSize++] = ch; 713 } 714 } 715 } else if (ch == '<') { 716 if (m_Size <= m_Pos) { 717 return; 718 } 719 ch = m_pBuf[m_Pos++]; 720 if (ch == '<') { 721 m_WordBuffer[m_WordSize++] = ch; 722 } else { 723 m_Pos --; 724 } 725 } else if (ch == '>') { 726 if (m_Size <= m_Pos) { 727 return; 728 } 729 ch = m_pBuf[m_Pos++]; 730 if (ch == '>') { 731 m_WordBuffer[m_WordSize++] = ch; 732 } else { 733 m_Pos --; 734 } 735 } 736 return; 737 } 738 while (1) { 739 if (m_WordSize < MAX_WORD_BUFFER) { 740 m_WordBuffer[m_WordSize++] = ch; 741 } 742 if (type != 'N') { 743 bIsNumber = FALSE; 744 } 745 if (m_Size <= m_Pos) { 746 return; 747 } 748 ch = m_pBuf[m_Pos++]; 749 type = PDF_CharType[ch]; 750 if (type == 'D' || type == 'W') { 751 m_Pos --; 752 break; 753 } 754 } 755 } 756 CFX_ByteString CPDF_StreamParser::ReadString() 757 { 758 if (m_Size <= m_Pos) { 759 return CFX_ByteString(); 760 } 761 int ch = m_pBuf[m_Pos++]; 762 CFX_ByteTextBuf buf; 763 int parlevel = 0; 764 int status = 0, iEscCode = 0; 765 while (1) { 766 switch (status) { 767 case 0: 768 if (ch == ')') { 769 if (parlevel == 0) { 770 if (buf.GetLength() > MAX_STRING_LENGTH) { 771 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 772 } 773 return buf.GetByteString(); 774 } 775 parlevel --; 776 buf.AppendChar(')'); 777 } else if (ch == '(') { 778 parlevel ++; 779 buf.AppendChar('('); 780 } else if (ch == '\\') { 781 status = 1; 782 } else { 783 buf.AppendChar((char)ch); 784 } 785 break; 786 case 1: 787 if (ch >= '0' && ch <= '7') { 788 iEscCode = ch - '0'; 789 status = 2; 790 break; 791 } 792 if (ch == 'n') { 793 buf.AppendChar('\n'); 794 } else if (ch == 'r') { 795 buf.AppendChar('\r'); 796 } else if (ch == 't') { 797 buf.AppendChar('\t'); 798 } else if (ch == 'b') { 799 buf.AppendChar('\b'); 800 } else if (ch == 'f') { 801 buf.AppendChar('\f'); 802 } else if (ch == '\r') { 803 status = 4; 804 break; 805 } else if (ch == '\n') { 806 } else { 807 buf.AppendChar(ch); 808 } 809 status = 0; 810 break; 811 case 2: 812 if (ch >= '0' && ch <= '7') { 813 iEscCode = iEscCode * 8 + ch - '0'; 814 status = 3; 815 } else { 816 buf.AppendChar(iEscCode); 817 status = 0; 818 continue; 819 } 820 break; 821 case 3: 822 if (ch >= '0' && ch <= '7') { 823 iEscCode = iEscCode * 8 + ch - '0'; 824 buf.AppendChar(iEscCode); 825 status = 0; 826 } else { 827 buf.AppendChar(iEscCode); 828 status = 0; 829 continue; 830 } 831 break; 832 case 4: 833 status = 0; 834 if (ch != '\n') { 835 continue; 836 } 837 break; 838 } 839 if (m_Size <= m_Pos) { 840 break; 841 } 842 ch = m_pBuf[m_Pos++]; 843 } 844 if (m_Size > m_Pos) { 845 ch = m_pBuf[m_Pos++]; 846 } 847 if (buf.GetLength() > MAX_STRING_LENGTH) { 848 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 849 } 850 return buf.GetByteString(); 851 } 852 CFX_ByteString CPDF_StreamParser::ReadHexString() 853 { 854 if (m_Size <= m_Pos) { 855 return CFX_ByteString(); 856 } 857 int ch = m_pBuf[m_Pos++]; 858 CFX_ByteTextBuf buf; 859 FX_BOOL bFirst = TRUE; 860 int code = 0; 861 while (1) { 862 if (ch == '>') { 863 break; 864 } 865 if (ch >= '0' && ch <= '9') { 866 if (bFirst) { 867 code = (ch - '0') * 16; 868 } else { 869 code += ch - '0'; 870 buf.AppendChar((char)code); 871 } 872 bFirst = !bFirst; 873 } else if (ch >= 'A' && ch <= 'F') { 874 if (bFirst) { 875 code = (ch - 'A' + 10) * 16; 876 } else { 877 code += ch - 'A' + 10; 878 buf.AppendChar((char)code); 879 } 880 bFirst = !bFirst; 881 } else if (ch >= 'a' && ch <= 'f') { 882 if (bFirst) { 883 code = (ch - 'a' + 10) * 16; 884 } else { 885 code += ch - 'a' + 10; 886 buf.AppendChar((char)code); 887 } 888 bFirst = !bFirst; 889 } 890 if (m_Size <= m_Pos) { 891 break; 892 } 893 ch = m_pBuf[m_Pos++]; 894 } 895 if (!bFirst) { 896 buf.AppendChar((char)code); 897 } 898 if (buf.GetLength() > MAX_STRING_LENGTH) { 899 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 900 } 901 return buf.GetByteString(); 902 } 903 #define PAGEPARSE_STAGE_GETCONTENT 1 904 #define PAGEPARSE_STAGE_PARSE 2 905 #define PAGEPARSE_STAGE_CHECKCLIP 3 906 CPDF_ContentParser::CPDF_ContentParser() 907 { 908 m_pParser = NULL; 909 m_pStreamArray = NULL; 910 m_pSingleStream = NULL; 911 m_pData = NULL; 912 m_Status = Ready; 913 m_pType3Char = NULL; 914 } 915 CPDF_ContentParser::~CPDF_ContentParser() 916 { 917 Clear(); 918 } 919 void CPDF_ContentParser::Clear() 920 { 921 if (m_pParser) { 922 delete m_pParser; 923 } 924 if (m_pSingleStream) { 925 delete m_pSingleStream; 926 } 927 if (m_pStreamArray) { 928 for (FX_DWORD i = 0; i < m_nStreams; i ++) 929 if (m_pStreamArray[i]) { 930 delete m_pStreamArray[i]; 931 } 932 FX_Free(m_pStreamArray); 933 } 934 if (m_pData && m_pSingleStream == NULL) { 935 FX_Free((void*)m_pData); 936 } 937 m_pParser = NULL; 938 m_pStreamArray = NULL; 939 m_pSingleStream = NULL; 940 m_pData = NULL; 941 m_Status = Ready; 942 } 943 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) 944 { 945 if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) { 946 m_Status = Done; 947 return; 948 } 949 m_pObjects = pPage; 950 m_bForm = FALSE; 951 if (pOptions) { 952 m_Options = *pOptions; 953 } 954 m_Status = ToBeContinued; 955 m_InternalStage = PAGEPARSE_STAGE_GETCONTENT; 956 m_CurrentOffset = 0; 957 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents")); 958 if (pContent == NULL) { 959 m_Status = Done; 960 return; 961 } 962 if (pContent->GetType() == PDFOBJ_STREAM) { 963 m_nStreams = 0; 964 m_pSingleStream = new CPDF_StreamAcc; 965 m_pSingleStream->LoadAllData((CPDF_Stream*)pContent, FALSE); 966 } else if (pContent->GetType() == PDFOBJ_ARRAY) { 967 CPDF_Array* pArray = (CPDF_Array*)pContent; 968 m_nStreams = pArray->GetCount(); 969 if (m_nStreams == 0) { 970 m_Status = Done; 971 return; 972 } 973 m_pStreamArray = FX_Alloc(CPDF_StreamAcc*, m_nStreams); 974 } else { 975 m_Status = Done; 976 return; 977 } 978 } 979 void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates, 980 CFX_AffineMatrix* pParentMatrix, CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level) 981 { 982 m_pType3Char = pType3Char; 983 m_pObjects = pForm; 984 m_bForm = TRUE; 985 CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix")); 986 if (pGraphicStates) { 987 form_matrix.Concat(pGraphicStates->m_CTM); 988 } 989 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox")); 990 CFX_FloatRect form_bbox; 991 CPDF_Path ClipPath; 992 if (pBBox) { 993 form_bbox = pBBox->GetRect(); 994 ClipPath.New(); 995 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top); 996 ClipPath.Transform(&form_matrix); 997 if (pParentMatrix) { 998 ClipPath.Transform(pParentMatrix); 999 } 1000 form_bbox.Transform(&form_matrix); 1001 if (pParentMatrix) { 1002 form_bbox.Transform(pParentMatrix); 1003 } 1004 } 1005 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources")); 1006 m_pParser = new CPDF_StreamContentParser; 1007 m_pParser->Initialize(); 1008 m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm, 1009 pResources, &form_bbox, pOptions, pGraphicStates, level); 1010 m_pParser->m_pCurStates->m_CTM = form_matrix; 1011 m_pParser->m_pCurStates->m_ParentMatrix = form_matrix; 1012 if (ClipPath.NotNull()) { 1013 m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE); 1014 } 1015 if (pForm->m_Transparency & PDFTRANS_GROUP) { 1016 CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify(); 1017 pData->m_BlendType = FXDIB_BLEND_NORMAL; 1018 pData->m_StrokeAlpha = 1.0f; 1019 pData->m_FillAlpha = 1.0f; 1020 pData->m_pSoftMask = NULL; 1021 } 1022 m_nStreams = 0; 1023 m_pSingleStream = new CPDF_StreamAcc; 1024 if (pForm->m_pDocument) { 1025 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE); 1026 } else { 1027 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE); 1028 } 1029 m_pData = (FX_LPBYTE)m_pSingleStream->GetData(); 1030 m_Size = m_pSingleStream->GetSize(); 1031 m_Status = ToBeContinued; 1032 m_InternalStage = PAGEPARSE_STAGE_PARSE; 1033 m_CurrentOffset = 0; 1034 } 1035 void CPDF_ContentParser::Continue(IFX_Pause* pPause) 1036 { 1037 int steps = 0; 1038 while (m_Status == ToBeContinued) { 1039 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) { 1040 if (m_CurrentOffset == m_nStreams) { 1041 if (m_pStreamArray) { 1042 m_Size = 0; 1043 FX_DWORD i; 1044 for (i = 0; i < m_nStreams; i ++) { 1045 FX_DWORD size = m_pStreamArray[i]->GetSize(); 1046 if (m_Size + size + 1 <= m_Size) { 1047 m_Status = Done; 1048 return; 1049 } 1050 m_Size += size + 1; 1051 } 1052 m_pData = FX_Alloc(FX_BYTE, m_Size); 1053 FX_DWORD pos = 0; 1054 for (i = 0; i < m_nStreams; i ++) { 1055 FXSYS_memcpy32(m_pData + pos, m_pStreamArray[i]->GetData(), m_pStreamArray[i]->GetSize()); 1056 pos += m_pStreamArray[i]->GetSize() + 1; 1057 m_pData[pos - 1] = ' '; 1058 delete m_pStreamArray[i]; 1059 } 1060 FX_Free(m_pStreamArray); 1061 m_pStreamArray = NULL; 1062 } else { 1063 m_pData = (FX_LPBYTE)m_pSingleStream->GetData(); 1064 m_Size = m_pSingleStream->GetSize(); 1065 } 1066 m_InternalStage = PAGEPARSE_STAGE_PARSE; 1067 m_CurrentOffset = 0; 1068 } else { 1069 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray(FX_BSTRC("Contents")); 1070 m_pStreamArray[m_CurrentOffset] = new CPDF_StreamAcc; 1071 CPDF_Stream* pStreamObj = (CPDF_Stream*)(pContent ? pContent->GetElementValue(m_CurrentOffset) : NULL); 1072 m_pStreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE); 1073 m_CurrentOffset ++; 1074 } 1075 } 1076 if (m_InternalStage == PAGEPARSE_STAGE_PARSE) { 1077 if (m_pParser == NULL) { 1078 m_pParser = new CPDF_StreamContentParser; 1079 m_pParser->Initialize(); 1080 m_pParser->PrepareParse(m_pObjects->m_pDocument, m_pObjects->m_pPageResources, NULL, NULL, m_pObjects, 1081 m_pObjects->m_pResources, &m_pObjects->m_BBox, &m_Options, NULL, 0); 1082 m_pParser->m_pCurStates->m_ColorState.GetModify()->Default(); 1083 } 1084 if (m_CurrentOffset >= m_Size) { 1085 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; 1086 } else { 1087 m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); 1088 if (m_pParser->m_bAbort) { 1089 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; 1090 continue; 1091 } 1092 } 1093 } 1094 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { 1095 if (m_pType3Char) { 1096 m_pType3Char->m_bColored = m_pParser->m_bColored; 1097 m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000); 1098 m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000); 1099 m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000); 1100 m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000); 1101 m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000); 1102 } 1103 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition(); 1104 while (pos) { 1105 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos); 1106 if (pObj->m_ClipPath.IsNull()) { 1107 continue; 1108 } 1109 if (pObj->m_ClipPath.GetPathCount() != 1) { 1110 continue; 1111 } 1112 if (pObj->m_ClipPath.GetTextCount()) { 1113 continue; 1114 } 1115 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); 1116 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) { 1117 continue; 1118 } 1119 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), 1120 ClipPath.GetPointX(2), ClipPath.GetPointY(2)); 1121 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top); 1122 if (old_rect.Contains(obj_rect)) { 1123 pObj->m_ClipPath.SetNull(); 1124 } 1125 } 1126 m_Status = Done; 1127 return; 1128 } 1129 steps ++; 1130 if (pPause && pPause->NeedToPauseNow()) { 1131 break; 1132 } 1133 } 1134 } 1135 int CPDF_ContentParser::EstimateProgress() 1136 { 1137 if (m_Status == Ready) { 1138 return 0; 1139 } 1140 if (m_Status == Done) { 1141 return 100; 1142 } 1143 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) { 1144 return 10; 1145 } 1146 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { 1147 return 90; 1148 } 1149 return 10 + 80 * m_CurrentOffset / m_Size; 1150 } 1151