1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "../../../include/fpdfapi/fpdf_page.h" 8 #include "../../../include/fpdfapi/fpdf_module.h" 9 #include "../../../include/fxcodec/fx_codec.h" 10 #include "pageint.h" 11 #include <limits.h> 12 extern const FX_LPCSTR _PDF_OpCharType = 13 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 14 "IIVIIIIVIIVIIIIIVVIIIIIIIIIIIIII" 15 "IIVVVVVVIVVVVVVIVVVVVIIVVIIIIIII" 16 "IIVVVVVVVVVVVVVVIVVVIIVVIVVIIIII" 17 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 18 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 19 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII" 20 "IIIIIIIIIIIIIIIIIIIIIIIIIIIIIIII"; 21 FX_BOOL _PDF_HasInvalidOpChar(FX_LPCSTR op) 22 { 23 if(!op) { 24 return FALSE; 25 } 26 FX_BYTE ch; 27 while((ch = *op++)) { 28 if(_PDF_OpCharType[ch] == 'I') { 29 return TRUE; 30 } 31 } 32 return FALSE; 33 } 34 FX_DWORD CPDF_StreamContentParser::Parse(FX_LPCBYTE pData, FX_DWORD dwSize, FX_DWORD max_cost) 35 { 36 if (m_Level > _FPDF_MAX_FORM_LEVEL_) { 37 return dwSize; 38 } 39 FX_DWORD InitObjCount = m_pObjectList->CountObjects(); 40 CPDF_StreamParser syntax(pData, dwSize); 41 m_pSyntax = &syntax; 42 m_CompatCount = 0; 43 while (1) { 44 FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount; 45 if (max_cost && cost >= max_cost) { 46 break; 47 } 48 switch (syntax.ParseNextElement()) { 49 case CPDF_StreamParser::EndOfData: 50 return m_pSyntax->GetPos(); 51 case CPDF_StreamParser::Keyword: 52 if(!OnOperator((char*)syntax.GetWordBuf()) && _PDF_HasInvalidOpChar((char*)syntax.GetWordBuf())) { 53 m_bAbort = TRUE; 54 } 55 if (m_bAbort) { 56 return m_pSyntax->GetPos(); 57 } 58 ClearAllParams(); 59 break; 60 case CPDF_StreamParser::Number: 61 AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize()); 62 break; 63 case CPDF_StreamParser::Name: 64 AddNameParam((FX_LPCSTR)syntax.GetWordBuf() + 1, syntax.GetWordSize() - 1); 65 break; 66 default: 67 AddObjectParam(syntax.GetObject()); 68 } 69 } 70 return m_pSyntax->GetPos(); 71 } 72 void _PDF_ReplaceAbbr(CPDF_Object* pObj); 73 void CPDF_StreamContentParser::Handle_BeginImage() 74 { 75 FX_FILESIZE savePos = m_pSyntax->GetPos(); 76 CPDF_Dictionary* pDict = CPDF_Dictionary::Create(); 77 while (1) { 78 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 79 if (type == CPDF_StreamParser::Keyword) { 80 CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()); 81 if (bsKeyword != FX_BSTRC("ID")) { 82 m_pSyntax->SetPos(savePos); 83 pDict->Release(); 84 return; 85 } 86 } 87 if (type != CPDF_StreamParser::Name) { 88 break; 89 } 90 CFX_ByteString key((FX_LPCSTR)m_pSyntax->GetWordBuf() + 1, m_pSyntax->GetWordSize() - 1); 91 CPDF_Object* pObj = m_pSyntax->ReadNextObject(); 92 if (!key.IsEmpty()) { 93 pDict->SetAt(key, pObj, m_pDocument); 94 } else { 95 pObj->Release(); 96 } 97 } 98 _PDF_ReplaceAbbr(pDict); 99 CPDF_Object* pCSObj = NULL; 100 if (pDict->KeyExist(FX_BSTRC("ColorSpace"))) { 101 pCSObj = pDict->GetElementValue(FX_BSTRC("ColorSpace")); 102 if (pCSObj->GetType() == PDFOBJ_NAME) { 103 CFX_ByteString name = pCSObj->GetString(); 104 if (name != FX_BSTRC("DeviceRGB") && name != FX_BSTRC("DeviceGray") && name != FX_BSTRC("DeviceCMYK")) { 105 pCSObj = FindResourceObj(FX_BSTRC("ColorSpace"), name); 106 if (pCSObj && !pCSObj->GetObjNum()) { 107 pCSObj = pCSObj->Clone(); 108 pDict->SetAt(FX_BSTRC("ColorSpace"), pCSObj, m_pDocument); 109 } 110 } 111 } 112 } 113 CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage); 114 while (1) { 115 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 116 if (type == CPDF_StreamParser::EndOfData) { 117 break; 118 } 119 if (type != CPDF_StreamParser::Keyword) { 120 continue; 121 } 122 if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' && 123 m_pSyntax->GetWordBuf()[1] == 'I') { 124 break; 125 } 126 } 127 if (m_Options.m_bTextOnly) { 128 if (pStream) { 129 pStream->Release(); 130 } else { 131 pDict->Release(); 132 } 133 return; 134 } 135 pDict->SetAtName(FX_BSTRC("Subtype"), FX_BSTRC("Image")); 136 CPDF_ImageObject *pImgObj = AddImage(pStream, NULL, TRUE); 137 if (!pImgObj) { 138 if (pStream) { 139 pStream->Release(); 140 } else { 141 pDict->Release(); 142 } 143 } 144 } 145 void CPDF_StreamContentParser::ParsePathObject() 146 { 147 FX_FLOAT params[6] = {0}; 148 int nParams = 0; 149 int last_pos = m_pSyntax->GetPos(); 150 while (1) { 151 CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement(); 152 FX_BOOL bProcessed = TRUE; 153 switch (type) { 154 case CPDF_StreamParser::EndOfData: 155 return; 156 case CPDF_StreamParser::Keyword: { 157 int len = m_pSyntax->GetWordSize(); 158 if (len == 1) { 159 switch (m_pSyntax->GetWordBuf()[0]) { 160 case 'm': 161 AddPathPoint(params[0], params[1], FXPT_MOVETO); 162 nParams = 0; 163 break; 164 case 'l': 165 AddPathPoint(params[0], params[1], FXPT_LINETO); 166 nParams = 0; 167 break; 168 case 'c': 169 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 170 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 171 AddPathPoint(params[4], params[5], FXPT_BEZIERTO); 172 nParams = 0; 173 break; 174 case 'v': 175 AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO); 176 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 177 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 178 nParams = 0; 179 break; 180 case 'y': 181 AddPathPoint(params[0], params[1], FXPT_BEZIERTO); 182 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 183 AddPathPoint(params[2], params[3], FXPT_BEZIERTO); 184 nParams = 0; 185 break; 186 case 'h': 187 Handle_ClosePath(); 188 nParams = 0; 189 break; 190 default: 191 bProcessed = FALSE; 192 break; 193 } 194 } else if (len == 2) { 195 if (m_pSyntax->GetWordBuf()[0] == 'r' && m_pSyntax->GetWordBuf()[1] == 'e') { 196 AddPathRect(params[0], params[1], params[2], params[3]); 197 nParams = 0; 198 } else { 199 bProcessed = FALSE; 200 } 201 } else { 202 bProcessed = FALSE; 203 } 204 if (bProcessed) { 205 last_pos = m_pSyntax->GetPos(); 206 } 207 break; 208 } 209 case CPDF_StreamParser::Number: { 210 if (nParams == 6) { 211 break; 212 } 213 FX_BOOL bInteger; 214 int value; 215 FX_atonum(CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()), bInteger, &value); 216 params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value; 217 break; 218 } 219 default: 220 bProcessed = FALSE; 221 } 222 if (!bProcessed) { 223 m_pSyntax->SetPos(last_pos); 224 return; 225 } 226 } 227 } 228 CPDF_StreamParser::CPDF_StreamParser(const FX_BYTE* pData, FX_DWORD dwSize) 229 { 230 m_pBuf = pData; 231 m_Size = dwSize; 232 m_Pos = 0; 233 m_pLastObj = NULL; 234 } 235 CPDF_StreamParser::~CPDF_StreamParser() 236 { 237 if (m_pLastObj) { 238 m_pLastObj->Release(); 239 } 240 } 241 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder, FX_LPBYTE& dest_buf, FX_DWORD& dest_size) 242 { 243 if (pDecoder == NULL) { 244 return (FX_DWORD) - 1; 245 } 246 int ncomps = pDecoder->CountComps(); 247 int bpc = pDecoder->GetBPC(); 248 int width = pDecoder->GetWidth(); 249 int height = pDecoder->GetHeight(); 250 int pitch = (width * ncomps * bpc + 7) / 8; 251 if (height == 0 || pitch > (1 << 30) / height) { 252 delete pDecoder; 253 return -1; 254 } 255 dest_size = pitch * height; 256 dest_buf = FX_Alloc( FX_BYTE, dest_size); 257 for (int row = 0; row < height; row ++) { 258 FX_LPBYTE pLine = pDecoder->GetScanline(row); 259 if (pLine == NULL) { 260 break; 261 } 262 FXSYS_memcpy32(dest_buf + row * pitch, pLine, pitch); 263 } 264 FX_DWORD srcoff = pDecoder->GetSrcOffset(); 265 delete pDecoder; 266 return srcoff; 267 } 268 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(FX_LPCBYTE src_buf, FX_DWORD src_size, int width, int height, 269 const CPDF_Dictionary* pParams); 270 FX_DWORD _A85Decode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 271 FX_DWORD _HexDecode(const FX_BYTE* src_buf, FX_DWORD src_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 272 FX_DWORD FPDFAPI_FlateOrLZWDecode(FX_BOOL bLZW, const FX_BYTE* src_buf, FX_DWORD src_size, CPDF_Dictionary* pParams, 273 FX_DWORD estimated_size, FX_LPBYTE& dest_buf, FX_DWORD& dest_size); 274 FX_DWORD PDF_DecodeInlineStream(const FX_BYTE* src_buf, FX_DWORD limit, 275 int width, int height, CFX_ByteString& decoder, 276 CPDF_Dictionary* pParam, FX_LPBYTE& dest_buf, FX_DWORD& dest_size) 277 { 278 if (decoder == FX_BSTRC("CCITTFaxDecode") || decoder == FX_BSTRC("CCF")) { 279 ICodec_ScanlineDecoder* pDecoder = FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam); 280 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 281 } else if (decoder == FX_BSTRC("ASCII85Decode") || decoder == FX_BSTRC("A85")) { 282 return _A85Decode(src_buf, limit, dest_buf, dest_size); 283 } else if (decoder == FX_BSTRC("ASCIIHexDecode") || decoder == FX_BSTRC("AHx")) { 284 return _HexDecode(src_buf, limit, dest_buf, dest_size); 285 } else if (decoder == FX_BSTRC("FlateDecode") || decoder == FX_BSTRC("Fl")) { 286 return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size, dest_buf, dest_size); 287 } else if (decoder == FX_BSTRC("LZWDecode") || decoder == FX_BSTRC("LZW")) { 288 return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf, dest_size); 289 } else if (decoder == FX_BSTRC("DCTDecode") || decoder == FX_BSTRC("DCT")) { 290 ICodec_ScanlineDecoder* pDecoder = CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder( 291 src_buf, limit, width, height, 0, pParam ? pParam->GetInteger(FX_BSTRC("ColorTransform"), 1) : 1); 292 return _DecodeAllScanlines(pDecoder, dest_buf, dest_size); 293 } else if (decoder == FX_BSTRC("RunLengthDecode") || decoder == FX_BSTRC("RL")) { 294 return RunLengthDecode(src_buf, limit, dest_buf, dest_size); 295 } 296 dest_size = 0; 297 dest_buf = 0; 298 return (FX_DWORD) - 1; 299 } 300 extern const FX_LPCSTR _PDF_CharType; 301 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc, CPDF_Dictionary* pDict, CPDF_Object* pCSObj, FX_BOOL bDecode) 302 { 303 if (m_Pos == m_Size) { 304 return NULL; 305 } 306 if (_PDF_CharType[m_pBuf[m_Pos]] == 'W') { 307 m_Pos ++; 308 } 309 CFX_ByteString Decoder; 310 CPDF_Dictionary* pParam = NULL; 311 CPDF_Object* pFilter = pDict->GetElementValue(FX_BSTRC("Filter")); 312 if (pFilter == NULL) { 313 } else if (pFilter->GetType() == PDFOBJ_ARRAY) { 314 Decoder = ((CPDF_Array*)pFilter)->GetString(0); 315 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); 316 if (pParams) { 317 pParam = pParams->GetDict(0); 318 } 319 } else { 320 Decoder = pFilter->GetString(); 321 pParam = pDict->GetDict(FX_BSTRC("DecodeParms")); 322 } 323 FX_DWORD width = pDict->GetInteger(FX_BSTRC("Width")); 324 FX_DWORD height = pDict->GetInteger(FX_BSTRC("Height")); 325 FX_DWORD OrigSize = 0; 326 if (pCSObj != NULL) { 327 FX_DWORD bpc = pDict->GetInteger(FX_BSTRC("BitsPerComponent")); 328 FX_DWORD nComponents = 1; 329 CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj); 330 if (pCS == NULL) { 331 nComponents = 3; 332 } else { 333 nComponents = pCS->CountComponents(); 334 pDoc->GetPageData()->ReleaseColorSpace(pCSObj); 335 } 336 FX_DWORD pitch = width; 337 if (bpc && pitch > INT_MAX / bpc) { 338 return NULL; 339 } 340 pitch *= bpc; 341 if (nComponents && pitch > INT_MAX / nComponents) { 342 return NULL; 343 } 344 pitch *= nComponents; 345 if (pitch > INT_MAX - 7) { 346 return NULL; 347 } 348 pitch += 7; 349 pitch /= 8; 350 OrigSize = pitch; 351 } else { 352 if (width > INT_MAX - 7) { 353 return NULL; 354 } 355 OrigSize = ((width + 7) / 8); 356 } 357 if (height && OrigSize > INT_MAX / height) { 358 return NULL; 359 } 360 OrigSize *= height; 361 FX_LPBYTE pData = NULL; 362 FX_DWORD dwStreamSize; 363 if (Decoder.IsEmpty()) { 364 if (OrigSize > m_Size - m_Pos) { 365 OrigSize = m_Size - m_Pos; 366 } 367 pData = FX_Alloc(FX_BYTE, OrigSize); 368 FXSYS_memcpy32(pData, m_pBuf + m_Pos, OrigSize); 369 dwStreamSize = OrigSize; 370 m_Pos += OrigSize; 371 } else { 372 FX_DWORD dwDestSize = OrigSize; 373 dwStreamSize = PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height, Decoder, pParam, 374 pData, dwDestSize); 375 if ((int)dwStreamSize < 0) { 376 return NULL; 377 } 378 if (bDecode) { 379 m_Pos += dwStreamSize; 380 dwStreamSize = dwDestSize; 381 if (pFilter->GetType() == PDFOBJ_ARRAY) { 382 ((CPDF_Array*)pFilter)->RemoveAt(0); 383 CPDF_Array* pParams = pDict->GetArray(FX_BSTRC("DecodeParms")); 384 if (pParams) { 385 pParams->RemoveAt(0); 386 } 387 } else { 388 pDict->RemoveAt(FX_BSTRC("Filter")); 389 pDict->RemoveAt(FX_BSTRC("DecodeParms")); 390 } 391 } else { 392 if (pData) { 393 FX_Free(pData); 394 } 395 FX_DWORD dwSavePos = m_Pos; 396 m_Pos += dwStreamSize; 397 while (1) { 398 FX_DWORD dwPrevPos = m_Pos; 399 CPDF_StreamParser::SyntaxType type = ParseNextElement(); 400 if (type == CPDF_StreamParser::EndOfData) { 401 break; 402 } 403 if (type != CPDF_StreamParser::Keyword) { 404 dwStreamSize += m_Pos - dwPrevPos; 405 continue; 406 } 407 if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' && 408 GetWordBuf()[1] == 'I') { 409 m_Pos = dwPrevPos; 410 break; 411 } 412 dwStreamSize += m_Pos - dwPrevPos; 413 } 414 m_Pos = dwSavePos; 415 pData = FX_Alloc(FX_BYTE, dwStreamSize); 416 FXSYS_memcpy32(pData, m_pBuf + m_Pos, dwStreamSize); 417 m_Pos += dwStreamSize; 418 } 419 } 420 pDict->SetAtInteger(FX_BSTRC("Length"), (int)dwStreamSize); 421 return CPDF_Stream::Create(pData, dwStreamSize, pDict); 422 } 423 #define MAX_WORD_BUFFER 256 424 #define MAX_STRING_LENGTH 32767 425 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274) 426 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e) 427 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166) 428 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() 429 { 430 if (m_pLastObj) { 431 m_pLastObj->Release(); 432 m_pLastObj = NULL; 433 } 434 m_WordSize = 0; 435 FX_BOOL bIsNumber = TRUE; 436 if (m_Pos >= m_Size) { 437 return EndOfData; 438 } 439 int ch = m_pBuf[m_Pos++]; 440 int type = _PDF_CharType[ch]; 441 while (1) { 442 while (type == 'W') { 443 if (m_Size <= m_Pos) { 444 return EndOfData; 445 } 446 ch = m_pBuf[m_Pos++]; 447 type = _PDF_CharType[ch]; 448 } 449 if (ch != '%') { 450 break; 451 } 452 while (1) { 453 if (m_Size <= m_Pos) { 454 return EndOfData; 455 } 456 ch = m_pBuf[m_Pos++]; 457 if (ch == '\r' || ch == '\n') { 458 break; 459 } 460 } 461 type = _PDF_CharType[ch]; 462 } 463 if (type == 'D' && ch != '/') { 464 m_Pos --; 465 m_pLastObj = ReadNextObject(); 466 return Others; 467 } 468 while (1) { 469 if (m_WordSize < MAX_WORD_BUFFER) { 470 m_WordBuffer[m_WordSize++] = ch; 471 } 472 if (type != 'N') { 473 bIsNumber = FALSE; 474 } 475 if (m_Size <= m_Pos) { 476 break; 477 } 478 ch = m_pBuf[m_Pos++]; 479 type = _PDF_CharType[ch]; 480 if (type == 'D' || type == 'W') { 481 m_Pos --; 482 break; 483 } 484 } 485 m_WordBuffer[m_WordSize] = 0; 486 if (bIsNumber) { 487 return Number; 488 } 489 if (m_WordBuffer[0] == '/') { 490 return Name; 491 } 492 if (m_WordSize == 4) { 493 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 494 m_pLastObj = CPDF_Boolean::Create(TRUE); 495 return Others; 496 } 497 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 498 m_pLastObj = CPDF_Null::Create(); 499 return Others; 500 } 501 } else if (m_WordSize == 5) { 502 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 503 m_pLastObj = CPDF_Boolean::Create(FALSE); 504 return Others; 505 } 506 } 507 return Keyword; 508 } 509 void CPDF_StreamParser::SkipPathObject() 510 { 511 FX_DWORD command_startpos = m_Pos; 512 if (m_Pos >= m_Size) { 513 return; 514 } 515 int ch = m_pBuf[m_Pos++]; 516 int type = _PDF_CharType[ch]; 517 while (1) { 518 while (type == 'W') { 519 if (m_Pos >= m_Size) { 520 return; 521 } 522 ch = m_pBuf[m_Pos++]; 523 type = _PDF_CharType[ch]; 524 } 525 if (type != 'N') { 526 m_Pos = command_startpos; 527 return; 528 } 529 while (1) { 530 while (type != 'W') { 531 if (m_Pos >= m_Size) { 532 return; 533 } 534 ch = m_pBuf[m_Pos++]; 535 type = _PDF_CharType[ch]; 536 } 537 while (type == 'W') { 538 if (m_Pos >= m_Size) { 539 return; 540 } 541 ch = m_pBuf[m_Pos++]; 542 type = _PDF_CharType[ch]; 543 } 544 if (type == 'N') { 545 continue; 546 } 547 FX_DWORD op_startpos = m_Pos - 1; 548 while (type != 'W' && type != 'D') { 549 if (m_Pos >= m_Size) { 550 return; 551 } 552 ch = m_pBuf[m_Pos++]; 553 type = _PDF_CharType[ch]; 554 } 555 if (m_Pos - op_startpos == 2) { 556 int op = m_pBuf[op_startpos]; 557 if (op == 'm' || op == 'l' || op == 'c' || op == 'v' || op == 'y') { 558 command_startpos = m_Pos; 559 break; 560 } 561 } else if (m_Pos - op_startpos == 3) { 562 if (m_pBuf[op_startpos] == 'r' && m_pBuf[op_startpos + 1] == 'e') { 563 command_startpos = m_Pos; 564 break; 565 } 566 } 567 m_Pos = command_startpos; 568 return; 569 } 570 } 571 } 572 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray, FX_BOOL bInArray) 573 { 574 FX_BOOL bIsNumber; 575 GetNextWord(bIsNumber); 576 if (m_WordSize == 0) { 577 return NULL; 578 } 579 if (bIsNumber) { 580 m_WordBuffer[m_WordSize] = 0; 581 return CPDF_Number::Create(CFX_ByteStringC(m_WordBuffer, m_WordSize)); 582 } 583 int first_char = m_WordBuffer[0]; 584 if (first_char == '/') { 585 return CPDF_Name::Create(PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1))); 586 } 587 if (first_char == '(') { 588 return CPDF_String::Create(ReadString()); 589 } 590 if (first_char == '<') { 591 if (m_WordSize == 1) { 592 return CPDF_String::Create(ReadHexString(), TRUE); 593 } 594 CPDF_Dictionary* pDict = CPDF_Dictionary::Create(); 595 while (1) { 596 GetNextWord(bIsNumber); 597 if (m_WordSize == 0) { 598 pDict->Release(); 599 return NULL; 600 } 601 if (m_WordSize == 2 && m_WordBuffer[0] == '>') { 602 break; 603 } 604 if (m_WordBuffer[0] != '/') { 605 pDict->Release(); 606 return NULL; 607 } 608 CFX_ByteString key = PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)); 609 CPDF_Object* pObj = ReadNextObject(TRUE); 610 if (pObj == NULL) { 611 if (pDict) { 612 pDict->Release(); 613 } 614 return NULL; 615 } 616 if (!key.IsEmpty()) { 617 pDict->SetAt(key, pObj); 618 } else { 619 pObj->Release(); 620 } 621 } 622 return pDict; 623 } 624 if (first_char == '[') { 625 if (!bAllowNestedArray && bInArray) { 626 return NULL; 627 } 628 CPDF_Array* pArray = CPDF_Array::Create(); 629 while (1) { 630 CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE); 631 if (pObj == NULL) { 632 if (m_WordSize == 0 || m_WordBuffer[0] == ']') { 633 return pArray; 634 } 635 if (m_WordBuffer[0] == '[') { 636 continue; 637 } 638 } else { 639 pArray->Add(pObj); 640 } 641 } 642 } 643 if (m_WordSize == 4) { 644 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) { 645 return CPDF_Boolean::Create(TRUE); 646 } 647 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) { 648 return CPDF_Null::Create(); 649 } 650 } else if (m_WordSize == 5) { 651 if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') { 652 return CPDF_Boolean::Create(FALSE); 653 } 654 } 655 return NULL; 656 } 657 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) 658 { 659 m_WordSize = 0; 660 bIsNumber = TRUE; 661 if (m_Size <= m_Pos) { 662 return; 663 } 664 int ch = m_pBuf[m_Pos++]; 665 int type = _PDF_CharType[ch]; 666 while (1) { 667 while (type == 'W') { 668 if (m_Size <= m_Pos) { 669 return; 670 } 671 ch = m_pBuf[m_Pos++]; 672 type = _PDF_CharType[ch]; 673 } 674 if (ch != '%') { 675 break; 676 } 677 while (1) { 678 if (m_Size <= m_Pos) { 679 return; 680 } 681 ch = m_pBuf[m_Pos++]; 682 if (ch == '\r' || ch == '\n') { 683 break; 684 } 685 } 686 type = _PDF_CharType[ch]; 687 } 688 if (type == 'D') { 689 bIsNumber = FALSE; 690 m_WordBuffer[m_WordSize++] = ch; 691 if (ch == '/') { 692 while (1) { 693 if (m_Size <= m_Pos) { 694 return; 695 } 696 ch = m_pBuf[m_Pos++]; 697 type = _PDF_CharType[ch]; 698 if (type != 'R' && type != 'N') { 699 m_Pos --; 700 return; 701 } 702 if (m_WordSize < MAX_WORD_BUFFER) { 703 m_WordBuffer[m_WordSize++] = ch; 704 } 705 } 706 } else if (ch == '<') { 707 if (m_Size <= m_Pos) { 708 return; 709 } 710 ch = m_pBuf[m_Pos++]; 711 if (ch == '<') { 712 m_WordBuffer[m_WordSize++] = ch; 713 } else { 714 m_Pos --; 715 } 716 } else if (ch == '>') { 717 if (m_Size <= m_Pos) { 718 return; 719 } 720 ch = m_pBuf[m_Pos++]; 721 if (ch == '>') { 722 m_WordBuffer[m_WordSize++] = ch; 723 } else { 724 m_Pos --; 725 } 726 } 727 return; 728 } 729 while (1) { 730 if (m_WordSize < MAX_WORD_BUFFER) { 731 m_WordBuffer[m_WordSize++] = ch; 732 } 733 if (type != 'N') { 734 bIsNumber = FALSE; 735 } 736 if (m_Size <= m_Pos) { 737 return; 738 } 739 ch = m_pBuf[m_Pos++]; 740 type = _PDF_CharType[ch]; 741 if (type == 'D' || type == 'W') { 742 m_Pos --; 743 break; 744 } 745 } 746 } 747 CFX_ByteString CPDF_StreamParser::ReadString() 748 { 749 if (m_Size <= m_Pos) { 750 return CFX_ByteString(); 751 } 752 int ch = m_pBuf[m_Pos++]; 753 CFX_ByteTextBuf buf; 754 int parlevel = 0; 755 int status = 0, iEscCode = 0; 756 while (1) { 757 switch (status) { 758 case 0: 759 if (ch == ')') { 760 if (parlevel == 0) { 761 if (buf.GetLength() > MAX_STRING_LENGTH) { 762 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 763 } 764 return buf.GetByteString(); 765 } 766 parlevel --; 767 buf.AppendChar(')'); 768 } else if (ch == '(') { 769 parlevel ++; 770 buf.AppendChar('('); 771 } else if (ch == '\\') { 772 status = 1; 773 } else { 774 buf.AppendChar((char)ch); 775 } 776 break; 777 case 1: 778 if (ch >= '0' && ch <= '7') { 779 iEscCode = ch - '0'; 780 status = 2; 781 break; 782 } 783 if (ch == 'n') { 784 buf.AppendChar('\n'); 785 } else if (ch == 'r') { 786 buf.AppendChar('\r'); 787 } else if (ch == 't') { 788 buf.AppendChar('\t'); 789 } else if (ch == 'b') { 790 buf.AppendChar('\b'); 791 } else if (ch == 'f') { 792 buf.AppendChar('\f'); 793 } else if (ch == '\r') { 794 status = 4; 795 break; 796 } else if (ch == '\n') { 797 } else { 798 buf.AppendChar(ch); 799 } 800 status = 0; 801 break; 802 case 2: 803 if (ch >= '0' && ch <= '7') { 804 iEscCode = iEscCode * 8 + ch - '0'; 805 status = 3; 806 } else { 807 buf.AppendChar(iEscCode); 808 status = 0; 809 continue; 810 } 811 break; 812 case 3: 813 if (ch >= '0' && ch <= '7') { 814 iEscCode = iEscCode * 8 + ch - '0'; 815 buf.AppendChar(iEscCode); 816 status = 0; 817 } else { 818 buf.AppendChar(iEscCode); 819 status = 0; 820 continue; 821 } 822 break; 823 case 4: 824 status = 0; 825 if (ch != '\n') { 826 continue; 827 } 828 break; 829 } 830 if (m_Size <= m_Pos) { 831 break; 832 } 833 ch = m_pBuf[m_Pos++]; 834 } 835 if (m_Size > m_Pos) { 836 ch = m_pBuf[m_Pos++]; 837 } 838 if (buf.GetLength() > MAX_STRING_LENGTH) { 839 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 840 } 841 return buf.GetByteString(); 842 } 843 CFX_ByteString CPDF_StreamParser::ReadHexString() 844 { 845 if (m_Size <= m_Pos) { 846 return CFX_ByteString(); 847 } 848 int ch = m_pBuf[m_Pos++]; 849 CFX_ByteTextBuf buf; 850 FX_BOOL bFirst = TRUE; 851 int code = 0; 852 while (1) { 853 if (ch == '>') { 854 break; 855 } 856 if (ch >= '0' && ch <= '9') { 857 if (bFirst) { 858 code = (ch - '0') * 16; 859 } else { 860 code += ch - '0'; 861 buf.AppendChar((char)code); 862 } 863 bFirst = !bFirst; 864 } else if (ch >= 'A' && ch <= 'F') { 865 if (bFirst) { 866 code = (ch - 'A' + 10) * 16; 867 } else { 868 code += ch - 'A' + 10; 869 buf.AppendChar((char)code); 870 } 871 bFirst = !bFirst; 872 } else if (ch >= 'a' && ch <= 'f') { 873 if (bFirst) { 874 code = (ch - 'a' + 10) * 16; 875 } else { 876 code += ch - 'a' + 10; 877 buf.AppendChar((char)code); 878 } 879 bFirst = !bFirst; 880 } 881 if (m_Size <= m_Pos) { 882 break; 883 } 884 ch = m_pBuf[m_Pos++]; 885 } 886 if (!bFirst) { 887 buf.AppendChar((char)code); 888 } 889 if (buf.GetLength() > MAX_STRING_LENGTH) { 890 return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH); 891 } 892 return buf.GetByteString(); 893 } 894 #define PAGEPARSE_STAGE_GETCONTENT 1 895 #define PAGEPARSE_STAGE_PARSE 2 896 #define PAGEPARSE_STAGE_CHECKCLIP 3 897 CPDF_ContentParser::CPDF_ContentParser() 898 { 899 m_pParser = NULL; 900 m_pStreamArray = NULL; 901 m_pSingleStream = NULL; 902 m_pData = NULL; 903 m_Status = Ready; 904 m_pType3Char = NULL; 905 } 906 CPDF_ContentParser::~CPDF_ContentParser() 907 { 908 Clear(); 909 } 910 void CPDF_ContentParser::Clear() 911 { 912 if (m_pParser) { 913 delete m_pParser; 914 } 915 if (m_pSingleStream) { 916 delete m_pSingleStream; 917 } 918 if (m_pStreamArray) { 919 for (FX_DWORD i = 0; i < m_nStreams; i ++) 920 if (m_pStreamArray[i]) { 921 delete m_pStreamArray[i]; 922 } 923 FX_Free(m_pStreamArray); 924 } 925 if (m_pData && m_pSingleStream == NULL) { 926 FX_Free((void*)m_pData); 927 } 928 m_pParser = NULL; 929 m_pStreamArray = NULL; 930 m_pSingleStream = NULL; 931 m_pData = NULL; 932 m_Status = Ready; 933 } 934 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) 935 { 936 if (m_Status != Ready || pPage == NULL || pPage->m_pDocument == NULL || pPage->m_pFormDict == NULL) { 937 m_Status = Done; 938 return; 939 } 940 m_pObjects = pPage; 941 m_bForm = FALSE; 942 if (pOptions) { 943 m_Options = *pOptions; 944 } 945 m_Status = ToBeContinued; 946 m_InternalStage = PAGEPARSE_STAGE_GETCONTENT; 947 m_CurrentOffset = 0; 948 CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue(FX_BSTRC("Contents")); 949 if (pContent == NULL) { 950 m_Status = Done; 951 return; 952 } 953 if (pContent->GetType() == PDFOBJ_STREAM) { 954 m_nStreams = 0; 955 m_pSingleStream = FX_NEW CPDF_StreamAcc; 956 m_pSingleStream->LoadAllData((CPDF_Stream*)pContent, FALSE); 957 } else if (pContent->GetType() == PDFOBJ_ARRAY) { 958 CPDF_Array* pArray = (CPDF_Array*)pContent; 959 m_nStreams = pArray->GetCount(); 960 if (m_nStreams == 0) { 961 m_Status = Done; 962 return; 963 } 964 m_pStreamArray = FX_Alloc(CPDF_StreamAcc*, m_nStreams); 965 FXSYS_memset32(m_pStreamArray, 0, sizeof(CPDF_StreamAcc*) * m_nStreams); 966 } else { 967 m_Status = Done; 968 return; 969 } 970 } 971 void CPDF_ContentParser::Start(CPDF_Form* pForm, CPDF_AllStates* pGraphicStates, 972 CFX_AffineMatrix* pParentMatrix, CPDF_Type3Char* pType3Char, CPDF_ParseOptions* pOptions, int level) 973 { 974 m_pType3Char = pType3Char; 975 m_pObjects = pForm; 976 m_bForm = TRUE; 977 CFX_AffineMatrix form_matrix = pForm->m_pFormDict->GetMatrix(FX_BSTRC("Matrix")); 978 if (pGraphicStates) { 979 form_matrix.Concat(pGraphicStates->m_CTM); 980 } 981 CPDF_Array* pBBox = pForm->m_pFormDict->GetArray(FX_BSTRC("BBox")); 982 CFX_FloatRect form_bbox; 983 CPDF_Path ClipPath; 984 if (pBBox) { 985 form_bbox = pBBox->GetRect(); 986 ClipPath.New(); 987 ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right, form_bbox.top); 988 ClipPath.Transform(&form_matrix); 989 if (pParentMatrix) { 990 ClipPath.Transform(pParentMatrix); 991 } 992 form_bbox.Transform(&form_matrix); 993 if (pParentMatrix) { 994 form_bbox.Transform(pParentMatrix); 995 } 996 } 997 CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict(FX_BSTRC("Resources")); 998 m_pParser = FX_NEW CPDF_StreamContentParser; 999 m_pParser->Initialize(); 1000 m_pParser->PrepareParse(pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources, pParentMatrix, pForm, 1001 pResources, &form_bbox, pOptions, pGraphicStates, level); 1002 m_pParser->m_pCurStates->m_CTM = form_matrix; 1003 m_pParser->m_pCurStates->m_ParentMatrix = form_matrix; 1004 if (ClipPath.NotNull()) { 1005 m_pParser->m_pCurStates->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING, TRUE); 1006 } 1007 if (pForm->m_Transparency & PDFTRANS_GROUP) { 1008 CPDF_GeneralStateData* pData = m_pParser->m_pCurStates->m_GeneralState.GetModify(); 1009 pData->m_BlendType = FXDIB_BLEND_NORMAL; 1010 pData->m_StrokeAlpha = 1.0f; 1011 pData->m_FillAlpha = 1.0f; 1012 pData->m_pSoftMask = NULL; 1013 } 1014 m_nStreams = 0; 1015 m_pSingleStream = FX_NEW CPDF_StreamAcc; 1016 if (pForm->m_pDocument) { 1017 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE); 1018 } else { 1019 m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE); 1020 } 1021 m_pData = (FX_LPBYTE)m_pSingleStream->GetData(); 1022 m_Size = m_pSingleStream->GetSize(); 1023 m_Status = ToBeContinued; 1024 m_InternalStage = PAGEPARSE_STAGE_PARSE; 1025 m_CurrentOffset = 0; 1026 } 1027 void CPDF_ContentParser::Continue(IFX_Pause* pPause) 1028 { 1029 int steps = 0; 1030 while (m_Status == ToBeContinued) { 1031 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) { 1032 if (m_CurrentOffset == m_nStreams) { 1033 if (m_pStreamArray) { 1034 m_Size = 0; 1035 FX_DWORD i; 1036 for (i = 0; i < m_nStreams; i ++) { 1037 FX_DWORD size = m_pStreamArray[i]->GetSize(); 1038 if (m_Size + size + 1 <= m_Size) { 1039 m_Status = Done; 1040 return; 1041 } 1042 m_Size += size + 1; 1043 } 1044 m_pData = FX_Alloc(FX_BYTE, m_Size); 1045 if (!m_pData) { 1046 m_Status = Done; 1047 return; 1048 } 1049 FX_DWORD pos = 0; 1050 for (i = 0; i < m_nStreams; i ++) { 1051 FXSYS_memcpy32(m_pData + pos, m_pStreamArray[i]->GetData(), m_pStreamArray[i]->GetSize()); 1052 pos += m_pStreamArray[i]->GetSize() + 1; 1053 m_pData[pos - 1] = ' '; 1054 delete m_pStreamArray[i]; 1055 } 1056 FX_Free(m_pStreamArray); 1057 m_pStreamArray = NULL; 1058 } else { 1059 m_pData = (FX_LPBYTE)m_pSingleStream->GetData(); 1060 m_Size = m_pSingleStream->GetSize(); 1061 } 1062 m_InternalStage = PAGEPARSE_STAGE_PARSE; 1063 m_CurrentOffset = 0; 1064 } else { 1065 CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray(FX_BSTRC("Contents")); 1066 m_pStreamArray[m_CurrentOffset] = FX_NEW CPDF_StreamAcc; 1067 CPDF_Stream* pStreamObj = (CPDF_Stream*)pContent->GetElementValue(m_CurrentOffset); 1068 m_pStreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE); 1069 m_CurrentOffset ++; 1070 } 1071 } 1072 if (m_InternalStage == PAGEPARSE_STAGE_PARSE) { 1073 if (m_pParser == NULL) { 1074 m_pParser = FX_NEW CPDF_StreamContentParser; 1075 m_pParser->Initialize(); 1076 m_pParser->PrepareParse(m_pObjects->m_pDocument, m_pObjects->m_pPageResources, NULL, NULL, m_pObjects, 1077 m_pObjects->m_pResources, &m_pObjects->m_BBox, &m_Options, NULL, 0); 1078 m_pParser->m_pCurStates->m_ColorState.GetModify()->Default(); 1079 } 1080 if (m_CurrentOffset >= m_Size) { 1081 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; 1082 } else { 1083 m_CurrentOffset += m_pParser->Parse(m_pData + m_CurrentOffset, m_Size - m_CurrentOffset, PARSE_STEP_LIMIT); 1084 if (m_pParser->m_bAbort) { 1085 m_InternalStage = PAGEPARSE_STAGE_CHECKCLIP; 1086 continue; 1087 } 1088 } 1089 } 1090 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { 1091 if (m_pType3Char) { 1092 m_pType3Char->m_bColored = m_pParser->m_bColored; 1093 m_pType3Char->m_Width = FXSYS_round(m_pParser->m_Type3Data[0] * 1000); 1094 m_pType3Char->m_BBox.left = FXSYS_round(m_pParser->m_Type3Data[2] * 1000); 1095 m_pType3Char->m_BBox.bottom = FXSYS_round(m_pParser->m_Type3Data[3] * 1000); 1096 m_pType3Char->m_BBox.right = FXSYS_round(m_pParser->m_Type3Data[4] * 1000); 1097 m_pType3Char->m_BBox.top = FXSYS_round(m_pParser->m_Type3Data[5] * 1000); 1098 } 1099 FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition(); 1100 while (pos) { 1101 CPDF_PageObject* pObj = (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos); 1102 if (pObj->m_ClipPath.IsNull()) { 1103 continue; 1104 } 1105 if (pObj->m_ClipPath.GetPathCount() != 1) { 1106 continue; 1107 } 1108 if (pObj->m_ClipPath.GetTextCount()) { 1109 continue; 1110 } 1111 CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0); 1112 if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) { 1113 continue; 1114 } 1115 CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0), 1116 ClipPath.GetPointX(2), ClipPath.GetPointY(2)); 1117 CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right, pObj->m_Top); 1118 if (old_rect.Contains(obj_rect)) { 1119 pObj->m_ClipPath.SetNull(); 1120 } 1121 } 1122 m_Status = Done; 1123 return; 1124 } 1125 steps ++; 1126 if (pPause && pPause->NeedToPauseNow()) { 1127 break; 1128 } 1129 } 1130 } 1131 int CPDF_ContentParser::EstimateProgress() 1132 { 1133 if (m_Status == Ready) { 1134 return 0; 1135 } 1136 if (m_Status == Done) { 1137 return 100; 1138 } 1139 if (m_InternalStage == PAGEPARSE_STAGE_GETCONTENT) { 1140 return 10; 1141 } 1142 if (m_InternalStage == PAGEPARSE_STAGE_CHECKCLIP) { 1143 return 90; 1144 } 1145 return 10 + 80 * m_CurrentOffset / m_Size; 1146 } 1147