Home | History | Annotate | Download | only in fpdf_page
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/src/fpdfapi/fpdf_page/pageint.h"
      8 
      9 #include <limits.h>
     10 
     11 #include "core/include/fpdfapi/fpdf_module.h"
     12 #include "core/include/fpdfapi/fpdf_page.h"
     13 #include "core/include/fxcodec/fx_codec.h"
     14 #include "core/include/fxcrt/fx_ext.h"
     15 #include "core/include/fxcrt/fx_safe_types.h"
     16 
     17 namespace {
     18 
     19 const char kPathOperatorSubpath = 'm';
     20 const char kPathOperatorLine = 'l';
     21 const char kPathOperatorCubicBezier1 = 'c';
     22 const char kPathOperatorCubicBezier2 = 'v';
     23 const char kPathOperatorCubicBezier3 = 'y';
     24 const char kPathOperatorClosePath = 'h';
     25 const char kPathOperatorRectangle[] = "re";
     26 
     27 }  // namespace
     28 
     29 class CPDF_StreamParserAutoClearer {
     30  public:
     31   CPDF_StreamParserAutoClearer(CPDF_StreamParser** scoped_variable,
     32                                CPDF_StreamParser* new_parser)
     33       : scoped_variable_(scoped_variable) {
     34     *scoped_variable_ = new_parser;
     35   }
     36   ~CPDF_StreamParserAutoClearer() { *scoped_variable_ = NULL; }
     37 
     38  private:
     39   CPDF_StreamParser** scoped_variable_;
     40 };
     41 FX_DWORD CPDF_StreamContentParser::Parse(const uint8_t* pData,
     42                                          FX_DWORD dwSize,
     43                                          FX_DWORD max_cost) {
     44   if (m_Level > _FPDF_MAX_FORM_LEVEL_) {
     45     return dwSize;
     46   }
     47   FX_DWORD InitObjCount = m_pObjectList->CountObjects();
     48   CPDF_StreamParser syntax(pData, dwSize);
     49   CPDF_StreamParserAutoClearer auto_clearer(&m_pSyntax, &syntax);
     50   m_CompatCount = 0;
     51   while (1) {
     52     FX_DWORD cost = m_pObjectList->CountObjects() - InitObjCount;
     53     if (max_cost && cost >= max_cost) {
     54       break;
     55     }
     56     switch (syntax.ParseNextElement()) {
     57       case CPDF_StreamParser::EndOfData:
     58         return m_pSyntax->GetPos();
     59       case CPDF_StreamParser::Keyword:
     60         OnOperator((char*)syntax.GetWordBuf());
     61         ClearAllParams();
     62         break;
     63       case CPDF_StreamParser::Number:
     64         AddNumberParam((char*)syntax.GetWordBuf(), syntax.GetWordSize());
     65         break;
     66       case CPDF_StreamParser::Name:
     67         AddNameParam((const FX_CHAR*)syntax.GetWordBuf() + 1,
     68                      syntax.GetWordSize() - 1);
     69         break;
     70       default:
     71         AddObjectParam(syntax.GetObject());
     72     }
     73   }
     74   return m_pSyntax->GetPos();
     75 }
     76 
     77 void CPDF_StreamContentParser::Handle_BeginImage() {
     78   FX_FILESIZE savePos = m_pSyntax->GetPos();
     79   CPDF_Dictionary* pDict = new CPDF_Dictionary;
     80   while (1) {
     81     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
     82     if (type == CPDF_StreamParser::Keyword) {
     83       CFX_ByteString bsKeyword(m_pSyntax->GetWordBuf(),
     84                                m_pSyntax->GetWordSize());
     85       if (bsKeyword != "ID") {
     86         m_pSyntax->SetPos(savePos);
     87         pDict->Release();
     88         return;
     89       }
     90     }
     91     if (type != CPDF_StreamParser::Name) {
     92       break;
     93     }
     94     CFX_ByteString key((const FX_CHAR*)m_pSyntax->GetWordBuf() + 1,
     95                        m_pSyntax->GetWordSize() - 1);
     96     std::unique_ptr<CPDF_Object, ReleaseDeleter<CPDF_Object>> pObj(
     97         m_pSyntax->ReadNextObject());
     98     if (!key.IsEmpty()) {
     99       FX_DWORD dwObjNum = pObj ? pObj->GetObjNum() : 0;
    100       if (dwObjNum)
    101         pDict->SetAtReference(key, m_pDocument, dwObjNum);
    102       else
    103         pDict->SetAt(key, pObj.release());
    104     }
    105   }
    106   PDF_ReplaceAbbr(pDict);
    107   CPDF_Object* pCSObj = NULL;
    108   if (pDict->KeyExist("ColorSpace")) {
    109     pCSObj = pDict->GetElementValue("ColorSpace");
    110     if (pCSObj->IsName()) {
    111       CFX_ByteString name = pCSObj->GetString();
    112       if (name != "DeviceRGB" && name != "DeviceGray" && name != "DeviceCMYK") {
    113         pCSObj = FindResourceObj("ColorSpace", name);
    114         if (pCSObj && !pCSObj->GetObjNum()) {
    115           pCSObj = pCSObj->Clone();
    116           pDict->SetAt("ColorSpace", pCSObj);
    117         }
    118       }
    119     }
    120   }
    121   CPDF_Stream* pStream = m_pSyntax->ReadInlineStream(
    122       m_pDocument, pDict, pCSObj, m_Options.m_bDecodeInlineImage);
    123   while (1) {
    124     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
    125     if (type == CPDF_StreamParser::EndOfData) {
    126       break;
    127     }
    128     if (type != CPDF_StreamParser::Keyword) {
    129       continue;
    130     }
    131     if (m_pSyntax->GetWordSize() == 2 && m_pSyntax->GetWordBuf()[0] == 'E' &&
    132         m_pSyntax->GetWordBuf()[1] == 'I') {
    133       break;
    134     }
    135   }
    136   if (m_Options.m_bTextOnly) {
    137     if (pStream) {
    138       pStream->Release();
    139     } else {
    140       pDict->Release();
    141     }
    142     return;
    143   }
    144   pDict->SetAtName("Subtype", "Image");
    145   CPDF_ImageObject* pImgObj = AddImage(pStream, NULL, TRUE);
    146   if (!pImgObj) {
    147     if (pStream) {
    148       pStream->Release();
    149     } else {
    150       pDict->Release();
    151     }
    152   }
    153 }
    154 void CPDF_StreamContentParser::ParsePathObject() {
    155   FX_FLOAT params[6] = {};
    156   int nParams = 0;
    157   int last_pos = m_pSyntax->GetPos();
    158   while (1) {
    159     CPDF_StreamParser::SyntaxType type = m_pSyntax->ParseNextElement();
    160     FX_BOOL bProcessed = TRUE;
    161     switch (type) {
    162       case CPDF_StreamParser::EndOfData:
    163         return;
    164       case CPDF_StreamParser::Keyword: {
    165         int len = m_pSyntax->GetWordSize();
    166         if (len == 1) {
    167           switch (m_pSyntax->GetWordBuf()[0]) {
    168             case kPathOperatorSubpath:
    169               AddPathPoint(params[0], params[1], FXPT_MOVETO);
    170               nParams = 0;
    171               break;
    172             case kPathOperatorLine:
    173               AddPathPoint(params[0], params[1], FXPT_LINETO);
    174               nParams = 0;
    175               break;
    176             case kPathOperatorCubicBezier1:
    177               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
    178               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
    179               AddPathPoint(params[4], params[5], FXPT_BEZIERTO);
    180               nParams = 0;
    181               break;
    182             case kPathOperatorCubicBezier2:
    183               AddPathPoint(m_PathCurrentX, m_PathCurrentY, FXPT_BEZIERTO);
    184               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
    185               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
    186               nParams = 0;
    187               break;
    188             case kPathOperatorCubicBezier3:
    189               AddPathPoint(params[0], params[1], FXPT_BEZIERTO);
    190               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
    191               AddPathPoint(params[2], params[3], FXPT_BEZIERTO);
    192               nParams = 0;
    193               break;
    194             case kPathOperatorClosePath:
    195               Handle_ClosePath();
    196               nParams = 0;
    197               break;
    198             default:
    199               bProcessed = FALSE;
    200               break;
    201           }
    202         } else if (len == 2) {
    203           if (m_pSyntax->GetWordBuf()[0] == kPathOperatorRectangle[0] &&
    204               m_pSyntax->GetWordBuf()[1] == kPathOperatorRectangle[1]) {
    205             AddPathRect(params[0], params[1], params[2], params[3]);
    206             nParams = 0;
    207           } else {
    208             bProcessed = FALSE;
    209           }
    210         } else {
    211           bProcessed = FALSE;
    212         }
    213         if (bProcessed) {
    214           last_pos = m_pSyntax->GetPos();
    215         }
    216         break;
    217       }
    218       case CPDF_StreamParser::Number: {
    219         if (nParams == 6) {
    220           break;
    221         }
    222         FX_BOOL bInteger;
    223         int value;
    224         FX_atonum(
    225             CFX_ByteStringC(m_pSyntax->GetWordBuf(), m_pSyntax->GetWordSize()),
    226             bInteger, &value);
    227         params[nParams++] = bInteger ? (FX_FLOAT)value : *(FX_FLOAT*)&value;
    228         break;
    229       }
    230       default:
    231         bProcessed = FALSE;
    232     }
    233     if (!bProcessed) {
    234       m_pSyntax->SetPos(last_pos);
    235       return;
    236     }
    237   }
    238 }
    239 CPDF_StreamParser::CPDF_StreamParser(const uint8_t* pData, FX_DWORD dwSize) {
    240   m_pBuf = pData;
    241   m_Size = dwSize;
    242   m_Pos = 0;
    243   m_pLastObj = NULL;
    244 }
    245 CPDF_StreamParser::~CPDF_StreamParser() {
    246   if (m_pLastObj) {
    247     m_pLastObj->Release();
    248   }
    249 }
    250 FX_DWORD _DecodeAllScanlines(ICodec_ScanlineDecoder* pDecoder,
    251                              uint8_t*& dest_buf,
    252                              FX_DWORD& dest_size) {
    253   if (!pDecoder) {
    254     return (FX_DWORD)-1;
    255   }
    256   int ncomps = pDecoder->CountComps();
    257   int bpc = pDecoder->GetBPC();
    258   int width = pDecoder->GetWidth();
    259   int height = pDecoder->GetHeight();
    260   int pitch = (width * ncomps * bpc + 7) / 8;
    261   if (height == 0 || pitch > (1 << 30) / height) {
    262     delete pDecoder;
    263     return -1;
    264   }
    265   dest_buf = FX_Alloc2D(uint8_t, pitch, height);
    266   dest_size = pitch * height;  // Safe since checked alloc returned.
    267   for (int row = 0; row < height; row++) {
    268     const uint8_t* pLine = pDecoder->GetScanline(row);
    269     if (!pLine)
    270       break;
    271 
    272     FXSYS_memcpy(dest_buf + row * pitch, pLine, pitch);
    273   }
    274   FX_DWORD srcoff = pDecoder->GetSrcOffset();
    275   delete pDecoder;
    276   return srcoff;
    277 }
    278 ICodec_ScanlineDecoder* FPDFAPI_CreateFaxDecoder(
    279     const uint8_t* src_buf,
    280     FX_DWORD src_size,
    281     int width,
    282     int height,
    283     const CPDF_Dictionary* pParams);
    284 
    285 FX_DWORD PDF_DecodeInlineStream(const uint8_t* src_buf,
    286                                 FX_DWORD limit,
    287                                 int width,
    288                                 int height,
    289                                 CFX_ByteString& decoder,
    290                                 CPDF_Dictionary* pParam,
    291                                 uint8_t*& dest_buf,
    292                                 FX_DWORD& dest_size) {
    293   if (decoder == "CCITTFaxDecode" || decoder == "CCF") {
    294     ICodec_ScanlineDecoder* pDecoder =
    295         FPDFAPI_CreateFaxDecoder(src_buf, limit, width, height, pParam);
    296     return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
    297   }
    298   if (decoder == "ASCII85Decode" || decoder == "A85") {
    299     return A85Decode(src_buf, limit, dest_buf, dest_size);
    300   }
    301   if (decoder == "ASCIIHexDecode" || decoder == "AHx") {
    302     return HexDecode(src_buf, limit, dest_buf, dest_size);
    303   }
    304   if (decoder == "FlateDecode" || decoder == "Fl") {
    305     return FPDFAPI_FlateOrLZWDecode(FALSE, src_buf, limit, pParam, dest_size,
    306                                     dest_buf, dest_size);
    307   }
    308   if (decoder == "LZWDecode" || decoder == "LZW") {
    309     return FPDFAPI_FlateOrLZWDecode(TRUE, src_buf, limit, pParam, 0, dest_buf,
    310                                     dest_size);
    311   }
    312   if (decoder == "DCTDecode" || decoder == "DCT") {
    313     ICodec_ScanlineDecoder* pDecoder =
    314         CPDF_ModuleMgr::Get()->GetJpegModule()->CreateDecoder(
    315             src_buf, limit, width, height, 0,
    316             pParam ? pParam->GetInteger("ColorTransform", 1) : 1);
    317     return _DecodeAllScanlines(pDecoder, dest_buf, dest_size);
    318   }
    319   if (decoder == "RunLengthDecode" || decoder == "RL") {
    320     return RunLengthDecode(src_buf, limit, dest_buf, dest_size);
    321   }
    322   dest_size = 0;
    323   dest_buf = 0;
    324   return (FX_DWORD)-1;
    325 }
    326 CPDF_Stream* CPDF_StreamParser::ReadInlineStream(CPDF_Document* pDoc,
    327                                                  CPDF_Dictionary* pDict,
    328                                                  CPDF_Object* pCSObj,
    329                                                  FX_BOOL bDecode) {
    330   if (m_Pos == m_Size)
    331     return nullptr;
    332 
    333   if (PDFCharIsWhitespace(m_pBuf[m_Pos]))
    334     m_Pos++;
    335 
    336   CFX_ByteString Decoder;
    337   CPDF_Dictionary* pParam = nullptr;
    338   CPDF_Object* pFilter = pDict->GetElementValue("Filter");
    339   if (pFilter) {
    340     if (CPDF_Array* pArray = pFilter->AsArray()) {
    341       Decoder = pArray->GetString(0);
    342       CPDF_Array* pParams = pDict->GetArray("DecodeParms");
    343       if (pParams)
    344         pParam = pParams->GetDict(0);
    345     } else {
    346       Decoder = pFilter->GetString();
    347       pParam = pDict->GetDict("DecodeParms");
    348     }
    349   }
    350   FX_DWORD width = pDict->GetInteger("Width");
    351   FX_DWORD height = pDict->GetInteger("Height");
    352   FX_DWORD OrigSize = 0;
    353   if (pCSObj) {
    354     FX_DWORD bpc = pDict->GetInteger("BitsPerComponent");
    355     FX_DWORD nComponents = 1;
    356     CPDF_ColorSpace* pCS = pDoc->LoadColorSpace(pCSObj);
    357     if (!pCS) {
    358       nComponents = 3;
    359     } else {
    360       nComponents = pCS->CountComponents();
    361       pDoc->GetPageData()->ReleaseColorSpace(pCSObj);
    362     }
    363     FX_DWORD pitch = width;
    364     if (bpc && pitch > INT_MAX / bpc) {
    365       return NULL;
    366     }
    367     pitch *= bpc;
    368     if (nComponents && pitch > INT_MAX / nComponents) {
    369       return NULL;
    370     }
    371     pitch *= nComponents;
    372     if (pitch > INT_MAX - 7) {
    373       return NULL;
    374     }
    375     pitch += 7;
    376     pitch /= 8;
    377     OrigSize = pitch;
    378   } else {
    379     if (width > INT_MAX - 7) {
    380       return NULL;
    381     }
    382     OrigSize = ((width + 7) / 8);
    383   }
    384   if (height && OrigSize > INT_MAX / height) {
    385     return NULL;
    386   }
    387   OrigSize *= height;
    388   uint8_t* pData = NULL;
    389   FX_DWORD dwStreamSize;
    390   if (Decoder.IsEmpty()) {
    391     if (OrigSize > m_Size - m_Pos) {
    392       OrigSize = m_Size - m_Pos;
    393     }
    394     pData = FX_Alloc(uint8_t, OrigSize);
    395     FXSYS_memcpy(pData, m_pBuf + m_Pos, OrigSize);
    396     dwStreamSize = OrigSize;
    397     m_Pos += OrigSize;
    398   } else {
    399     FX_DWORD dwDestSize = OrigSize;
    400     dwStreamSize =
    401         PDF_DecodeInlineStream(m_pBuf + m_Pos, m_Size - m_Pos, width, height,
    402                                Decoder, pParam, pData, dwDestSize);
    403     if ((int)dwStreamSize < 0) {
    404       FX_Free(pData);
    405       return NULL;
    406     }
    407     if (bDecode) {
    408       m_Pos += dwStreamSize;
    409       dwStreamSize = dwDestSize;
    410       if (CPDF_Array* pArray = pFilter->AsArray()) {
    411         pArray->RemoveAt(0);
    412         CPDF_Array* pParams = pDict->GetArray("DecodeParms");
    413         if (pParams)
    414           pParams->RemoveAt(0);
    415       } else {
    416         pDict->RemoveAt("Filter");
    417         pDict->RemoveAt("DecodeParms");
    418       }
    419     } else {
    420       FX_Free(pData);
    421       FX_DWORD dwSavePos = m_Pos;
    422       m_Pos += dwStreamSize;
    423       while (1) {
    424         FX_DWORD dwPrevPos = m_Pos;
    425         CPDF_StreamParser::SyntaxType type = ParseNextElement();
    426         if (type == CPDF_StreamParser::EndOfData) {
    427           break;
    428         }
    429         if (type != CPDF_StreamParser::Keyword) {
    430           dwStreamSize += m_Pos - dwPrevPos;
    431           continue;
    432         }
    433         if (GetWordSize() == 2 && GetWordBuf()[0] == 'E' &&
    434             GetWordBuf()[1] == 'I') {
    435           m_Pos = dwPrevPos;
    436           break;
    437         }
    438         dwStreamSize += m_Pos - dwPrevPos;
    439       }
    440       m_Pos = dwSavePos;
    441       pData = FX_Alloc(uint8_t, dwStreamSize);
    442       FXSYS_memcpy(pData, m_pBuf + m_Pos, dwStreamSize);
    443       m_Pos += dwStreamSize;
    444     }
    445   }
    446   pDict->SetAtInteger("Length", (int)dwStreamSize);
    447   return new CPDF_Stream(pData, dwStreamSize, pDict);
    448 }
    449 
    450 #define MAX_WORD_BUFFER 256
    451 #define MAX_STRING_LENGTH 32767
    452 #define FXDWORD_TRUE FXDWORD_FROM_LSBFIRST(0x65757274)
    453 #define FXDWORD_NULL FXDWORD_FROM_LSBFIRST(0x6c6c756e)
    454 #define FXDWORD_FALS FXDWORD_FROM_LSBFIRST(0x736c6166)
    455 CPDF_StreamParser::SyntaxType CPDF_StreamParser::ParseNextElement() {
    456   if (m_pLastObj) {
    457     m_pLastObj->Release();
    458     m_pLastObj = nullptr;
    459   }
    460 
    461   m_WordSize = 0;
    462   FX_BOOL bIsNumber = TRUE;
    463   if (!PositionIsInBounds())
    464     return EndOfData;
    465 
    466   int ch = m_pBuf[m_Pos++];
    467   while (1) {
    468     while (PDFCharIsWhitespace(ch)) {
    469       if (!PositionIsInBounds())
    470         return EndOfData;
    471 
    472       ch = m_pBuf[m_Pos++];
    473     }
    474 
    475     if (ch != '%')
    476       break;
    477 
    478     while (1) {
    479       if (!PositionIsInBounds())
    480         return EndOfData;
    481 
    482       ch = m_pBuf[m_Pos++];
    483       if (PDFCharIsLineEnding(ch))
    484         break;
    485     }
    486   }
    487 
    488   if (PDFCharIsDelimiter(ch) && ch != '/') {
    489     m_Pos--;
    490     m_pLastObj = ReadNextObject();
    491     return Others;
    492   }
    493 
    494   while (1) {
    495     if (m_WordSize < MAX_WORD_BUFFER)
    496       m_WordBuffer[m_WordSize++] = ch;
    497 
    498     if (!PDFCharIsNumeric(ch))
    499       bIsNumber = FALSE;
    500 
    501     if (!PositionIsInBounds())
    502       break;
    503 
    504     ch = m_pBuf[m_Pos++];
    505 
    506     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    507       m_Pos--;
    508       break;
    509     }
    510   }
    511 
    512   m_WordBuffer[m_WordSize] = 0;
    513   if (bIsNumber)
    514     return Number;
    515   if (m_WordBuffer[0] == '/')
    516     return Name;
    517 
    518   if (m_WordSize == 4) {
    519     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
    520       m_pLastObj = new CPDF_Boolean(TRUE);
    521       return Others;
    522     }
    523     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
    524       m_pLastObj = new CPDF_Null;
    525       return Others;
    526     }
    527   } else if (m_WordSize == 5) {
    528     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
    529       m_pLastObj = new CPDF_Boolean(FALSE);
    530       return Others;
    531     }
    532   }
    533   return Keyword;
    534 }
    535 
    536 void CPDF_StreamParser::SkipPathObject() {
    537   FX_DWORD command_startpos = m_Pos;
    538   if (!PositionIsInBounds())
    539     return;
    540 
    541   int ch = m_pBuf[m_Pos++];
    542   while (1) {
    543     while (PDFCharIsWhitespace(ch)) {
    544       if (!PositionIsInBounds())
    545         return;
    546       ch = m_pBuf[m_Pos++];
    547     }
    548 
    549     if (!PDFCharIsNumeric(ch)) {
    550       m_Pos = command_startpos;
    551       return;
    552     }
    553 
    554     while (1) {
    555       while (!PDFCharIsWhitespace(ch)) {
    556         if (!PositionIsInBounds())
    557           return;
    558         ch = m_pBuf[m_Pos++];
    559       }
    560 
    561       while (PDFCharIsWhitespace(ch)) {
    562         if (!PositionIsInBounds())
    563           return;
    564         ch = m_pBuf[m_Pos++];
    565       }
    566 
    567       if (PDFCharIsNumeric(ch))
    568         continue;
    569 
    570       FX_DWORD op_startpos = m_Pos - 1;
    571       while (!PDFCharIsWhitespace(ch) && !PDFCharIsDelimiter(ch)) {
    572         if (!PositionIsInBounds())
    573           return;
    574         ch = m_pBuf[m_Pos++];
    575       }
    576 
    577       if (m_Pos - op_startpos == 2) {
    578         int op = m_pBuf[op_startpos];
    579         if (op == kPathOperatorSubpath || op == kPathOperatorLine ||
    580             op == kPathOperatorCubicBezier1 ||
    581             op == kPathOperatorCubicBezier2 ||
    582             op == kPathOperatorCubicBezier3) {
    583           command_startpos = m_Pos;
    584           break;
    585         }
    586       } else if (m_Pos - op_startpos == 3) {
    587         if (m_pBuf[op_startpos] == kPathOperatorRectangle[0] &&
    588             m_pBuf[op_startpos + 1] == kPathOperatorRectangle[1]) {
    589           command_startpos = m_Pos;
    590           break;
    591         }
    592       }
    593       m_Pos = command_startpos;
    594       return;
    595     }
    596   }
    597 }
    598 CPDF_Object* CPDF_StreamParser::ReadNextObject(FX_BOOL bAllowNestedArray,
    599                                                FX_BOOL bInArray) {
    600   FX_BOOL bIsNumber;
    601   GetNextWord(bIsNumber);
    602   if (m_WordSize == 0) {
    603     return NULL;
    604   }
    605   if (bIsNumber) {
    606     m_WordBuffer[m_WordSize] = 0;
    607     return new CPDF_Number(CFX_ByteStringC(m_WordBuffer, m_WordSize));
    608   }
    609   int first_char = m_WordBuffer[0];
    610   if (first_char == '/') {
    611     return new CPDF_Name(
    612         PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1)));
    613   }
    614   if (first_char == '(') {
    615     return new CPDF_String(ReadString(), FALSE);
    616   }
    617   if (first_char == '<') {
    618     if (m_WordSize == 1) {
    619       return new CPDF_String(ReadHexString(), TRUE);
    620     }
    621     CPDF_Dictionary* pDict = new CPDF_Dictionary;
    622     while (1) {
    623       GetNextWord(bIsNumber);
    624       if (m_WordSize == 0) {
    625         pDict->Release();
    626         return nullptr;
    627       }
    628       if (m_WordSize == 2 && m_WordBuffer[0] == '>') {
    629         break;
    630       }
    631       if (m_WordBuffer[0] != '/') {
    632         pDict->Release();
    633         return nullptr;
    634       }
    635       CFX_ByteString key =
    636           PDF_NameDecode(CFX_ByteStringC(m_WordBuffer + 1, m_WordSize - 1));
    637       CPDF_Object* pObj = ReadNextObject(TRUE);
    638       if (!pObj) {
    639         pDict->Release();
    640         return nullptr;
    641       }
    642       if (!key.IsEmpty()) {
    643         pDict->SetAt(key, pObj);
    644       } else {
    645         pObj->Release();
    646       }
    647     }
    648     return pDict;
    649   }
    650   if (first_char == '[') {
    651     if (!bAllowNestedArray && bInArray) {
    652       return NULL;
    653     }
    654     CPDF_Array* pArray = new CPDF_Array;
    655     while (1) {
    656       CPDF_Object* pObj = ReadNextObject(bAllowNestedArray, TRUE);
    657       if (pObj) {
    658         pArray->Add(pObj);
    659         continue;
    660       }
    661 
    662       if (m_WordSize == 0 || m_WordBuffer[0] == ']')
    663         break;
    664     }
    665     return pArray;
    666   }
    667   if (m_WordSize == 4) {
    668     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_TRUE) {
    669       return new CPDF_Boolean(TRUE);
    670     }
    671     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_NULL) {
    672       return new CPDF_Null;
    673     }
    674   } else if (m_WordSize == 5) {
    675     if (*(FX_DWORD*)m_WordBuffer == FXDWORD_FALS && m_WordBuffer[4] == 'e') {
    676       return new CPDF_Boolean(FALSE);
    677     }
    678   }
    679   return NULL;
    680 }
    681 void CPDF_StreamParser::GetNextWord(FX_BOOL& bIsNumber) {
    682   m_WordSize = 0;
    683   bIsNumber = TRUE;
    684   if (!PositionIsInBounds())
    685     return;
    686 
    687   int ch = m_pBuf[m_Pos++];
    688   while (1) {
    689     while (PDFCharIsWhitespace(ch)) {
    690       if (!PositionIsInBounds()) {
    691         return;
    692       }
    693       ch = m_pBuf[m_Pos++];
    694     }
    695 
    696     if (ch != '%')
    697       break;
    698 
    699     while (1) {
    700       if (!PositionIsInBounds())
    701         return;
    702       ch = m_pBuf[m_Pos++];
    703       if (PDFCharIsLineEnding(ch))
    704         break;
    705     }
    706   }
    707 
    708   if (PDFCharIsDelimiter(ch)) {
    709     bIsNumber = FALSE;
    710     m_WordBuffer[m_WordSize++] = ch;
    711     if (ch == '/') {
    712       while (1) {
    713         if (!PositionIsInBounds())
    714           return;
    715         ch = m_pBuf[m_Pos++];
    716         if (!PDFCharIsOther(ch) && !PDFCharIsNumeric(ch)) {
    717           m_Pos--;
    718           return;
    719         }
    720 
    721         if (m_WordSize < MAX_WORD_BUFFER)
    722           m_WordBuffer[m_WordSize++] = ch;
    723       }
    724     } else if (ch == '<') {
    725       if (!PositionIsInBounds())
    726         return;
    727       ch = m_pBuf[m_Pos++];
    728       if (ch == '<')
    729         m_WordBuffer[m_WordSize++] = ch;
    730       else
    731         m_Pos--;
    732     } else if (ch == '>') {
    733       if (!PositionIsInBounds())
    734         return;
    735       ch = m_pBuf[m_Pos++];
    736       if (ch == '>')
    737         m_WordBuffer[m_WordSize++] = ch;
    738       else
    739         m_Pos--;
    740     }
    741     return;
    742   }
    743 
    744   while (1) {
    745     if (m_WordSize < MAX_WORD_BUFFER)
    746       m_WordBuffer[m_WordSize++] = ch;
    747     if (!PDFCharIsNumeric(ch))
    748       bIsNumber = FALSE;
    749 
    750     if (!PositionIsInBounds())
    751       return;
    752     ch = m_pBuf[m_Pos++];
    753     if (PDFCharIsDelimiter(ch) || PDFCharIsWhitespace(ch)) {
    754       m_Pos--;
    755       break;
    756     }
    757   }
    758 }
    759 
    760 CFX_ByteString CPDF_StreamParser::ReadString() {
    761   if (!PositionIsInBounds())
    762     return CFX_ByteString();
    763 
    764   int ch = m_pBuf[m_Pos++];
    765   CFX_ByteTextBuf buf;
    766   int parlevel = 0;
    767   int status = 0, iEscCode = 0;
    768   while (1) {
    769     switch (status) {
    770       case 0:
    771         if (ch == ')') {
    772           if (parlevel == 0) {
    773             if (buf.GetLength() > MAX_STRING_LENGTH) {
    774               return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
    775             }
    776             return buf.GetByteString();
    777           }
    778           parlevel--;
    779           buf.AppendChar(')');
    780         } else if (ch == '(') {
    781           parlevel++;
    782           buf.AppendChar('(');
    783         } else if (ch == '\\') {
    784           status = 1;
    785         } else {
    786           buf.AppendChar((char)ch);
    787         }
    788         break;
    789       case 1:
    790         if (ch >= '0' && ch <= '7') {
    791           iEscCode = FXSYS_toDecimalDigit(ch);
    792           status = 2;
    793           break;
    794         }
    795         if (ch == 'n') {
    796           buf.AppendChar('\n');
    797         } else if (ch == 'r') {
    798           buf.AppendChar('\r');
    799         } else if (ch == 't') {
    800           buf.AppendChar('\t');
    801         } else if (ch == 'b') {
    802           buf.AppendChar('\b');
    803         } else if (ch == 'f') {
    804           buf.AppendChar('\f');
    805         } else if (ch == '\r') {
    806           status = 4;
    807           break;
    808         } else if (ch == '\n') {
    809         } else {
    810           buf.AppendChar(ch);
    811         }
    812         status = 0;
    813         break;
    814       case 2:
    815         if (ch >= '0' && ch <= '7') {
    816           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
    817           status = 3;
    818         } else {
    819           buf.AppendChar(iEscCode);
    820           status = 0;
    821           continue;
    822         }
    823         break;
    824       case 3:
    825         if (ch >= '0' && ch <= '7') {
    826           iEscCode = iEscCode * 8 + FXSYS_toDecimalDigit(ch);
    827           buf.AppendChar(iEscCode);
    828           status = 0;
    829         } else {
    830           buf.AppendChar(iEscCode);
    831           status = 0;
    832           continue;
    833         }
    834         break;
    835       case 4:
    836         status = 0;
    837         if (ch != '\n') {
    838           continue;
    839         }
    840         break;
    841     }
    842     if (!PositionIsInBounds())
    843       break;
    844 
    845     ch = m_pBuf[m_Pos++];
    846   }
    847   if (PositionIsInBounds())
    848     ch = m_pBuf[m_Pos++];
    849 
    850   if (buf.GetLength() > MAX_STRING_LENGTH) {
    851     return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
    852   }
    853   return buf.GetByteString();
    854 }
    855 CFX_ByteString CPDF_StreamParser::ReadHexString() {
    856   if (!PositionIsInBounds())
    857     return CFX_ByteString();
    858 
    859   CFX_ByteTextBuf buf;
    860   bool bFirst = true;
    861   int code = 0;
    862   while (PositionIsInBounds()) {
    863     int ch = m_pBuf[m_Pos++];
    864 
    865     if (ch == '>')
    866       break;
    867 
    868     if (!std::isxdigit(ch))
    869       continue;
    870 
    871     int val = FXSYS_toHexDigit(ch);
    872     if (bFirst) {
    873       code = val * 16;
    874     } else {
    875       code += val;
    876       buf.AppendByte((uint8_t)code);
    877     }
    878     bFirst = !bFirst;
    879   }
    880   if (!bFirst)
    881     buf.AppendChar((char)code);
    882 
    883   if (buf.GetLength() > MAX_STRING_LENGTH)
    884     return CFX_ByteString(buf.GetBuffer(), MAX_STRING_LENGTH);
    885 
    886   return buf.GetByteString();
    887 }
    888 
    889 bool CPDF_StreamParser::PositionIsInBounds() const {
    890   return m_Pos < m_Size;
    891 }
    892 
    893 CPDF_ContentParser::CPDF_ContentParser()
    894     : m_Status(Ready),
    895       m_InternalStage(STAGE_GETCONTENT),
    896       m_pObjects(nullptr),
    897       m_bForm(false),
    898       m_pType3Char(nullptr),
    899       m_pData(nullptr),
    900       m_Size(0),
    901       m_CurrentOffset(0) {}
    902 CPDF_ContentParser::~CPDF_ContentParser() {
    903   if (!m_pSingleStream)
    904     FX_Free(m_pData);
    905 }
    906 void CPDF_ContentParser::Start(CPDF_Page* pPage, CPDF_ParseOptions* pOptions) {
    907   if (m_Status != Ready || !pPage || !pPage->m_pDocument ||
    908       !pPage->m_pFormDict) {
    909     m_Status = Done;
    910     return;
    911   }
    912   m_pObjects = pPage;
    913   m_bForm = FALSE;
    914   if (pOptions) {
    915     m_Options = *pOptions;
    916   }
    917   m_Status = ToBeContinued;
    918   m_InternalStage = STAGE_GETCONTENT;
    919   m_CurrentOffset = 0;
    920 
    921   CPDF_Object* pContent = pPage->m_pFormDict->GetElementValue("Contents");
    922   if (!pContent) {
    923     m_Status = Done;
    924     return;
    925   }
    926   if (CPDF_Stream* pStream = pContent->AsStream()) {
    927     m_nStreams = 0;
    928     m_pSingleStream.reset(new CPDF_StreamAcc);
    929     m_pSingleStream->LoadAllData(pStream, FALSE);
    930   } else if (CPDF_Array* pArray = pContent->AsArray()) {
    931     m_nStreams = pArray->GetCount();
    932     if (m_nStreams)
    933       m_StreamArray.resize(m_nStreams);
    934     else
    935       m_Status = Done;
    936   } else {
    937     m_Status = Done;
    938   }
    939 }
    940 void CPDF_ContentParser::Start(CPDF_Form* pForm,
    941                                CPDF_AllStates* pGraphicStates,
    942                                CFX_Matrix* pParentMatrix,
    943                                CPDF_Type3Char* pType3Char,
    944                                CPDF_ParseOptions* pOptions,
    945                                int level) {
    946   m_pType3Char = pType3Char;
    947   m_pObjects = pForm;
    948   m_bForm = TRUE;
    949   CFX_Matrix form_matrix = pForm->m_pFormDict->GetMatrix("Matrix");
    950   if (pGraphicStates) {
    951     form_matrix.Concat(pGraphicStates->m_CTM);
    952   }
    953   CPDF_Array* pBBox = pForm->m_pFormDict->GetArray("BBox");
    954   CFX_FloatRect form_bbox;
    955   CPDF_Path ClipPath;
    956   if (pBBox) {
    957     form_bbox = pBBox->GetRect();
    958     ClipPath.New();
    959     ClipPath.AppendRect(form_bbox.left, form_bbox.bottom, form_bbox.right,
    960                         form_bbox.top);
    961     ClipPath.Transform(&form_matrix);
    962     if (pParentMatrix) {
    963       ClipPath.Transform(pParentMatrix);
    964     }
    965     form_bbox.Transform(&form_matrix);
    966     if (pParentMatrix) {
    967       form_bbox.Transform(pParentMatrix);
    968     }
    969   }
    970   CPDF_Dictionary* pResources = pForm->m_pFormDict->GetDict("Resources");
    971   m_pParser.reset(new CPDF_StreamContentParser(
    972       pForm->m_pDocument, pForm->m_pPageResources, pForm->m_pResources,
    973       pParentMatrix, pForm, pResources, &form_bbox, pOptions, pGraphicStates,
    974       level));
    975   m_pParser->GetCurStates()->m_CTM = form_matrix;
    976   m_pParser->GetCurStates()->m_ParentMatrix = form_matrix;
    977   if (ClipPath.NotNull()) {
    978     m_pParser->GetCurStates()->m_ClipPath.AppendPath(ClipPath, FXFILL_WINDING,
    979                                                      TRUE);
    980   }
    981   if (pForm->m_Transparency & PDFTRANS_GROUP) {
    982     CPDF_GeneralStateData* pData =
    983         m_pParser->GetCurStates()->m_GeneralState.GetModify();
    984     pData->m_BlendType = FXDIB_BLEND_NORMAL;
    985     pData->m_StrokeAlpha = 1.0f;
    986     pData->m_FillAlpha = 1.0f;
    987     pData->m_pSoftMask = NULL;
    988   }
    989   m_nStreams = 0;
    990   m_pSingleStream.reset(new CPDF_StreamAcc);
    991   m_pSingleStream->LoadAllData(pForm->m_pFormStream, FALSE);
    992   m_pData = (uint8_t*)m_pSingleStream->GetData();
    993   m_Size = m_pSingleStream->GetSize();
    994   m_Status = ToBeContinued;
    995   m_InternalStage = STAGE_PARSE;
    996   m_CurrentOffset = 0;
    997 }
    998 void CPDF_ContentParser::Continue(IFX_Pause* pPause) {
    999   int steps = 0;
   1000   while (m_Status == ToBeContinued) {
   1001     if (m_InternalStage == STAGE_GETCONTENT) {
   1002       if (m_CurrentOffset == m_nStreams) {
   1003         if (!m_StreamArray.empty()) {
   1004           FX_SAFE_DWORD safeSize = 0;
   1005           for (const auto& stream : m_StreamArray) {
   1006             safeSize += stream->GetSize();
   1007             safeSize += 1;
   1008           }
   1009           if (!safeSize.IsValid()) {
   1010             m_Status = Done;
   1011             return;
   1012           }
   1013           m_Size = safeSize.ValueOrDie();
   1014           m_pData = FX_Alloc(uint8_t, m_Size);
   1015           FX_DWORD pos = 0;
   1016           for (const auto& stream : m_StreamArray) {
   1017             FXSYS_memcpy(m_pData + pos, stream->GetData(), stream->GetSize());
   1018             pos += stream->GetSize();
   1019             m_pData[pos++] = ' ';
   1020           }
   1021           m_StreamArray.clear();
   1022         } else {
   1023           m_pData = (uint8_t*)m_pSingleStream->GetData();
   1024           m_Size = m_pSingleStream->GetSize();
   1025         }
   1026         m_InternalStage = STAGE_PARSE;
   1027         m_CurrentOffset = 0;
   1028       } else {
   1029         CPDF_Array* pContent = m_pObjects->m_pFormDict->GetArray("Contents");
   1030         m_StreamArray[m_CurrentOffset].reset(new CPDF_StreamAcc);
   1031         CPDF_Stream* pStreamObj = ToStream(
   1032             pContent ? pContent->GetElementValue(m_CurrentOffset) : nullptr);
   1033         m_StreamArray[m_CurrentOffset]->LoadAllData(pStreamObj, FALSE);
   1034         m_CurrentOffset++;
   1035       }
   1036     }
   1037     if (m_InternalStage == STAGE_PARSE) {
   1038       if (!m_pParser) {
   1039         m_pParser.reset(new CPDF_StreamContentParser(
   1040             m_pObjects->m_pDocument, m_pObjects->m_pPageResources, nullptr,
   1041             nullptr, m_pObjects, m_pObjects->m_pResources, &m_pObjects->m_BBox,
   1042             &m_Options, nullptr, 0));
   1043         m_pParser->GetCurStates()->m_ColorState.GetModify()->Default();
   1044       }
   1045       if (m_CurrentOffset >= m_Size) {
   1046         m_InternalStage = STAGE_CHECKCLIP;
   1047       } else {
   1048         m_CurrentOffset +=
   1049             m_pParser->Parse(m_pData + m_CurrentOffset,
   1050                              m_Size - m_CurrentOffset, PARSE_STEP_LIMIT);
   1051       }
   1052     }
   1053     if (m_InternalStage == STAGE_CHECKCLIP) {
   1054       if (m_pType3Char) {
   1055         m_pType3Char->m_bColored = m_pParser->IsColored();
   1056         m_pType3Char->m_Width =
   1057             FXSYS_round(m_pParser->GetType3Data()[0] * 1000);
   1058         m_pType3Char->m_BBox.left =
   1059             FXSYS_round(m_pParser->GetType3Data()[2] * 1000);
   1060         m_pType3Char->m_BBox.bottom =
   1061             FXSYS_round(m_pParser->GetType3Data()[3] * 1000);
   1062         m_pType3Char->m_BBox.right =
   1063             FXSYS_round(m_pParser->GetType3Data()[4] * 1000);
   1064         m_pType3Char->m_BBox.top =
   1065             FXSYS_round(m_pParser->GetType3Data()[5] * 1000);
   1066       }
   1067       FX_POSITION pos = m_pObjects->m_ObjectList.GetHeadPosition();
   1068       while (pos) {
   1069         CPDF_PageObject* pObj =
   1070             (CPDF_PageObject*)m_pObjects->m_ObjectList.GetNext(pos);
   1071         if (pObj->m_ClipPath.IsNull()) {
   1072           continue;
   1073         }
   1074         if (pObj->m_ClipPath.GetPathCount() != 1) {
   1075           continue;
   1076         }
   1077         if (pObj->m_ClipPath.GetTextCount()) {
   1078           continue;
   1079         }
   1080         CPDF_Path ClipPath = pObj->m_ClipPath.GetPath(0);
   1081         if (!ClipPath.IsRect() || pObj->m_Type == PDFPAGE_SHADING) {
   1082           continue;
   1083         }
   1084         CFX_FloatRect old_rect(ClipPath.GetPointX(0), ClipPath.GetPointY(0),
   1085                                ClipPath.GetPointX(2), ClipPath.GetPointY(2));
   1086         CFX_FloatRect obj_rect(pObj->m_Left, pObj->m_Bottom, pObj->m_Right,
   1087                                pObj->m_Top);
   1088         if (old_rect.Contains(obj_rect)) {
   1089           pObj->m_ClipPath.SetNull();
   1090         }
   1091       }
   1092       m_Status = Done;
   1093       return;
   1094     }
   1095     steps++;
   1096     if (pPause && pPause->NeedToPauseNow()) {
   1097       break;
   1098     }
   1099   }
   1100 }
   1101