Home | History | Annotate | Download | only in fpdfsdk
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "public/fpdf_flatten.h"
      8 
      9 #include <algorithm>
     10 #include <memory>
     11 #include <utility>
     12 #include <vector>
     13 
     14 #include "core/fpdfapi/page/cpdf_page.h"
     15 #include "core/fpdfapi/page/cpdf_pageobject.h"
     16 #include "core/fpdfapi/parser/cpdf_array.h"
     17 #include "core/fpdfapi/parser/cpdf_document.h"
     18 #include "core/fpdfapi/parser/cpdf_name.h"
     19 #include "core/fpdfapi/parser/cpdf_number.h"
     20 #include "core/fpdfapi/parser/cpdf_reference.h"
     21 #include "core/fpdfapi/parser/cpdf_stream.h"
     22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     23 #include "core/fpdfdoc/cpdf_annot.h"
     24 #include "fpdfsdk/fsdk_define.h"
     25 #include "third_party/base/stl_util.h"
     26 
     27 enum FPDF_TYPE { MAX, MIN };
     28 enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM };
     29 
     30 namespace {
     31 
     32 bool IsValiableRect(CFX_FloatRect rect, CFX_FloatRect rcPage) {
     33   if (rect.left - rect.right > 0.000001f || rect.bottom - rect.top > 0.000001f)
     34     return false;
     35 
     36   if (rect.left == 0.0f && rect.top == 0.0f && rect.right == 0.0f &&
     37       rect.bottom == 0.0f)
     38     return false;
     39 
     40   if (!rcPage.IsEmpty()) {
     41     if (rect.left - rcPage.left < -10.000001f ||
     42         rect.right - rcPage.right > 10.000001f ||
     43         rect.top - rcPage.top > 10.000001f ||
     44         rect.bottom - rcPage.bottom < -10.000001f)
     45       return false;
     46   }
     47 
     48   return true;
     49 }
     50 
     51 void GetContentsRect(CPDF_Document* pDoc,
     52                      CPDF_Dictionary* pDict,
     53                      std::vector<CFX_FloatRect>* pRectArray) {
     54   std::unique_ptr<CPDF_Page> pPDFPage(new CPDF_Page(pDoc, pDict, false));
     55   pPDFPage->ParseContent();
     56 
     57   for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
     58     CFX_FloatRect rc;
     59     rc.left = pPageObject->m_Left;
     60     rc.right = pPageObject->m_Right;
     61     rc.bottom = pPageObject->m_Bottom;
     62     rc.top = pPageObject->m_Top;
     63     if (IsValiableRect(rc, pDict->GetRectFor("MediaBox")))
     64       pRectArray->push_back(rc);
     65   }
     66 }
     67 
     68 void ParserStream(CPDF_Dictionary* pPageDic,
     69                   CPDF_Dictionary* pStream,
     70                   std::vector<CFX_FloatRect>* pRectArray,
     71                   std::vector<CPDF_Dictionary*>* pObjectArray) {
     72   if (!pStream)
     73     return;
     74   CFX_FloatRect rect;
     75   if (pStream->KeyExist("Rect"))
     76     rect = pStream->GetRectFor("Rect");
     77   else if (pStream->KeyExist("BBox"))
     78     rect = pStream->GetRectFor("BBox");
     79 
     80   if (IsValiableRect(rect, pPageDic->GetRectFor("MediaBox")))
     81     pRectArray->push_back(rect);
     82 
     83   pObjectArray->push_back(pStream);
     84 }
     85 
     86 int ParserAnnots(CPDF_Document* pSourceDoc,
     87                  CPDF_Dictionary* pPageDic,
     88                  std::vector<CFX_FloatRect>* pRectArray,
     89                  std::vector<CPDF_Dictionary*>* pObjectArray,
     90                  int nUsage) {
     91   if (!pSourceDoc || !pPageDic)
     92     return FLATTEN_FAIL;
     93 
     94   GetContentsRect(pSourceDoc, pPageDic, pRectArray);
     95   CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
     96   if (!pAnnots)
     97     return FLATTEN_NOTHINGTODO;
     98 
     99   uint32_t dwSize = pAnnots->GetCount();
    100   for (int i = 0; i < (int)dwSize; i++) {
    101     CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnots->GetDirectObjectAt(i));
    102     if (!pAnnotDic)
    103       continue;
    104 
    105     CFX_ByteString sSubtype = pAnnotDic->GetStringFor("Subtype");
    106     if (sSubtype == "Popup")
    107       continue;
    108 
    109     int nAnnotFlag = pAnnotDic->GetIntegerFor("F");
    110     if (nAnnotFlag & ANNOTFLAG_HIDDEN)
    111       continue;
    112 
    113     if (nUsage == FLAT_NORMALDISPLAY) {
    114       if (nAnnotFlag & ANNOTFLAG_INVISIBLE)
    115         continue;
    116 
    117       ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
    118     } else {
    119       if (nAnnotFlag & ANNOTFLAG_PRINT)
    120         ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
    121     }
    122   }
    123   return FLATTEN_SUCCESS;
    124 }
    125 
    126 FX_FLOAT GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
    127                         FPDF_TYPE type,
    128                         FPDF_VALUE value) {
    129   size_t nRects = array.size();
    130   if (nRects <= 0)
    131     return 0.0f;
    132 
    133   std::vector<FX_FLOAT> pArray(nRects);
    134   switch (value) {
    135     case LEFT:
    136       for (size_t i = 0; i < nRects; i++)
    137         pArray[i] = array[i].left;
    138       break;
    139     case TOP:
    140       for (size_t i = 0; i < nRects; i++)
    141         pArray[i] = array[i].top;
    142       break;
    143     case RIGHT:
    144       for (size_t i = 0; i < nRects; i++)
    145         pArray[i] = array[i].right;
    146       break;
    147     case BOTTOM:
    148       for (size_t i = 0; i < nRects; i++)
    149         pArray[i] = array[i].bottom;
    150       break;
    151     default:
    152       // Not reachable.
    153       return 0.0f;
    154   }
    155 
    156   FX_FLOAT fRet = pArray[0];
    157   if (type == MAX) {
    158     for (size_t i = 1; i < nRects; i++)
    159       fRet = std::max(fRet, pArray[i]);
    160   } else {
    161     for (size_t i = 1; i < nRects; i++)
    162       fRet = std::min(fRet, pArray[i]);
    163   }
    164   return fRet;
    165 }
    166 
    167 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
    168   CFX_FloatRect rcRet;
    169 
    170   rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
    171   rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
    172   rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
    173   rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
    174 
    175   return rcRet;
    176 }
    177 
    178 uint32_t NewIndirectContentsStream(const CFX_ByteString& key,
    179                                    CPDF_Document* pDocument) {
    180   CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
    181       nullptr, 0,
    182       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
    183   CFX_ByteString sStream;
    184   sStream.Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str());
    185   pNewContents->SetData(sStream.raw_str(), sStream.GetLength());
    186   return pNewContents->GetObjNum();
    187 }
    188 
    189 void SetPageContents(const CFX_ByteString& key,
    190                      CPDF_Dictionary* pPage,
    191                      CPDF_Document* pDocument) {
    192   CPDF_Array* pContentsArray = nullptr;
    193   CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents");
    194   if (!pContentsStream) {
    195     pContentsArray = pPage->GetArrayFor("Contents");
    196     if (!pContentsArray) {
    197       if (!key.IsEmpty()) {
    198         pPage->SetNewFor<CPDF_Reference>(
    199             "Contents", pDocument, NewIndirectContentsStream(key, pDocument));
    200       }
    201       return;
    202     }
    203   }
    204   pPage->ConvertToIndirectObjectFor("Contents", pDocument);
    205   if (!pContentsArray) {
    206     pContentsArray = pDocument->NewIndirect<CPDF_Array>();
    207     CPDF_StreamAcc acc;
    208     acc.LoadAllData(pContentsStream);
    209     CFX_ByteString sStream = "q\n";
    210     CFX_ByteString sBody =
    211         CFX_ByteString((const FX_CHAR*)acc.GetData(), acc.GetSize());
    212     sStream = sStream + sBody + "\nQ";
    213     pContentsStream->SetData(sStream.raw_str(), sStream.GetLength());
    214     pContentsArray->AddNew<CPDF_Reference>(pDocument,
    215                                            pContentsStream->GetObjNum());
    216     pPage->SetNewFor<CPDF_Reference>("Contents", pDocument,
    217                                      pContentsArray->GetObjNum());
    218   }
    219   if (!key.IsEmpty()) {
    220     pContentsArray->AddNew<CPDF_Reference>(
    221         pDocument, NewIndirectContentsStream(key, pDocument));
    222   }
    223 }
    224 
    225 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot,
    226                      CFX_FloatRect rcStream,
    227                      const CFX_Matrix& matrix) {
    228   if (rcStream.IsEmpty())
    229     return CFX_Matrix();
    230 
    231   matrix.TransformRect(rcStream);
    232   rcStream.Normalize();
    233 
    234   FX_FLOAT a = rcAnnot.Width() / rcStream.Width();
    235   FX_FLOAT d = rcAnnot.Height() / rcStream.Height();
    236 
    237   FX_FLOAT e = rcAnnot.left - rcStream.left * a;
    238   FX_FLOAT f = rcAnnot.bottom - rcStream.bottom * d;
    239   return CFX_Matrix(a, 0, 0, d, e, f);
    240 }
    241 
    242 }  // namespace
    243 
    244 DLLEXPORT int STDCALL FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
    245   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
    246   if (!page)
    247     return FLATTEN_FAIL;
    248 
    249   CPDF_Document* pDocument = pPage->m_pDocument;
    250   CPDF_Dictionary* pPageDict = pPage->m_pFormDict;
    251   if (!pDocument || !pPageDict)
    252     return FLATTEN_FAIL;
    253 
    254   std::vector<CPDF_Dictionary*> ObjectArray;
    255   std::vector<CFX_FloatRect> RectArray;
    256   int iRet =
    257       ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
    258   if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
    259     return iRet;
    260 
    261   CFX_FloatRect rcOriginalCB;
    262   CFX_FloatRect rcMerger = CalculateRect(&RectArray);
    263   CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox");
    264   if (pPageDict->KeyExist("CropBox"))
    265     rcOriginalMB = pPageDict->GetRectFor("CropBox");
    266 
    267   if (rcOriginalMB.IsEmpty())
    268     rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
    269 
    270   rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
    271   rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
    272   rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
    273   rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
    274   if (pPageDict->KeyExist("ArtBox"))
    275     rcOriginalCB = pPageDict->GetRectFor("ArtBox");
    276   else
    277     rcOriginalCB = rcOriginalMB;
    278 
    279   if (!rcOriginalMB.IsEmpty()) {
    280     CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox");
    281     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left);
    282     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom);
    283     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right);
    284     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top);
    285   }
    286 
    287   if (!rcOriginalCB.IsEmpty()) {
    288     CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox");
    289     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left);
    290     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom);
    291     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right);
    292     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top);
    293   }
    294 
    295   CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources");
    296   if (!pRes)
    297     pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources");
    298 
    299   CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
    300       nullptr, 0,
    301       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
    302 
    303   uint32_t dwObjNum = pNewXObject->GetObjNum();
    304   CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
    305   if (!pPageXObject)
    306     pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
    307 
    308   CFX_ByteString key = "";
    309   int nStreams = pdfium::CollectionSize<int>(ObjectArray);
    310   if (nStreams > 0) {
    311     for (int iKey = 0; /*iKey < 100*/; iKey++) {
    312       char sExtend[5] = {};
    313       FXSYS_itoa(iKey, sExtend, 10);
    314       key = CFX_ByteString("FFT") + CFX_ByteString(sExtend);
    315       if (!pPageXObject->KeyExist(key))
    316         break;
    317     }
    318   }
    319 
    320   SetPageContents(key, pPageDict, pDocument);
    321 
    322   CPDF_Dictionary* pNewXORes = nullptr;
    323   if (!key.IsEmpty()) {
    324     pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum);
    325     CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
    326     pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
    327     pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
    328     pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
    329     pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
    330     pNewOXbjectDic->SetNewFor<CPDF_Name>("Name", "FRM");
    331     CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox");
    332     pNewOXbjectDic->SetRectFor("BBox", rcBBox);
    333   }
    334 
    335   for (int i = 0; i < nStreams; i++) {
    336     CPDF_Dictionary* pAnnotDic = ObjectArray[i];
    337     if (!pAnnotDic)
    338       continue;
    339 
    340     CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect");
    341     rcAnnot.Normalize();
    342 
    343     CFX_ByteString sAnnotState = pAnnotDic->GetStringFor("AS");
    344     CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP");
    345     if (!pAnnotAP)
    346       continue;
    347 
    348     CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
    349     if (!pAPStream) {
    350       CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N");
    351       if (!pAPDic)
    352         continue;
    353 
    354       if (!sAnnotState.IsEmpty()) {
    355         pAPStream = pAPDic->GetStreamFor(sAnnotState);
    356       } else {
    357         auto it = pAPDic->begin();
    358         if (it != pAPDic->end()) {
    359           CPDF_Object* pFirstObj = it->second.get();
    360           if (pFirstObj) {
    361             if (pFirstObj->IsReference())
    362               pFirstObj = pFirstObj->GetDirect();
    363             if (!pFirstObj->IsStream())
    364               continue;
    365             pAPStream = pFirstObj->AsStream();
    366           }
    367         }
    368       }
    369     }
    370     if (!pAPStream)
    371       continue;
    372 
    373     CPDF_Dictionary* pAPDic = pAPStream->GetDict();
    374     CFX_FloatRect rcStream;
    375     if (pAPDic->KeyExist("Rect"))
    376       rcStream = pAPDic->GetRectFor("Rect");
    377     else if (pAPDic->KeyExist("BBox"))
    378       rcStream = pAPDic->GetRectFor("BBox");
    379 
    380     if (rcStream.IsEmpty())
    381       continue;
    382 
    383     CPDF_Object* pObj = pAPStream;
    384     if (pObj->IsInline()) {
    385       std::unique_ptr<CPDF_Object> pNew = pObj->Clone();
    386       pObj = pNew.get();
    387       pDocument->AddIndirectObject(std::move(pNew));
    388     }
    389 
    390     CPDF_Dictionary* pObjDic = pObj->GetDict();
    391     if (pObjDic) {
    392       pObjDic->SetNewFor<CPDF_Name>("Type", "XObject");
    393       pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form");
    394     }
    395 
    396     CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
    397     if (!pXObject)
    398       pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
    399 
    400     CFX_ByteString sFormName;
    401     sFormName.Format("F%d", i);
    402     pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
    403                                         pObj->GetObjNum());
    404 
    405     CPDF_StreamAcc acc;
    406     acc.LoadAllData(pNewXObject);
    407 
    408     const uint8_t* pData = acc.GetData();
    409     CFX_ByteString sStream(pData, acc.GetSize());
    410     CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix");
    411     if (matrix.IsIdentity()) {
    412       matrix.a = 1.0f;
    413       matrix.b = 0.0f;
    414       matrix.c = 0.0f;
    415       matrix.d = 1.0f;
    416       matrix.e = 0.0f;
    417       matrix.f = 0.0f;
    418     }
    419 
    420     CFX_ByteString sTemp;
    421     CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
    422     sTemp.Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d, m.e, m.f,
    423                  sFormName.c_str());
    424     sStream += sTemp;
    425     pNewXObject->SetData(sStream.raw_str(), sStream.GetLength());
    426   }
    427   pPageDict->RemoveFor("Annots");
    428   return FLATTEN_SUCCESS;
    429 }
    430