Home | History | Annotate | Download | only in fpdfsdk
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "public/fpdf_flatten.h"
      8 
      9 #include <algorithm>
     10 #include <memory>
     11 #include <utility>
     12 #include <vector>
     13 
     14 #include "core/fpdfapi/page/cpdf_page.h"
     15 #include "core/fpdfapi/page/cpdf_pageobject.h"
     16 #include "core/fpdfapi/parser/cpdf_array.h"
     17 #include "core/fpdfapi/parser/cpdf_document.h"
     18 #include "core/fpdfapi/parser/cpdf_name.h"
     19 #include "core/fpdfapi/parser/cpdf_number.h"
     20 #include "core/fpdfapi/parser/cpdf_reference.h"
     21 #include "core/fpdfapi/parser/cpdf_stream.h"
     22 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     23 #include "core/fpdfdoc/cpdf_annot.h"
     24 #include "fpdfsdk/fsdk_define.h"
     25 #include "third_party/base/stl_util.h"
     26 
     27 enum FPDF_TYPE { MAX, MIN };
     28 enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM };
     29 
     30 namespace {
     31 
     32 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) {
     33   constexpr float kMinSize = 0.000001f;
     34   if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize)
     35     return false;
     36 
     37   if (rcPage.IsEmpty())
     38     return true;
     39 
     40   constexpr float kMinBorderSize = 10.000001f;
     41   return rect.left - rcPage.left >= -kMinBorderSize &&
     42          rect.right - rcPage.right <= kMinBorderSize &&
     43          rect.top - rcPage.top <= kMinBorderSize &&
     44          rect.bottom - rcPage.bottom >= -kMinBorderSize;
     45 }
     46 
     47 void GetContentsRect(CPDF_Document* pDoc,
     48                      CPDF_Dictionary* pDict,
     49                      std::vector<CFX_FloatRect>* pRectArray) {
     50   auto pPDFPage = pdfium::MakeUnique<CPDF_Page>(pDoc, pDict, false);
     51   pPDFPage->ParseContent();
     52 
     53   for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) {
     54     CFX_FloatRect rc;
     55     rc.left = pPageObject->m_Left;
     56     rc.right = pPageObject->m_Right;
     57     rc.bottom = pPageObject->m_Bottom;
     58     rc.top = pPageObject->m_Top;
     59     if (IsValidRect(rc, pDict->GetRectFor("MediaBox")))
     60       pRectArray->push_back(rc);
     61   }
     62 }
     63 
     64 void ParserStream(CPDF_Dictionary* pPageDic,
     65                   CPDF_Dictionary* pStream,
     66                   std::vector<CFX_FloatRect>* pRectArray,
     67                   std::vector<CPDF_Dictionary*>* pObjectArray) {
     68   if (!pStream)
     69     return;
     70   CFX_FloatRect rect;
     71   if (pStream->KeyExist("Rect"))
     72     rect = pStream->GetRectFor("Rect");
     73   else if (pStream->KeyExist("BBox"))
     74     rect = pStream->GetRectFor("BBox");
     75 
     76   if (IsValidRect(rect, pPageDic->GetRectFor("MediaBox")))
     77     pRectArray->push_back(rect);
     78 
     79   pObjectArray->push_back(pStream);
     80 }
     81 
     82 int ParserAnnots(CPDF_Document* pSourceDoc,
     83                  CPDF_Dictionary* pPageDic,
     84                  std::vector<CFX_FloatRect>* pRectArray,
     85                  std::vector<CPDF_Dictionary*>* pObjectArray,
     86                  int nUsage) {
     87   if (!pSourceDoc || !pPageDic)
     88     return FLATTEN_FAIL;
     89 
     90   GetContentsRect(pSourceDoc, pPageDic, pRectArray);
     91   CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots");
     92   if (!pAnnots)
     93     return FLATTEN_NOTHINGTODO;
     94 
     95   for (const auto& pAnnot : *pAnnots) {
     96     CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnot->GetDirect());
     97     if (!pAnnotDic)
     98       continue;
     99 
    100     ByteString sSubtype = pAnnotDic->GetStringFor("Subtype");
    101     if (sSubtype == "Popup")
    102       continue;
    103 
    104     int nAnnotFlag = pAnnotDic->GetIntegerFor("F");
    105     if (nAnnotFlag & ANNOTFLAG_HIDDEN)
    106       continue;
    107 
    108     bool bParseStream;
    109     if (nUsage == FLAT_NORMALDISPLAY)
    110       bParseStream = !(nAnnotFlag & ANNOTFLAG_INVISIBLE);
    111     else
    112       bParseStream = !!(nAnnotFlag & ANNOTFLAG_PRINT);
    113     if (bParseStream)
    114       ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray);
    115   }
    116   return FLATTEN_SUCCESS;
    117 }
    118 
    119 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array,
    120                      FPDF_TYPE type,
    121                      FPDF_VALUE value) {
    122   if (array.empty())
    123     return 0.0f;
    124 
    125   size_t nRects = array.size();
    126   std::vector<float> pArray(nRects);
    127   switch (value) {
    128     case LEFT:
    129       for (size_t i = 0; i < nRects; i++)
    130         pArray[i] = array[i].left;
    131       break;
    132     case TOP:
    133       for (size_t i = 0; i < nRects; i++)
    134         pArray[i] = array[i].top;
    135       break;
    136     case RIGHT:
    137       for (size_t i = 0; i < nRects; i++)
    138         pArray[i] = array[i].right;
    139       break;
    140     case BOTTOM:
    141       for (size_t i = 0; i < nRects; i++)
    142         pArray[i] = array[i].bottom;
    143       break;
    144     default:
    145       NOTREACHED();
    146       return 0.0f;
    147   }
    148 
    149   float fRet = pArray[0];
    150   if (type == MAX) {
    151     for (size_t i = 1; i < nRects; i++)
    152       fRet = std::max(fRet, pArray[i]);
    153   } else {
    154     for (size_t i = 1; i < nRects; i++)
    155       fRet = std::min(fRet, pArray[i]);
    156   }
    157   return fRet;
    158 }
    159 
    160 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) {
    161   CFX_FloatRect rcRet;
    162 
    163   rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT);
    164   rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP);
    165   rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT);
    166   rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM);
    167 
    168   return rcRet;
    169 }
    170 
    171 uint32_t NewIndirectContentsStream(const ByteString& key,
    172                                    CPDF_Document* pDocument) {
    173   CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>(
    174       nullptr, 0,
    175       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
    176   ByteString sStream =
    177       ByteString::Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str());
    178   pNewContents->SetData(sStream.raw_str(), sStream.GetLength());
    179   return pNewContents->GetObjNum();
    180 }
    181 
    182 void SetPageContents(const ByteString& key,
    183                      CPDF_Dictionary* pPage,
    184                      CPDF_Document* pDocument) {
    185   CPDF_Array* pContentsArray = nullptr;
    186   CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents");
    187   if (!pContentsStream) {
    188     pContentsArray = pPage->GetArrayFor("Contents");
    189     if (!pContentsArray) {
    190       if (!key.IsEmpty()) {
    191         pPage->SetNewFor<CPDF_Reference>(
    192             "Contents", pDocument, NewIndirectContentsStream(key, pDocument));
    193       }
    194       return;
    195     }
    196   }
    197   pPage->ConvertToIndirectObjectFor("Contents", pDocument);
    198   if (!pContentsArray) {
    199     pContentsArray = pDocument->NewIndirect<CPDF_Array>();
    200     auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream);
    201     pAcc->LoadAllDataFiltered();
    202     ByteString sStream = "q\n";
    203     ByteString sBody = ByteString(pAcc->GetData(), pAcc->GetSize());
    204     sStream = sStream + sBody + "\nQ";
    205     pContentsStream->SetDataAndRemoveFilter(sStream.raw_str(),
    206                                             sStream.GetLength());
    207     pContentsArray->AddNew<CPDF_Reference>(pDocument,
    208                                            pContentsStream->GetObjNum());
    209     pPage->SetNewFor<CPDF_Reference>("Contents", pDocument,
    210                                      pContentsArray->GetObjNum());
    211   }
    212   if (!key.IsEmpty()) {
    213     pContentsArray->AddNew<CPDF_Reference>(
    214         pDocument, NewIndirectContentsStream(key, pDocument));
    215   }
    216 }
    217 
    218 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot,
    219                      CFX_FloatRect rcStream,
    220                      const CFX_Matrix& matrix) {
    221   if (rcStream.IsEmpty())
    222     return CFX_Matrix();
    223 
    224   rcStream = matrix.TransformRect(rcStream);
    225   rcStream.Normalize();
    226 
    227   float a = rcAnnot.Width() / rcStream.Width();
    228   float d = rcAnnot.Height() / rcStream.Height();
    229 
    230   float e = rcAnnot.left - rcStream.left * a;
    231   float f = rcAnnot.bottom - rcStream.bottom * d;
    232   return CFX_Matrix(a, 0, 0, d, e, f);
    233 }
    234 
    235 }  // namespace
    236 
    237 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) {
    238   CPDF_Page* pPage = CPDFPageFromFPDFPage(page);
    239   if (!page)
    240     return FLATTEN_FAIL;
    241 
    242   CPDF_Document* pDocument = pPage->m_pDocument.Get();
    243   CPDF_Dictionary* pPageDict = pPage->m_pFormDict.Get();
    244   if (!pDocument || !pPageDict)
    245     return FLATTEN_FAIL;
    246 
    247   std::vector<CPDF_Dictionary*> ObjectArray;
    248   std::vector<CFX_FloatRect> RectArray;
    249   int iRet =
    250       ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag);
    251   if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL)
    252     return iRet;
    253 
    254   CFX_FloatRect rcOriginalCB;
    255   CFX_FloatRect rcMerger = CalculateRect(&RectArray);
    256   CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox");
    257   if (pPageDict->KeyExist("CropBox"))
    258     rcOriginalMB = pPageDict->GetRectFor("CropBox");
    259 
    260   if (rcOriginalMB.IsEmpty())
    261     rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f);
    262 
    263   rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left);
    264   rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right);
    265   rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom);
    266   rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top);
    267   if (pPageDict->KeyExist("ArtBox"))
    268     rcOriginalCB = pPageDict->GetRectFor("ArtBox");
    269   else
    270     rcOriginalCB = rcOriginalMB;
    271 
    272   if (!rcOriginalMB.IsEmpty()) {
    273     CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox");
    274     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left);
    275     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom);
    276     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right);
    277     pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top);
    278   }
    279 
    280   if (!rcOriginalCB.IsEmpty()) {
    281     CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox");
    282     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left);
    283     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom);
    284     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right);
    285     pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top);
    286   }
    287 
    288   CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources");
    289   if (!pRes)
    290     pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources");
    291 
    292   CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>(
    293       nullptr, 0,
    294       pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool()));
    295 
    296   uint32_t dwObjNum = pNewXObject->GetObjNum();
    297   CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject");
    298   if (!pPageXObject)
    299     pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject");
    300 
    301   ByteString key;
    302   if (!ObjectArray.empty()) {
    303     int i = 0;
    304     while (i < INT_MAX) {
    305       ByteString sKey = ByteString::Format("FFT%d", i);
    306       if (!pPageXObject->KeyExist(sKey)) {
    307         key = sKey;
    308         break;
    309       }
    310       ++i;
    311     }
    312   }
    313 
    314   SetPageContents(key, pPageDict, pDocument);
    315 
    316   CPDF_Dictionary* pNewXORes = nullptr;
    317   if (!key.IsEmpty()) {
    318     pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum);
    319     CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict();
    320     pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources");
    321     pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject");
    322     pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form");
    323     pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1);
    324     CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox");
    325     pNewOXbjectDic->SetRectFor("BBox", rcBBox);
    326   }
    327 
    328   for (size_t i = 0; i < ObjectArray.size(); ++i) {
    329     CPDF_Dictionary* pAnnotDic = ObjectArray[i];
    330     if (!pAnnotDic)
    331       continue;
    332 
    333     CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect");
    334     rcAnnot.Normalize();
    335 
    336     ByteString sAnnotState = pAnnotDic->GetStringFor("AS");
    337     CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP");
    338     if (!pAnnotAP)
    339       continue;
    340 
    341     CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N");
    342     if (!pAPStream) {
    343       CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N");
    344       if (!pAPDic)
    345         continue;
    346 
    347       if (!sAnnotState.IsEmpty()) {
    348         pAPStream = pAPDic->GetStreamFor(sAnnotState);
    349       } else {
    350         if (pAPDic->GetCount() > 0) {
    351           CPDF_Object* pFirstObj = pAPDic->begin()->second.get();
    352           if (pFirstObj) {
    353             if (pFirstObj->IsReference())
    354               pFirstObj = pFirstObj->GetDirect();
    355             if (!pFirstObj->IsStream())
    356               continue;
    357             pAPStream = pFirstObj->AsStream();
    358           }
    359         }
    360       }
    361     }
    362     if (!pAPStream)
    363       continue;
    364 
    365     CPDF_Dictionary* pAPDic = pAPStream->GetDict();
    366     CFX_FloatRect rcStream;
    367     if (pAPDic->KeyExist("Rect"))
    368       rcStream = pAPDic->GetRectFor("Rect");
    369     else if (pAPDic->KeyExist("BBox"))
    370       rcStream = pAPDic->GetRectFor("BBox");
    371 
    372     if (rcStream.IsEmpty())
    373       continue;
    374 
    375     CPDF_Object* pObj = pAPStream;
    376     if (pObj->IsInline()) {
    377       std::unique_ptr<CPDF_Object> pNew = pObj->Clone();
    378       pObj = pNew.get();
    379       pDocument->AddIndirectObject(std::move(pNew));
    380     }
    381 
    382     CPDF_Dictionary* pObjDic = pObj->GetDict();
    383     if (pObjDic) {
    384       pObjDic->SetNewFor<CPDF_Name>("Type", "XObject");
    385       pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form");
    386     }
    387 
    388     CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject");
    389     if (!pXObject)
    390       pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject");
    391 
    392     ByteString sFormName = ByteString::Format("F%d", i);
    393     pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument,
    394                                         pObj->GetObjNum());
    395 
    396     auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject);
    397     pAcc->LoadAllDataFiltered();
    398     ByteString sStream(pAcc->GetData(), pAcc->GetSize());
    399     CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix");
    400     CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix);
    401     sStream += ByteString::Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d,
    402                                   m.e, m.f, sFormName.c_str());
    403     pNewXObject->SetDataAndRemoveFilter(sStream.raw_str(), sStream.GetLength());
    404   }
    405   pPageDict->RemoveFor("Annots");
    406   return FLATTEN_SUCCESS;
    407 }
    408