Home | History | Annotate | Download | only in fpdfsdk
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "public/fpdf_ppo.h"
      8 
#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>
     13 
     14 #include "core/fpdfapi/parser/cpdf_array.h"
     15 #include "core/fpdfapi/parser/cpdf_document.h"
     16 #include "core/fpdfapi/parser/cpdf_name.h"
     17 #include "core/fpdfapi/parser/cpdf_number.h"
     18 #include "core/fpdfapi/parser/cpdf_reference.h"
     19 #include "core/fpdfapi/parser/cpdf_stream.h"
     20 #include "core/fpdfapi/parser/cpdf_string.h"
     21 #include "fpdfsdk/fsdk_define.h"
     22 #include "third_party/base/ptr_util.h"
     23 #include "third_party/base/stl_util.h"
     24 
     25 namespace {
     26 
     27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
     28                                        const CFX_ByteString& bsSrcTag) {
     29   if (!pDict || bsSrcTag.IsEmpty())
     30     return nullptr;
     31   if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
     32     return nullptr;
     33 
     34   CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
     35   if (!ToName(pType))
     36     return nullptr;
     37   if (pType->GetString().Compare("Page"))
     38     return nullptr;
     39 
     40   CPDF_Dictionary* pp =
     41       ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
     42   if (!pp)
     43     return nullptr;
     44 
     45   if (pDict->KeyExist(bsSrcTag))
     46     return pDict->GetObjectFor(bsSrcTag);
     47 
     48   while (pp) {
     49     if (pp->KeyExist(bsSrcTag))
     50       return pp->GetObjectFor(bsSrcTag);
     51     if (!pp->KeyExist("Parent"))
     52       break;
     53     pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
     54   }
     55   return nullptr;
     56 }
     57 
     58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
     59                      CPDF_Dictionary* pSrcPageDict,
     60                      const CFX_ByteString& key) {
     61   if (pCurPageDict->KeyExist(key))
     62     return true;
     63 
     64   CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
     65   if (!pInheritable)
     66     return false;
     67 
     68   pCurPageDict->SetFor(key, pInheritable->Clone());
     69   return true;
     70 }
     71 
     72 bool ParserPageRangeString(CFX_ByteString rangstring,
     73                            std::vector<uint16_t>* pageArray,
     74                            int nCount) {
     75   if (rangstring.IsEmpty())
     76     return true;
     77 
     78   rangstring.Remove(' ');
     79   int nLength = rangstring.GetLength();
     80   CFX_ByteString cbCompareString("0123456789-,");
     81   for (int i = 0; i < nLength; ++i) {
     82     if (cbCompareString.Find(rangstring[i]) == -1)
     83       return false;
     84   }
     85 
     86   CFX_ByteString cbMidRange;
     87   int nStringFrom = 0;
     88   int nStringTo = 0;
     89   while (nStringTo < nLength) {
     90     nStringTo = rangstring.Find(',', nStringFrom);
     91     if (nStringTo == -1)
     92       nStringTo = nLength;
     93     cbMidRange = rangstring.Mid(nStringFrom, nStringTo - nStringFrom);
     94     int nMid = cbMidRange.Find('-');
     95     if (nMid == -1) {
     96       long lPageNum = atol(cbMidRange.c_str());
     97       if (lPageNum <= 0 || lPageNum > nCount)
     98         return false;
     99       pageArray->push_back((uint16_t)lPageNum);
    100     } else {
    101       int nStartPageNum = atol(cbMidRange.Mid(0, nMid).c_str());
    102       if (nStartPageNum == 0)
    103         return false;
    104 
    105       ++nMid;
    106       int nEnd = cbMidRange.GetLength() - nMid;
    107       if (nEnd == 0)
    108         return false;
    109 
    110       int nEndPageNum = atol(cbMidRange.Mid(nMid, nEnd).c_str());
    111       if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
    112           nEndPageNum > nCount) {
    113         return false;
    114       }
    115       for (int i = nStartPageNum; i <= nEndPageNum; ++i) {
    116         pageArray->push_back(i);
    117       }
    118     }
    119     nStringFrom = nStringTo + 1;
    120   }
    121   return true;
    122 }
    123 
    124 }  // namespace
    125 
    126 class CPDF_PageOrganizer {
    127  public:
    128   CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
    129   ~CPDF_PageOrganizer();
    130 
    131   bool PDFDocInit();
    132   bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
    133 
    134  private:
    135   using ObjectNumberMap = std::map<uint32_t, uint32_t>;
    136 
    137   bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
    138   uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
    139 
    140   CPDF_Document* m_pDestPDFDoc;
    141   CPDF_Document* m_pSrcPDFDoc;
    142 };
    143 
    144 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
    145                                        CPDF_Document* pSrcPDFDoc)
    146     : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
    147 
    148 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
    149 
    150 bool CPDF_PageOrganizer::PDFDocInit() {
    151   ASSERT(m_pDestPDFDoc);
    152   ASSERT(m_pSrcPDFDoc);
    153 
    154   CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
    155   if (!pNewRoot)
    156     return false;
    157 
    158   CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
    159   if (!pDocInfoDict)
    160     return false;
    161 
    162   pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
    163 
    164   CFX_ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
    165   if (cbRootType.IsEmpty())
    166     pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
    167 
    168   CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
    169   CPDF_Dictionary* pNewPages =
    170       pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
    171   if (!pNewPages) {
    172     pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
    173     pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc,
    174                                         pNewPages->GetObjNum());
    175   }
    176 
    177   CFX_ByteString cbPageType = pNewPages->GetStringFor("Type", "");
    178   if (cbPageType.IsEmpty())
    179     pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
    180 
    181   if (!pNewPages->GetArrayFor("Kids")) {
    182     pNewPages->SetNewFor<CPDF_Number>("Count", 0);
    183     pNewPages->SetNewFor<CPDF_Reference>(
    184         "Kids", m_pDestPDFDoc,
    185         m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
    186   }
    187 
    188   return true;
    189 }
    190 
    191 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
    192                                     int nIndex) {
    193   int curpage = nIndex;
    194   auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
    195   int nSize = pdfium::CollectionSize<int>(pageNums);
    196   for (int i = 0; i < nSize; ++i) {
    197     CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
    198     CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
    199     if (!pSrcPageDict || !pCurPageDict)
    200       return false;
    201 
    202     // Clone the page dictionary
    203     for (const auto& it : *pSrcPageDict) {
    204       const CFX_ByteString& cbSrcKeyStr = it.first;
    205       if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
    206         continue;
    207 
    208       CPDF_Object* pObj = it.second.get();
    209       pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
    210     }
    211 
    212     // inheritable item
    213     // 1 MediaBox - required
    214     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
    215       // Search for "CropBox" in the source page dictionary,
    216       // if it does not exists, use the default letter size.
    217       CPDF_Object* pInheritable =
    218           PageDictGetInheritableTag(pSrcPageDict, "CropBox");
    219       if (pInheritable) {
    220         pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
    221       } else {
    222         // Make the default size to be letter size (8.5'x11')
    223         CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
    224         pArray->AddNew<CPDF_Number>(0);
    225         pArray->AddNew<CPDF_Number>(0);
    226         pArray->AddNew<CPDF_Number>(612);
    227         pArray->AddNew<CPDF_Number>(792);
    228       }
    229     }
    230 
    231     // 2 Resources - required
    232     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources"))
    233       return false;
    234 
    235     // 3 CropBox - optional
    236     CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
    237     // 4 Rotate - optional
    238     CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
    239 
    240     // Update the reference
    241     uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
    242     uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
    243     (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
    244     UpdateReference(pCurPageDict, pObjNumberMap.get());
    245     ++curpage;
    246   }
    247 
    248   return true;
    249 }
    250 
    251 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
    252                                          ObjectNumberMap* pObjNumberMap) {
    253   switch (pObj->GetType()) {
    254     case CPDF_Object::REFERENCE: {
    255       CPDF_Reference* pReference = pObj->AsReference();
    256       uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
    257       if (newobjnum == 0)
    258         return false;
    259       pReference->SetRef(m_pDestPDFDoc, newobjnum);
    260       break;
    261     }
    262     case CPDF_Object::DICTIONARY: {
    263       CPDF_Dictionary* pDict = pObj->AsDictionary();
    264       auto it = pDict->begin();
    265       while (it != pDict->end()) {
    266         const CFX_ByteString& key = it->first;
    267         CPDF_Object* pNextObj = it->second.get();
    268         ++it;
    269         if (key == "Parent" || key == "Prev" || key == "First")
    270           continue;
    271         if (!pNextObj)
    272           return false;
    273         if (!UpdateReference(pNextObj, pObjNumberMap))
    274           pDict->RemoveFor(key);
    275       }
    276       break;
    277     }
    278     case CPDF_Object::ARRAY: {
    279       CPDF_Array* pArray = pObj->AsArray();
    280       for (size_t i = 0; i < pArray->GetCount(); ++i) {
    281         CPDF_Object* pNextObj = pArray->GetObjectAt(i);
    282         if (!pNextObj)
    283           return false;
    284         if (!UpdateReference(pNextObj, pObjNumberMap))
    285           return false;
    286       }
    287       break;
    288     }
    289     case CPDF_Object::STREAM: {
    290       CPDF_Stream* pStream = pObj->AsStream();
    291       CPDF_Dictionary* pDict = pStream->GetDict();
    292       if (!pDict)
    293         return false;
    294       if (!UpdateReference(pDict, pObjNumberMap))
    295         return false;
    296       break;
    297     }
    298     default:
    299       break;
    300   }
    301 
    302   return true;
    303 }
    304 
    305 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
    306                                          CPDF_Reference* pRef) {
    307   if (!pRef)
    308     return 0;
    309 
    310   uint32_t dwObjnum = pRef->GetRefObjNum();
    311   uint32_t dwNewObjNum = 0;
    312   const auto it = pObjNumberMap->find(dwObjnum);
    313   if (it != pObjNumberMap->end())
    314     dwNewObjNum = it->second;
    315   if (dwNewObjNum)
    316     return dwNewObjNum;
    317 
    318   CPDF_Object* pDirect = pRef->GetDirect();
    319   if (!pDirect)
    320     return 0;
    321 
    322   std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
    323   if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
    324     if (pDictClone->KeyExist("Type")) {
    325       CFX_ByteString strType = pDictClone->GetStringFor("Type");
    326       if (!FXSYS_stricmp(strType.c_str(), "Pages"))
    327         return 4;
    328       if (!FXSYS_stricmp(strType.c_str(), "Page"))
    329         return 0;
    330     }
    331   }
    332   CPDF_Object* pUnownedClone =
    333       m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
    334   dwNewObjNum = pUnownedClone->GetObjNum();
    335   (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
    336   if (!UpdateReference(pUnownedClone, pObjNumberMap))
    337     return 0;
    338 
    339   return dwNewObjNum;
    340 }
    341 
    342 DLLEXPORT FPDF_BOOL STDCALL FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
    343                                              FPDF_DOCUMENT src_doc,
    344                                              FPDF_BYTESTRING pagerange,
    345                                              int index) {
    346   CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
    347   if (!dest_doc)
    348     return false;
    349 
    350   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
    351   if (!pSrcDoc)
    352     return false;
    353 
    354   std::vector<uint16_t> pageArray;
    355   int nCount = pSrcDoc->GetPageCount();
    356   if (pagerange) {
    357     if (!ParserPageRangeString(pagerange, &pageArray, nCount))
    358       return false;
    359   } else {
    360     for (int i = 1; i <= nCount; ++i) {
    361       pageArray.push_back(i);
    362     }
    363   }
    364 
    365   CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
    366   return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
    367 }
    368 
    369 DLLEXPORT FPDF_BOOL STDCALL FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc,
    370                                                        FPDF_DOCUMENT src_doc) {
    371   CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
    372   if (!pDstDoc)
    373     return false;
    374 
    375   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
    376   if (!pSrcDoc)
    377     return false;
    378 
    379   CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
    380   pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
    381   if (!pSrcDict)
    382     return false;
    383 
    384   CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
    385   if (!pDstDict)
    386     return false;
    387 
    388   pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
    389   return true;
    390 }
    391