Home | History | Annotate | Download | only in fpdfsdk
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
#include "public/fpdf_ppo.h"

#include <map>
#include <memory>
#include <set>
#include <utility>
#include <vector>

#include "core/fpdfapi/parser/cpdf_array.h"
#include "core/fpdfapi/parser/cpdf_document.h"
#include "core/fpdfapi/parser/cpdf_name.h"
#include "core/fpdfapi/parser/cpdf_number.h"
#include "core/fpdfapi/parser/cpdf_reference.h"
#include "core/fpdfapi/parser/cpdf_stream.h"
#include "core/fpdfapi/parser/cpdf_string.h"
#include "core/fxcrt/unowned_ptr.h"
#include "fpdfsdk/fsdk_define.h"
#include "third_party/base/ptr_util.h"
     24 
     25 namespace {
     26 
     27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict,
     28                                        const ByteString& bsSrcTag) {
     29   if (!pDict || bsSrcTag.IsEmpty())
     30     return nullptr;
     31   if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type"))
     32     return nullptr;
     33 
     34   CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect();
     35   if (!ToName(pType))
     36     return nullptr;
     37   if (pType->GetString().Compare("Page"))
     38     return nullptr;
     39 
     40   CPDF_Dictionary* pp =
     41       ToDictionary(pDict->GetObjectFor("Parent")->GetDirect());
     42   if (!pp)
     43     return nullptr;
     44 
     45   if (pDict->KeyExist(bsSrcTag))
     46     return pDict->GetObjectFor(bsSrcTag);
     47 
     48   while (pp) {
     49     if (pp->KeyExist(bsSrcTag))
     50       return pp->GetObjectFor(bsSrcTag);
     51     if (!pp->KeyExist("Parent"))
     52       break;
     53     pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect());
     54   }
     55   return nullptr;
     56 }
     57 
     58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict,
     59                      CPDF_Dictionary* pSrcPageDict,
     60                      const ByteString& key) {
     61   if (pCurPageDict->KeyExist(key))
     62     return true;
     63 
     64   CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key);
     65   if (!pInheritable)
     66     return false;
     67 
     68   pCurPageDict->SetFor(key, pInheritable->Clone());
     69   return true;
     70 }
     71 
     72 bool ParserPageRangeString(ByteString rangstring,
     73                            std::vector<uint16_t>* pageArray,
     74                            int nCount) {
     75   if (rangstring.IsEmpty())
     76     return true;
     77 
     78   rangstring.Remove(' ');
     79   size_t nLength = rangstring.GetLength();
     80   ByteString cbCompareString("0123456789-,");
     81   for (size_t i = 0; i < nLength; ++i) {
     82     if (!cbCompareString.Contains(rangstring[i]))
     83       return false;
     84   }
     85 
     86   ByteString cbMidRange;
     87   size_t nStringFrom = 0;
     88   Optional<size_t> nStringTo = 0;
     89   while (nStringTo < nLength) {
     90     nStringTo = rangstring.Find(',', nStringFrom);
     91     if (!nStringTo.has_value())
     92       nStringTo = nLength;
     93     cbMidRange = rangstring.Mid(nStringFrom, nStringTo.value() - nStringFrom);
     94     auto nMid = cbMidRange.Find('-');
     95     if (!nMid.has_value()) {
     96       uint16_t pageNum =
     97           pdfium::base::checked_cast<uint16_t>(atoi(cbMidRange.c_str()));
     98       if (pageNum <= 0 || pageNum > nCount)
     99         return false;
    100       pageArray->push_back(pageNum);
    101     } else {
    102       uint16_t nStartPageNum = pdfium::base::checked_cast<uint16_t>(
    103           atoi(cbMidRange.Left(nMid.value()).c_str()));
    104       if (nStartPageNum == 0)
    105         return false;
    106 
    107       nMid = nMid.value() + 1;
    108       size_t nEnd = cbMidRange.GetLength() - nMid.value();
    109       if (nEnd == 0)
    110         return false;
    111 
    112       uint16_t nEndPageNum = pdfium::base::checked_cast<uint16_t>(
    113           atoi(cbMidRange.Mid(nMid.value(), nEnd).c_str()));
    114       if (nStartPageNum < 0 || nStartPageNum > nEndPageNum ||
    115           nEndPageNum > nCount) {
    116         return false;
    117       }
    118       for (uint16_t i = nStartPageNum; i <= nEndPageNum; ++i) {
    119         pageArray->push_back(i);
    120       }
    121     }
    122     nStringFrom = nStringTo.value() + 1;
    123   }
    124   return true;
    125 }
    126 
    127 }  // namespace
    128 
    129 class CPDF_PageOrganizer {
    130  public:
    131   CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc);
    132   ~CPDF_PageOrganizer();
    133 
    134   bool PDFDocInit();
    135   bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex);
    136 
    137  private:
    138   using ObjectNumberMap = std::map<uint32_t, uint32_t>;
    139 
    140   bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap);
    141   uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef);
    142 
    143   UnownedPtr<CPDF_Document> m_pDestPDFDoc;
    144   UnownedPtr<CPDF_Document> m_pSrcPDFDoc;
    145 };
    146 
    147 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc,
    148                                        CPDF_Document* pSrcPDFDoc)
    149     : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {}
    150 
    151 CPDF_PageOrganizer::~CPDF_PageOrganizer() {}
    152 
    153 bool CPDF_PageOrganizer::PDFDocInit() {
    154   ASSERT(m_pDestPDFDoc);
    155   ASSERT(m_pSrcPDFDoc);
    156 
    157   CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot();
    158   if (!pNewRoot)
    159     return false;
    160 
    161   CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo();
    162   if (!pDocInfoDict)
    163     return false;
    164 
    165   pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false);
    166 
    167   ByteString cbRootType = pNewRoot->GetStringFor("Type", "");
    168   if (cbRootType.IsEmpty())
    169     pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog");
    170 
    171   CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages");
    172   CPDF_Dictionary* pNewPages =
    173       pElement ? ToDictionary(pElement->GetDirect()) : nullptr;
    174   if (!pNewPages) {
    175     pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>();
    176     pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc.Get(),
    177                                         pNewPages->GetObjNum());
    178   }
    179 
    180   ByteString cbPageType = pNewPages->GetStringFor("Type", "");
    181   if (cbPageType.IsEmpty())
    182     pNewPages->SetNewFor<CPDF_Name>("Type", "Pages");
    183 
    184   if (!pNewPages->GetArrayFor("Kids")) {
    185     pNewPages->SetNewFor<CPDF_Number>("Count", 0);
    186     pNewPages->SetNewFor<CPDF_Reference>(
    187         "Kids", m_pDestPDFDoc.Get(),
    188         m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum());
    189   }
    190 
    191   return true;
    192 }
    193 
    194 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums,
    195                                     int nIndex) {
    196   int curpage = nIndex;
    197   auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>();
    198   for (size_t i = 0; i < pageNums.size(); ++i) {
    199     CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage);
    200     CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1);
    201     if (!pSrcPageDict || !pCurPageDict)
    202       return false;
    203 
    204     // Clone the page dictionary
    205     for (const auto& it : *pSrcPageDict) {
    206       const ByteString& cbSrcKeyStr = it.first;
    207       if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent")
    208         continue;
    209 
    210       CPDF_Object* pObj = it.second.get();
    211       pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone());
    212     }
    213 
    214     // inheritable item
    215     // Even though some entries are required by the PDF spec, there exist
    216     // PDFs that omit them. Set some defaults in this case.
    217     // 1 MediaBox - required
    218     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) {
    219       // Search for "CropBox" in the source page dictionary.
    220       // If it does not exist, use the default letter size.
    221       CPDF_Object* pInheritable =
    222           PageDictGetInheritableTag(pSrcPageDict, "CropBox");
    223       if (pInheritable) {
    224         pCurPageDict->SetFor("MediaBox", pInheritable->Clone());
    225       } else {
    226         // Make the default size letter size (8.5"x11")
    227         CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox");
    228         pArray->AddNew<CPDF_Number>(0);
    229         pArray->AddNew<CPDF_Number>(0);
    230         pArray->AddNew<CPDF_Number>(612);
    231         pArray->AddNew<CPDF_Number>(792);
    232       }
    233     }
    234 
    235     // 2 Resources - required
    236     if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources")) {
    237       // Use a default empty resources if it does not exist.
    238       pCurPageDict->SetNewFor<CPDF_Dictionary>("Resources");
    239     }
    240 
    241     // 3 CropBox - optional
    242     CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox");
    243     // 4 Rotate - optional
    244     CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate");
    245 
    246     // Update the reference
    247     uint32_t dwOldPageObj = pSrcPageDict->GetObjNum();
    248     uint32_t dwNewPageObj = pCurPageDict->GetObjNum();
    249     (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj;
    250     UpdateReference(pCurPageDict, pObjNumberMap.get());
    251     ++curpage;
    252   }
    253 
    254   return true;
    255 }
    256 
    257 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj,
    258                                          ObjectNumberMap* pObjNumberMap) {
    259   switch (pObj->GetType()) {
    260     case CPDF_Object::REFERENCE: {
    261       CPDF_Reference* pReference = pObj->AsReference();
    262       uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference);
    263       if (newobjnum == 0)
    264         return false;
    265       pReference->SetRef(m_pDestPDFDoc.Get(), newobjnum);
    266       break;
    267     }
    268     case CPDF_Object::DICTIONARY: {
    269       CPDF_Dictionary* pDict = pObj->AsDictionary();
    270       auto it = pDict->begin();
    271       while (it != pDict->end()) {
    272         const ByteString& key = it->first;
    273         CPDF_Object* pNextObj = it->second.get();
    274         ++it;
    275         if (key == "Parent" || key == "Prev" || key == "First")
    276           continue;
    277         if (!pNextObj)
    278           return false;
    279         if (!UpdateReference(pNextObj, pObjNumberMap))
    280           pDict->RemoveFor(key);
    281       }
    282       break;
    283     }
    284     case CPDF_Object::ARRAY: {
    285       CPDF_Array* pArray = pObj->AsArray();
    286       for (size_t i = 0; i < pArray->GetCount(); ++i) {
    287         CPDF_Object* pNextObj = pArray->GetObjectAt(i);
    288         if (!pNextObj)
    289           return false;
    290         if (!UpdateReference(pNextObj, pObjNumberMap))
    291           return false;
    292       }
    293       break;
    294     }
    295     case CPDF_Object::STREAM: {
    296       CPDF_Stream* pStream = pObj->AsStream();
    297       CPDF_Dictionary* pDict = pStream->GetDict();
    298       if (!pDict)
    299         return false;
    300       if (!UpdateReference(pDict, pObjNumberMap))
    301         return false;
    302       break;
    303     }
    304     default:
    305       break;
    306   }
    307 
    308   return true;
    309 }
    310 
    311 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap,
    312                                          CPDF_Reference* pRef) {
    313   if (!pRef)
    314     return 0;
    315 
    316   uint32_t dwObjnum = pRef->GetRefObjNum();
    317   uint32_t dwNewObjNum = 0;
    318   const auto it = pObjNumberMap->find(dwObjnum);
    319   if (it != pObjNumberMap->end())
    320     dwNewObjNum = it->second;
    321   if (dwNewObjNum)
    322     return dwNewObjNum;
    323 
    324   CPDF_Object* pDirect = pRef->GetDirect();
    325   if (!pDirect)
    326     return 0;
    327 
    328   std::unique_ptr<CPDF_Object> pClone = pDirect->Clone();
    329   if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) {
    330     if (pDictClone->KeyExist("Type")) {
    331       ByteString strType = pDictClone->GetStringFor("Type");
    332       if (!FXSYS_stricmp(strType.c_str(), "Pages"))
    333         return 4;
    334       if (!FXSYS_stricmp(strType.c_str(), "Page"))
    335         return 0;
    336     }
    337   }
    338   CPDF_Object* pUnownedClone =
    339       m_pDestPDFDoc->AddIndirectObject(std::move(pClone));
    340   dwNewObjNum = pUnownedClone->GetObjNum();
    341   (*pObjNumberMap)[dwObjnum] = dwNewObjNum;
    342   if (!UpdateReference(pUnownedClone, pObjNumberMap))
    343     return 0;
    344 
    345   return dwNewObjNum;
    346 }
    347 
    348 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_ImportPages(FPDF_DOCUMENT dest_doc,
    349                                                      FPDF_DOCUMENT src_doc,
    350                                                      FPDF_BYTESTRING pagerange,
    351                                                      int index) {
    352   CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc);
    353   if (!dest_doc)
    354     return false;
    355 
    356   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
    357   if (!pSrcDoc)
    358     return false;
    359 
    360   std::vector<uint16_t> pageArray;
    361   int nCount = pSrcDoc->GetPageCount();
    362   if (pagerange) {
    363     if (!ParserPageRangeString(pagerange, &pageArray, nCount))
    364       return false;
    365   } else {
    366     for (int i = 1; i <= nCount; ++i) {
    367       pageArray.push_back(i);
    368     }
    369   }
    370 
    371   CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc);
    372   return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index);
    373 }
    374 
    375 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV
    376 FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc, FPDF_DOCUMENT src_doc) {
    377   CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc);
    378   if (!pDstDoc)
    379     return false;
    380 
    381   CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc);
    382   if (!pSrcDoc)
    383     return false;
    384 
    385   CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot();
    386   pSrcDict = pSrcDict->GetDictFor("ViewerPreferences");
    387   if (!pSrcDict)
    388     return false;
    389 
    390   CPDF_Dictionary* pDstDict = pDstDoc->GetRoot();
    391   if (!pDstDict)
    392     return false;
    393 
    394   pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject());
    395   return true;
    396 }
    397