1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_ppo.h" 8 9 #include <map> 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_document.h" 16 #include "core/fpdfapi/parser/cpdf_name.h" 17 #include "core/fpdfapi/parser/cpdf_number.h" 18 #include "core/fpdfapi/parser/cpdf_reference.h" 19 #include "core/fpdfapi/parser/cpdf_stream.h" 20 #include "core/fpdfapi/parser/cpdf_string.h" 21 #include "fpdfsdk/fsdk_define.h" 22 #include "third_party/base/ptr_util.h" 23 #include "third_party/base/stl_util.h" 24 25 namespace { 26 27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict, 28 const CFX_ByteString& bsSrcTag) { 29 if (!pDict || bsSrcTag.IsEmpty()) 30 return nullptr; 31 if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type")) 32 return nullptr; 33 34 CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect(); 35 if (!ToName(pType)) 36 return nullptr; 37 if (pType->GetString().Compare("Page")) 38 return nullptr; 39 40 CPDF_Dictionary* pp = 41 ToDictionary(pDict->GetObjectFor("Parent")->GetDirect()); 42 if (!pp) 43 return nullptr; 44 45 if (pDict->KeyExist(bsSrcTag)) 46 return pDict->GetObjectFor(bsSrcTag); 47 48 while (pp) { 49 if (pp->KeyExist(bsSrcTag)) 50 return pp->GetObjectFor(bsSrcTag); 51 if (!pp->KeyExist("Parent")) 52 break; 53 pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect()); 54 } 55 return nullptr; 56 } 57 58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict, 59 CPDF_Dictionary* pSrcPageDict, 60 const CFX_ByteString& key) { 61 if (pCurPageDict->KeyExist(key)) 62 return true; 63 64 CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key); 65 if (!pInheritable) 66 return false; 67 68 pCurPageDict->SetFor(key, pInheritable->Clone()); 69 return true; 70 } 71 72 bool ParserPageRangeString(CFX_ByteString rangstring, 73 std::vector<uint16_t>* pageArray, 74 int nCount) { 75 if (rangstring.IsEmpty()) 76 return true; 77 78 rangstring.Remove(' '); 79 int nLength = rangstring.GetLength(); 80 CFX_ByteString cbCompareString("0123456789-,"); 81 for (int i = 0; i < nLength; ++i) { 82 if (cbCompareString.Find(rangstring[i]) == -1) 83 return false; 84 } 85 86 CFX_ByteString cbMidRange; 87 int nStringFrom = 0; 88 int nStringTo = 0; 89 while (nStringTo < nLength) { 90 nStringTo = rangstring.Find(',', nStringFrom); 91 if (nStringTo == -1) 92 nStringTo = nLength; 93 cbMidRange = rangstring.Mid(nStringFrom, nStringTo - nStringFrom); 94 int nMid = cbMidRange.Find('-'); 95 if (nMid == -1) { 96 long lPageNum = atol(cbMidRange.c_str()); 97 if (lPageNum <= 0 || lPageNum > nCount) 98 return false; 99 pageArray->push_back((uint16_t)lPageNum); 100 } else { 101 int nStartPageNum = atol(cbMidRange.Mid(0, nMid).c_str()); 102 if (nStartPageNum == 0) 103 return false; 104 105 ++nMid; 106 int nEnd = cbMidRange.GetLength() - nMid; 107 if (nEnd == 0) 108 return false; 109 110 int nEndPageNum = atol(cbMidRange.Mid(nMid, nEnd).c_str()); 111 if (nStartPageNum < 0 || nStartPageNum > nEndPageNum || 112 nEndPageNum > nCount) { 113 return false; 114 } 115 for (int i = nStartPageNum; i <= nEndPageNum; ++i) { 116 pageArray->push_back(i); 117 } 118 } 119 nStringFrom = nStringTo + 1; 120 } 121 return true; 122 } 123 124 } // namespace 125 126 class CPDF_PageOrganizer { 127 public: 128 CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc); 129 ~CPDF_PageOrganizer(); 130 131 bool PDFDocInit(); 132 bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex); 133 134 private: 135 using ObjectNumberMap = std::map<uint32_t, uint32_t>; 136 137 bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap); 138 uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef); 139 140 CPDF_Document* m_pDestPDFDoc; 141 CPDF_Document* m_pSrcPDFDoc; 142 }; 143 144 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, 145 CPDF_Document* pSrcPDFDoc) 146 : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {} 147 148 CPDF_PageOrganizer::~CPDF_PageOrganizer() {} 149 150 bool CPDF_PageOrganizer::PDFDocInit() { 151 ASSERT(m_pDestPDFDoc); 152 ASSERT(m_pSrcPDFDoc); 153 154 CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot(); 155 if (!pNewRoot) 156 return false; 157 158 CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo(); 159 if (!pDocInfoDict) 160 return false; 161 162 pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false); 163 164 CFX_ByteString cbRootType = pNewRoot->GetStringFor("Type", ""); 165 if (cbRootType.IsEmpty()) 166 pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog"); 167 168 CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages"); 169 CPDF_Dictionary* pNewPages = 170 pElement ? ToDictionary(pElement->GetDirect()) : nullptr; 171 if (!pNewPages) { 172 pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>(); 173 pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc, 174 pNewPages->GetObjNum()); 175 } 176 177 CFX_ByteString cbPageType = pNewPages->GetStringFor("Type", ""); 178 if (cbPageType.IsEmpty()) 179 pNewPages->SetNewFor<CPDF_Name>("Type", "Pages"); 180 181 if (!pNewPages->GetArrayFor("Kids")) { 182 pNewPages->SetNewFor<CPDF_Number>("Count", 0); 183 pNewPages->SetNewFor<CPDF_Reference>( 184 "Kids", m_pDestPDFDoc, 185 m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum()); 186 } 187 188 return true; 189 } 190 191 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums, 192 int nIndex) { 193 int curpage = nIndex; 194 auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>(); 195 int nSize = pdfium::CollectionSize<int>(pageNums); 196 for (int i = 0; i < nSize; ++i) { 197 CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage); 198 CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1); 199 if (!pSrcPageDict || !pCurPageDict) 200 return false; 201 202 // Clone the page dictionary 203 for (const auto& it : *pSrcPageDict) { 204 const CFX_ByteString& cbSrcKeyStr = it.first; 205 if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent") 206 continue; 207 208 CPDF_Object* pObj = it.second.get(); 209 pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone()); 210 } 211 212 // inheritable item 213 // 1 MediaBox - required 214 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) { 215 // Search for "CropBox" in the source page dictionary, 216 // if it does not exists, use the default letter size. 217 CPDF_Object* pInheritable = 218 PageDictGetInheritableTag(pSrcPageDict, "CropBox"); 219 if (pInheritable) { 220 pCurPageDict->SetFor("MediaBox", pInheritable->Clone()); 221 } else { 222 // Make the default size to be letter size (8.5'x11') 223 CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox"); 224 pArray->AddNew<CPDF_Number>(0); 225 pArray->AddNew<CPDF_Number>(0); 226 pArray->AddNew<CPDF_Number>(612); 227 pArray->AddNew<CPDF_Number>(792); 228 } 229 } 230 231 // 2 Resources - required 232 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources")) 233 return false; 234 235 // 3 CropBox - optional 236 CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox"); 237 // 4 Rotate - optional 238 CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate"); 239 240 // Update the reference 241 uint32_t dwOldPageObj = pSrcPageDict->GetObjNum(); 242 uint32_t dwNewPageObj = pCurPageDict->GetObjNum(); 243 (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj; 244 UpdateReference(pCurPageDict, pObjNumberMap.get()); 245 ++curpage; 246 } 247 248 return true; 249 } 250 251 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj, 252 ObjectNumberMap* pObjNumberMap) { 253 switch (pObj->GetType()) { 254 case CPDF_Object::REFERENCE: { 255 CPDF_Reference* pReference = pObj->AsReference(); 256 uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference); 257 if (newobjnum == 0) 258 return false; 259 pReference->SetRef(m_pDestPDFDoc, newobjnum); 260 break; 261 } 262 case CPDF_Object::DICTIONARY: { 263 CPDF_Dictionary* pDict = pObj->AsDictionary(); 264 auto it = pDict->begin(); 265 while (it != pDict->end()) { 266 const CFX_ByteString& key = it->first; 267 CPDF_Object* pNextObj = it->second.get(); 268 ++it; 269 if (key == "Parent" || key == "Prev" || key == "First") 270 continue; 271 if (!pNextObj) 272 return false; 273 if (!UpdateReference(pNextObj, pObjNumberMap)) 274 pDict->RemoveFor(key); 275 } 276 break; 277 } 278 case CPDF_Object::ARRAY: { 279 CPDF_Array* pArray = pObj->AsArray(); 280 for (size_t i = 0; i < pArray->GetCount(); ++i) { 281 CPDF_Object* pNextObj = pArray->GetObjectAt(i); 282 if (!pNextObj) 283 return false; 284 if (!UpdateReference(pNextObj, pObjNumberMap)) 285 return false; 286 } 287 break; 288 } 289 case CPDF_Object::STREAM: { 290 CPDF_Stream* pStream = pObj->AsStream(); 291 CPDF_Dictionary* pDict = pStream->GetDict(); 292 if (!pDict) 293 return false; 294 if (!UpdateReference(pDict, pObjNumberMap)) 295 return false; 296 break; 297 } 298 default: 299 break; 300 } 301 302 return true; 303 } 304 305 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap, 306 CPDF_Reference* pRef) { 307 if (!pRef) 308 return 0; 309 310 uint32_t dwObjnum = pRef->GetRefObjNum(); 311 uint32_t dwNewObjNum = 0; 312 const auto it = pObjNumberMap->find(dwObjnum); 313 if (it != pObjNumberMap->end()) 314 dwNewObjNum = it->second; 315 if (dwNewObjNum) 316 return dwNewObjNum; 317 318 CPDF_Object* pDirect = pRef->GetDirect(); 319 if (!pDirect) 320 return 0; 321 322 std::unique_ptr<CPDF_Object> pClone = pDirect->Clone(); 323 if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) { 324 if (pDictClone->KeyExist("Type")) { 325 CFX_ByteString strType = pDictClone->GetStringFor("Type"); 326 if (!FXSYS_stricmp(strType.c_str(), "Pages")) 327 return 4; 328 if (!FXSYS_stricmp(strType.c_str(), "Page")) 329 return 0; 330 } 331 } 332 CPDF_Object* pUnownedClone = 333 m_pDestPDFDoc->AddIndirectObject(std::move(pClone)); 334 dwNewObjNum = pUnownedClone->GetObjNum(); 335 (*pObjNumberMap)[dwObjnum] = dwNewObjNum; 336 if (!UpdateReference(pUnownedClone, pObjNumberMap)) 337 return 0; 338 339 return dwNewObjNum; 340 } 341 342 DLLEXPORT FPDF_BOOL STDCALL FPDF_ImportPages(FPDF_DOCUMENT dest_doc, 343 FPDF_DOCUMENT src_doc, 344 FPDF_BYTESTRING pagerange, 345 int index) { 346 CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc); 347 if (!dest_doc) 348 return false; 349 350 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc); 351 if (!pSrcDoc) 352 return false; 353 354 std::vector<uint16_t> pageArray; 355 int nCount = pSrcDoc->GetPageCount(); 356 if (pagerange) { 357 if (!ParserPageRangeString(pagerange, &pageArray, nCount)) 358 return false; 359 } else { 360 for (int i = 1; i <= nCount; ++i) { 361 pageArray.push_back(i); 362 } 363 } 364 365 CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc); 366 return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index); 367 } 368 369 DLLEXPORT FPDF_BOOL STDCALL FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc, 370 FPDF_DOCUMENT src_doc) { 371 CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc); 372 if (!pDstDoc) 373 return false; 374 375 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc); 376 if (!pSrcDoc) 377 return false; 378 379 CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot(); 380 pSrcDict = pSrcDict->GetDictFor("ViewerPreferences"); 381 if (!pSrcDict) 382 return false; 383 384 CPDF_Dictionary* pDstDict = pDstDoc->GetRoot(); 385 if (!pDstDict) 386 return false; 387 388 pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject()); 389 return true; 390 } 391