1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_ppo.h" 8 9 #include <map> 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "core/fpdfapi/parser/cpdf_array.h" 15 #include "core/fpdfapi/parser/cpdf_document.h" 16 #include "core/fpdfapi/parser/cpdf_name.h" 17 #include "core/fpdfapi/parser/cpdf_number.h" 18 #include "core/fpdfapi/parser/cpdf_reference.h" 19 #include "core/fpdfapi/parser/cpdf_stream.h" 20 #include "core/fpdfapi/parser/cpdf_string.h" 21 #include "core/fxcrt/unowned_ptr.h" 22 #include "fpdfsdk/fsdk_define.h" 23 #include "third_party/base/ptr_util.h" 24 25 namespace { 26 27 CPDF_Object* PageDictGetInheritableTag(CPDF_Dictionary* pDict, 28 const ByteString& bsSrcTag) { 29 if (!pDict || bsSrcTag.IsEmpty()) 30 return nullptr; 31 if (!pDict->KeyExist("Parent") || !pDict->KeyExist("Type")) 32 return nullptr; 33 34 CPDF_Object* pType = pDict->GetObjectFor("Type")->GetDirect(); 35 if (!ToName(pType)) 36 return nullptr; 37 if (pType->GetString().Compare("Page")) 38 return nullptr; 39 40 CPDF_Dictionary* pp = 41 ToDictionary(pDict->GetObjectFor("Parent")->GetDirect()); 42 if (!pp) 43 return nullptr; 44 45 if (pDict->KeyExist(bsSrcTag)) 46 return pDict->GetObjectFor(bsSrcTag); 47 48 while (pp) { 49 if (pp->KeyExist(bsSrcTag)) 50 return pp->GetObjectFor(bsSrcTag); 51 if (!pp->KeyExist("Parent")) 52 break; 53 pp = ToDictionary(pp->GetObjectFor("Parent")->GetDirect()); 54 } 55 return nullptr; 56 } 57 58 bool CopyInheritable(CPDF_Dictionary* pCurPageDict, 59 CPDF_Dictionary* pSrcPageDict, 60 const ByteString& key) { 61 if (pCurPageDict->KeyExist(key)) 62 return true; 63 64 CPDF_Object* pInheritable = PageDictGetInheritableTag(pSrcPageDict, key); 65 if (!pInheritable) 66 return false; 67 68 pCurPageDict->SetFor(key, pInheritable->Clone()); 69 return true; 70 } 71 72 bool ParserPageRangeString(ByteString rangstring, 73 std::vector<uint16_t>* pageArray, 74 int nCount) { 75 if (rangstring.IsEmpty()) 76 return true; 77 78 rangstring.Remove(' '); 79 size_t nLength = rangstring.GetLength(); 80 ByteString cbCompareString("0123456789-,"); 81 for (size_t i = 0; i < nLength; ++i) { 82 if (!cbCompareString.Contains(rangstring[i])) 83 return false; 84 } 85 86 ByteString cbMidRange; 87 size_t nStringFrom = 0; 88 Optional<size_t> nStringTo = 0; 89 while (nStringTo < nLength) { 90 nStringTo = rangstring.Find(',', nStringFrom); 91 if (!nStringTo.has_value()) 92 nStringTo = nLength; 93 cbMidRange = rangstring.Mid(nStringFrom, nStringTo.value() - nStringFrom); 94 auto nMid = cbMidRange.Find('-'); 95 if (!nMid.has_value()) { 96 uint16_t pageNum = 97 pdfium::base::checked_cast<uint16_t>(atoi(cbMidRange.c_str())); 98 if (pageNum <= 0 || pageNum > nCount) 99 return false; 100 pageArray->push_back(pageNum); 101 } else { 102 uint16_t nStartPageNum = pdfium::base::checked_cast<uint16_t>( 103 atoi(cbMidRange.Left(nMid.value()).c_str())); 104 if (nStartPageNum == 0) 105 return false; 106 107 nMid = nMid.value() + 1; 108 size_t nEnd = cbMidRange.GetLength() - nMid.value(); 109 if (nEnd == 0) 110 return false; 111 112 uint16_t nEndPageNum = pdfium::base::checked_cast<uint16_t>( 113 atoi(cbMidRange.Mid(nMid.value(), nEnd).c_str())); 114 if (nStartPageNum < 0 || nStartPageNum > nEndPageNum || 115 nEndPageNum > nCount) { 116 return false; 117 } 118 for (uint16_t i = nStartPageNum; i <= nEndPageNum; ++i) { 119 pageArray->push_back(i); 120 } 121 } 122 nStringFrom = nStringTo.value() + 1; 123 } 124 return true; 125 } 126 127 } // namespace 128 129 class CPDF_PageOrganizer { 130 public: 131 CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, CPDF_Document* pSrcPDFDoc); 132 ~CPDF_PageOrganizer(); 133 134 bool PDFDocInit(); 135 bool ExportPage(const std::vector<uint16_t>& pageNums, int nIndex); 136 137 private: 138 using ObjectNumberMap = std::map<uint32_t, uint32_t>; 139 140 bool UpdateReference(CPDF_Object* pObj, ObjectNumberMap* pObjNumberMap); 141 uint32_t GetNewObjId(ObjectNumberMap* pObjNumberMap, CPDF_Reference* pRef); 142 143 UnownedPtr<CPDF_Document> m_pDestPDFDoc; 144 UnownedPtr<CPDF_Document> m_pSrcPDFDoc; 145 }; 146 147 CPDF_PageOrganizer::CPDF_PageOrganizer(CPDF_Document* pDestPDFDoc, 148 CPDF_Document* pSrcPDFDoc) 149 : m_pDestPDFDoc(pDestPDFDoc), m_pSrcPDFDoc(pSrcPDFDoc) {} 150 151 CPDF_PageOrganizer::~CPDF_PageOrganizer() {} 152 153 bool CPDF_PageOrganizer::PDFDocInit() { 154 ASSERT(m_pDestPDFDoc); 155 ASSERT(m_pSrcPDFDoc); 156 157 CPDF_Dictionary* pNewRoot = m_pDestPDFDoc->GetRoot(); 158 if (!pNewRoot) 159 return false; 160 161 CPDF_Dictionary* pDocInfoDict = m_pDestPDFDoc->GetInfo(); 162 if (!pDocInfoDict) 163 return false; 164 165 pDocInfoDict->SetNewFor<CPDF_String>("Producer", "PDFium", false); 166 167 ByteString cbRootType = pNewRoot->GetStringFor("Type", ""); 168 if (cbRootType.IsEmpty()) 169 pNewRoot->SetNewFor<CPDF_Name>("Type", "Catalog"); 170 171 CPDF_Object* pElement = pNewRoot->GetObjectFor("Pages"); 172 CPDF_Dictionary* pNewPages = 173 pElement ? ToDictionary(pElement->GetDirect()) : nullptr; 174 if (!pNewPages) { 175 pNewPages = m_pDestPDFDoc->NewIndirect<CPDF_Dictionary>(); 176 pNewRoot->SetNewFor<CPDF_Reference>("Pages", m_pDestPDFDoc.Get(), 177 pNewPages->GetObjNum()); 178 } 179 180 ByteString cbPageType = pNewPages->GetStringFor("Type", ""); 181 if (cbPageType.IsEmpty()) 182 pNewPages->SetNewFor<CPDF_Name>("Type", "Pages"); 183 184 if (!pNewPages->GetArrayFor("Kids")) { 185 pNewPages->SetNewFor<CPDF_Number>("Count", 0); 186 pNewPages->SetNewFor<CPDF_Reference>( 187 "Kids", m_pDestPDFDoc.Get(), 188 m_pDestPDFDoc->NewIndirect<CPDF_Array>()->GetObjNum()); 189 } 190 191 return true; 192 } 193 194 bool CPDF_PageOrganizer::ExportPage(const std::vector<uint16_t>& pageNums, 195 int nIndex) { 196 int curpage = nIndex; 197 auto pObjNumberMap = pdfium::MakeUnique<ObjectNumberMap>(); 198 for (size_t i = 0; i < pageNums.size(); ++i) { 199 CPDF_Dictionary* pCurPageDict = m_pDestPDFDoc->CreateNewPage(curpage); 200 CPDF_Dictionary* pSrcPageDict = m_pSrcPDFDoc->GetPage(pageNums[i] - 1); 201 if (!pSrcPageDict || !pCurPageDict) 202 return false; 203 204 // Clone the page dictionary 205 for (const auto& it : *pSrcPageDict) { 206 const ByteString& cbSrcKeyStr = it.first; 207 if (cbSrcKeyStr == "Type" || cbSrcKeyStr == "Parent") 208 continue; 209 210 CPDF_Object* pObj = it.second.get(); 211 pCurPageDict->SetFor(cbSrcKeyStr, pObj->Clone()); 212 } 213 214 // inheritable item 215 // Even though some entries are required by the PDF spec, there exist 216 // PDFs that omit them. Set some defaults in this case. 217 // 1 MediaBox - required 218 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "MediaBox")) { 219 // Search for "CropBox" in the source page dictionary. 220 // If it does not exist, use the default letter size. 221 CPDF_Object* pInheritable = 222 PageDictGetInheritableTag(pSrcPageDict, "CropBox"); 223 if (pInheritable) { 224 pCurPageDict->SetFor("MediaBox", pInheritable->Clone()); 225 } else { 226 // Make the default size letter size (8.5"x11") 227 CPDF_Array* pArray = pCurPageDict->SetNewFor<CPDF_Array>("MediaBox"); 228 pArray->AddNew<CPDF_Number>(0); 229 pArray->AddNew<CPDF_Number>(0); 230 pArray->AddNew<CPDF_Number>(612); 231 pArray->AddNew<CPDF_Number>(792); 232 } 233 } 234 235 // 2 Resources - required 236 if (!CopyInheritable(pCurPageDict, pSrcPageDict, "Resources")) { 237 // Use a default empty resources if it does not exist. 238 pCurPageDict->SetNewFor<CPDF_Dictionary>("Resources"); 239 } 240 241 // 3 CropBox - optional 242 CopyInheritable(pCurPageDict, pSrcPageDict, "CropBox"); 243 // 4 Rotate - optional 244 CopyInheritable(pCurPageDict, pSrcPageDict, "Rotate"); 245 246 // Update the reference 247 uint32_t dwOldPageObj = pSrcPageDict->GetObjNum(); 248 uint32_t dwNewPageObj = pCurPageDict->GetObjNum(); 249 (*pObjNumberMap)[dwOldPageObj] = dwNewPageObj; 250 UpdateReference(pCurPageDict, pObjNumberMap.get()); 251 ++curpage; 252 } 253 254 return true; 255 } 256 257 bool CPDF_PageOrganizer::UpdateReference(CPDF_Object* pObj, 258 ObjectNumberMap* pObjNumberMap) { 259 switch (pObj->GetType()) { 260 case CPDF_Object::REFERENCE: { 261 CPDF_Reference* pReference = pObj->AsReference(); 262 uint32_t newobjnum = GetNewObjId(pObjNumberMap, pReference); 263 if (newobjnum == 0) 264 return false; 265 pReference->SetRef(m_pDestPDFDoc.Get(), newobjnum); 266 break; 267 } 268 case CPDF_Object::DICTIONARY: { 269 CPDF_Dictionary* pDict = pObj->AsDictionary(); 270 auto it = pDict->begin(); 271 while (it != pDict->end()) { 272 const ByteString& key = it->first; 273 CPDF_Object* pNextObj = it->second.get(); 274 ++it; 275 if (key == "Parent" || key == "Prev" || key == "First") 276 continue; 277 if (!pNextObj) 278 return false; 279 if (!UpdateReference(pNextObj, pObjNumberMap)) 280 pDict->RemoveFor(key); 281 } 282 break; 283 } 284 case CPDF_Object::ARRAY: { 285 CPDF_Array* pArray = pObj->AsArray(); 286 for (size_t i = 0; i < pArray->GetCount(); ++i) { 287 CPDF_Object* pNextObj = pArray->GetObjectAt(i); 288 if (!pNextObj) 289 return false; 290 if (!UpdateReference(pNextObj, pObjNumberMap)) 291 return false; 292 } 293 break; 294 } 295 case CPDF_Object::STREAM: { 296 CPDF_Stream* pStream = pObj->AsStream(); 297 CPDF_Dictionary* pDict = pStream->GetDict(); 298 if (!pDict) 299 return false; 300 if (!UpdateReference(pDict, pObjNumberMap)) 301 return false; 302 break; 303 } 304 default: 305 break; 306 } 307 308 return true; 309 } 310 311 uint32_t CPDF_PageOrganizer::GetNewObjId(ObjectNumberMap* pObjNumberMap, 312 CPDF_Reference* pRef) { 313 if (!pRef) 314 return 0; 315 316 uint32_t dwObjnum = pRef->GetRefObjNum(); 317 uint32_t dwNewObjNum = 0; 318 const auto it = pObjNumberMap->find(dwObjnum); 319 if (it != pObjNumberMap->end()) 320 dwNewObjNum = it->second; 321 if (dwNewObjNum) 322 return dwNewObjNum; 323 324 CPDF_Object* pDirect = pRef->GetDirect(); 325 if (!pDirect) 326 return 0; 327 328 std::unique_ptr<CPDF_Object> pClone = pDirect->Clone(); 329 if (CPDF_Dictionary* pDictClone = pClone->AsDictionary()) { 330 if (pDictClone->KeyExist("Type")) { 331 ByteString strType = pDictClone->GetStringFor("Type"); 332 if (!FXSYS_stricmp(strType.c_str(), "Pages")) 333 return 4; 334 if (!FXSYS_stricmp(strType.c_str(), "Page")) 335 return 0; 336 } 337 } 338 CPDF_Object* pUnownedClone = 339 m_pDestPDFDoc->AddIndirectObject(std::move(pClone)); 340 dwNewObjNum = pUnownedClone->GetObjNum(); 341 (*pObjNumberMap)[dwObjnum] = dwNewObjNum; 342 if (!UpdateReference(pUnownedClone, pObjNumberMap)) 343 return 0; 344 345 return dwNewObjNum; 346 } 347 348 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV FPDF_ImportPages(FPDF_DOCUMENT dest_doc, 349 FPDF_DOCUMENT src_doc, 350 FPDF_BYTESTRING pagerange, 351 int index) { 352 CPDF_Document* pDestDoc = CPDFDocumentFromFPDFDocument(dest_doc); 353 if (!dest_doc) 354 return false; 355 356 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc); 357 if (!pSrcDoc) 358 return false; 359 360 std::vector<uint16_t> pageArray; 361 int nCount = pSrcDoc->GetPageCount(); 362 if (pagerange) { 363 if (!ParserPageRangeString(pagerange, &pageArray, nCount)) 364 return false; 365 } else { 366 for (int i = 1; i <= nCount; ++i) { 367 pageArray.push_back(i); 368 } 369 } 370 371 CPDF_PageOrganizer pageOrg(pDestDoc, pSrcDoc); 372 return pageOrg.PDFDocInit() && pageOrg.ExportPage(pageArray, index); 373 } 374 375 FPDF_EXPORT FPDF_BOOL FPDF_CALLCONV 376 FPDF_CopyViewerPreferences(FPDF_DOCUMENT dest_doc, FPDF_DOCUMENT src_doc) { 377 CPDF_Document* pDstDoc = CPDFDocumentFromFPDFDocument(dest_doc); 378 if (!pDstDoc) 379 return false; 380 381 CPDF_Document* pSrcDoc = CPDFDocumentFromFPDFDocument(src_doc); 382 if (!pSrcDoc) 383 return false; 384 385 CPDF_Dictionary* pSrcDict = pSrcDoc->GetRoot(); 386 pSrcDict = pSrcDict->GetDictFor("ViewerPreferences"); 387 if (!pSrcDict) 388 return false; 389 390 CPDF_Dictionary* pDstDict = pDstDoc->GetRoot(); 391 if (!pDstDict) 392 return false; 393 394 pDstDict->SetFor("ViewerPreferences", pSrcDict->CloneDirectObject()); 395 return true; 396 } 397