1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_flatten.h" 8 9 #include <algorithm> 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "core/fpdfapi/page/cpdf_page.h" 15 #include "core/fpdfapi/page/cpdf_pageobject.h" 16 #include "core/fpdfapi/parser/cpdf_array.h" 17 #include "core/fpdfapi/parser/cpdf_document.h" 18 #include "core/fpdfapi/parser/cpdf_name.h" 19 #include "core/fpdfapi/parser/cpdf_number.h" 20 #include "core/fpdfapi/parser/cpdf_reference.h" 21 #include "core/fpdfapi/parser/cpdf_stream.h" 22 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 23 #include "core/fpdfdoc/cpdf_annot.h" 24 #include "fpdfsdk/fsdk_define.h" 25 #include "third_party/base/stl_util.h" 26 27 enum FPDF_TYPE { MAX, MIN }; 28 enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM }; 29 30 namespace { 31 32 bool IsValidRect(const CFX_FloatRect& rect, const CFX_FloatRect& rcPage) { 33 constexpr float kMinSize = 0.000001f; 34 if (rect.IsEmpty() || rect.Width() < kMinSize || rect.Height() < kMinSize) 35 return false; 36 37 if (rcPage.IsEmpty()) 38 return true; 39 40 constexpr float kMinBorderSize = 10.000001f; 41 return rect.left - rcPage.left >= -kMinBorderSize && 42 rect.right - rcPage.right <= kMinBorderSize && 43 rect.top - rcPage.top <= kMinBorderSize && 44 rect.bottom - rcPage.bottom >= -kMinBorderSize; 45 } 46 47 void GetContentsRect(CPDF_Document* pDoc, 48 CPDF_Dictionary* pDict, 49 std::vector<CFX_FloatRect>* pRectArray) { 50 auto pPDFPage = pdfium::MakeUnique<CPDF_Page>(pDoc, pDict, false); 51 pPDFPage->ParseContent(); 52 53 for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) { 54 CFX_FloatRect rc; 55 rc.left = pPageObject->m_Left; 56 rc.right = pPageObject->m_Right; 57 rc.bottom = pPageObject->m_Bottom; 58 rc.top = pPageObject->m_Top; 59 if (IsValidRect(rc, pDict->GetRectFor("MediaBox"))) 60 pRectArray->push_back(rc); 61 } 62 } 63 64 void ParserStream(CPDF_Dictionary* pPageDic, 65 CPDF_Dictionary* pStream, 66 std::vector<CFX_FloatRect>* pRectArray, 67 std::vector<CPDF_Dictionary*>* pObjectArray) { 68 if (!pStream) 69 return; 70 CFX_FloatRect rect; 71 if (pStream->KeyExist("Rect")) 72 rect = pStream->GetRectFor("Rect"); 73 else if (pStream->KeyExist("BBox")) 74 rect = pStream->GetRectFor("BBox"); 75 76 if (IsValidRect(rect, pPageDic->GetRectFor("MediaBox"))) 77 pRectArray->push_back(rect); 78 79 pObjectArray->push_back(pStream); 80 } 81 82 int ParserAnnots(CPDF_Document* pSourceDoc, 83 CPDF_Dictionary* pPageDic, 84 std::vector<CFX_FloatRect>* pRectArray, 85 std::vector<CPDF_Dictionary*>* pObjectArray, 86 int nUsage) { 87 if (!pSourceDoc || !pPageDic) 88 return FLATTEN_FAIL; 89 90 GetContentsRect(pSourceDoc, pPageDic, pRectArray); 91 CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots"); 92 if (!pAnnots) 93 return FLATTEN_NOTHINGTODO; 94 95 for (const auto& pAnnot : *pAnnots) { 96 CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnot->GetDirect()); 97 if (!pAnnotDic) 98 continue; 99 100 ByteString sSubtype = pAnnotDic->GetStringFor("Subtype"); 101 if (sSubtype == "Popup") 102 continue; 103 104 int nAnnotFlag = pAnnotDic->GetIntegerFor("F"); 105 if (nAnnotFlag & ANNOTFLAG_HIDDEN) 106 continue; 107 108 bool bParseStream; 109 if (nUsage == FLAT_NORMALDISPLAY) 110 bParseStream = !(nAnnotFlag & ANNOTFLAG_INVISIBLE); 111 else 112 bParseStream = !!(nAnnotFlag & ANNOTFLAG_PRINT); 113 if (bParseStream) 114 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray); 115 } 116 return FLATTEN_SUCCESS; 117 } 118 119 float GetMinMaxValue(const std::vector<CFX_FloatRect>& array, 120 FPDF_TYPE type, 121 FPDF_VALUE value) { 122 if (array.empty()) 123 return 0.0f; 124 125 size_t nRects = array.size(); 126 std::vector<float> pArray(nRects); 127 switch (value) { 128 case LEFT: 129 for (size_t i = 0; i < nRects; i++) 130 pArray[i] = array[i].left; 131 break; 132 case TOP: 133 for (size_t i = 0; i < nRects; i++) 134 pArray[i] = array[i].top; 135 break; 136 case RIGHT: 137 for (size_t i = 0; i < nRects; i++) 138 pArray[i] = array[i].right; 139 break; 140 case BOTTOM: 141 for (size_t i = 0; i < nRects; i++) 142 pArray[i] = array[i].bottom; 143 break; 144 default: 145 NOTREACHED(); 146 return 0.0f; 147 } 148 149 float fRet = pArray[0]; 150 if (type == MAX) { 151 for (size_t i = 1; i < nRects; i++) 152 fRet = std::max(fRet, pArray[i]); 153 } else { 154 for (size_t i = 1; i < nRects; i++) 155 fRet = std::min(fRet, pArray[i]); 156 } 157 return fRet; 158 } 159 160 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) { 161 CFX_FloatRect rcRet; 162 163 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT); 164 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP); 165 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT); 166 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM); 167 168 return rcRet; 169 } 170 171 uint32_t NewIndirectContentsStream(const ByteString& key, 172 CPDF_Document* pDocument) { 173 CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>( 174 nullptr, 0, 175 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool())); 176 ByteString sStream = 177 ByteString::Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str()); 178 pNewContents->SetData(sStream.raw_str(), sStream.GetLength()); 179 return pNewContents->GetObjNum(); 180 } 181 182 void SetPageContents(const ByteString& key, 183 CPDF_Dictionary* pPage, 184 CPDF_Document* pDocument) { 185 CPDF_Array* pContentsArray = nullptr; 186 CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents"); 187 if (!pContentsStream) { 188 pContentsArray = pPage->GetArrayFor("Contents"); 189 if (!pContentsArray) { 190 if (!key.IsEmpty()) { 191 pPage->SetNewFor<CPDF_Reference>( 192 "Contents", pDocument, NewIndirectContentsStream(key, pDocument)); 193 } 194 return; 195 } 196 } 197 pPage->ConvertToIndirectObjectFor("Contents", pDocument); 198 if (!pContentsArray) { 199 pContentsArray = pDocument->NewIndirect<CPDF_Array>(); 200 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pContentsStream); 201 pAcc->LoadAllDataFiltered(); 202 ByteString sStream = "q\n"; 203 ByteString sBody = ByteString(pAcc->GetData(), pAcc->GetSize()); 204 sStream = sStream + sBody + "\nQ"; 205 pContentsStream->SetDataAndRemoveFilter(sStream.raw_str(), 206 sStream.GetLength()); 207 pContentsArray->AddNew<CPDF_Reference>(pDocument, 208 pContentsStream->GetObjNum()); 209 pPage->SetNewFor<CPDF_Reference>("Contents", pDocument, 210 pContentsArray->GetObjNum()); 211 } 212 if (!key.IsEmpty()) { 213 pContentsArray->AddNew<CPDF_Reference>( 214 pDocument, NewIndirectContentsStream(key, pDocument)); 215 } 216 } 217 218 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot, 219 CFX_FloatRect rcStream, 220 const CFX_Matrix& matrix) { 221 if (rcStream.IsEmpty()) 222 return CFX_Matrix(); 223 224 rcStream = matrix.TransformRect(rcStream); 225 rcStream.Normalize(); 226 227 float a = rcAnnot.Width() / rcStream.Width(); 228 float d = rcAnnot.Height() / rcStream.Height(); 229 230 float e = rcAnnot.left - rcStream.left * a; 231 float f = rcAnnot.bottom - rcStream.bottom * d; 232 return CFX_Matrix(a, 0, 0, d, e, f); 233 } 234 235 } // namespace 236 237 FPDF_EXPORT int FPDF_CALLCONV FPDFPage_Flatten(FPDF_PAGE page, int nFlag) { 238 CPDF_Page* pPage = CPDFPageFromFPDFPage(page); 239 if (!page) 240 return FLATTEN_FAIL; 241 242 CPDF_Document* pDocument = pPage->m_pDocument.Get(); 243 CPDF_Dictionary* pPageDict = pPage->m_pFormDict.Get(); 244 if (!pDocument || !pPageDict) 245 return FLATTEN_FAIL; 246 247 std::vector<CPDF_Dictionary*> ObjectArray; 248 std::vector<CFX_FloatRect> RectArray; 249 int iRet = 250 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag); 251 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL) 252 return iRet; 253 254 CFX_FloatRect rcOriginalCB; 255 CFX_FloatRect rcMerger = CalculateRect(&RectArray); 256 CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox"); 257 if (pPageDict->KeyExist("CropBox")) 258 rcOriginalMB = pPageDict->GetRectFor("CropBox"); 259 260 if (rcOriginalMB.IsEmpty()) 261 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f); 262 263 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left); 264 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right); 265 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom); 266 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top); 267 if (pPageDict->KeyExist("ArtBox")) 268 rcOriginalCB = pPageDict->GetRectFor("ArtBox"); 269 else 270 rcOriginalCB = rcOriginalMB; 271 272 if (!rcOriginalMB.IsEmpty()) { 273 CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox"); 274 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left); 275 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom); 276 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right); 277 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top); 278 } 279 280 if (!rcOriginalCB.IsEmpty()) { 281 CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox"); 282 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left); 283 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom); 284 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right); 285 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top); 286 } 287 288 CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources"); 289 if (!pRes) 290 pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources"); 291 292 CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>( 293 nullptr, 0, 294 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool())); 295 296 uint32_t dwObjNum = pNewXObject->GetObjNum(); 297 CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject"); 298 if (!pPageXObject) 299 pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject"); 300 301 ByteString key; 302 if (!ObjectArray.empty()) { 303 int i = 0; 304 while (i < INT_MAX) { 305 ByteString sKey = ByteString::Format("FFT%d", i); 306 if (!pPageXObject->KeyExist(sKey)) { 307 key = sKey; 308 break; 309 } 310 ++i; 311 } 312 } 313 314 SetPageContents(key, pPageDict, pDocument); 315 316 CPDF_Dictionary* pNewXORes = nullptr; 317 if (!key.IsEmpty()) { 318 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum); 319 CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict(); 320 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources"); 321 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject"); 322 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form"); 323 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1); 324 CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox"); 325 pNewOXbjectDic->SetRectFor("BBox", rcBBox); 326 } 327 328 for (size_t i = 0; i < ObjectArray.size(); ++i) { 329 CPDF_Dictionary* pAnnotDic = ObjectArray[i]; 330 if (!pAnnotDic) 331 continue; 332 333 CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect"); 334 rcAnnot.Normalize(); 335 336 ByteString sAnnotState = pAnnotDic->GetStringFor("AS"); 337 CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP"); 338 if (!pAnnotAP) 339 continue; 340 341 CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N"); 342 if (!pAPStream) { 343 CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N"); 344 if (!pAPDic) 345 continue; 346 347 if (!sAnnotState.IsEmpty()) { 348 pAPStream = pAPDic->GetStreamFor(sAnnotState); 349 } else { 350 if (pAPDic->GetCount() > 0) { 351 CPDF_Object* pFirstObj = pAPDic->begin()->second.get(); 352 if (pFirstObj) { 353 if (pFirstObj->IsReference()) 354 pFirstObj = pFirstObj->GetDirect(); 355 if (!pFirstObj->IsStream()) 356 continue; 357 pAPStream = pFirstObj->AsStream(); 358 } 359 } 360 } 361 } 362 if (!pAPStream) 363 continue; 364 365 CPDF_Dictionary* pAPDic = pAPStream->GetDict(); 366 CFX_FloatRect rcStream; 367 if (pAPDic->KeyExist("Rect")) 368 rcStream = pAPDic->GetRectFor("Rect"); 369 else if (pAPDic->KeyExist("BBox")) 370 rcStream = pAPDic->GetRectFor("BBox"); 371 372 if (rcStream.IsEmpty()) 373 continue; 374 375 CPDF_Object* pObj = pAPStream; 376 if (pObj->IsInline()) { 377 std::unique_ptr<CPDF_Object> pNew = pObj->Clone(); 378 pObj = pNew.get(); 379 pDocument->AddIndirectObject(std::move(pNew)); 380 } 381 382 CPDF_Dictionary* pObjDic = pObj->GetDict(); 383 if (pObjDic) { 384 pObjDic->SetNewFor<CPDF_Name>("Type", "XObject"); 385 pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form"); 386 } 387 388 CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject"); 389 if (!pXObject) 390 pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject"); 391 392 ByteString sFormName = ByteString::Format("F%d", i); 393 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument, 394 pObj->GetObjNum()); 395 396 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pNewXObject); 397 pAcc->LoadAllDataFiltered(); 398 ByteString sStream(pAcc->GetData(), pAcc->GetSize()); 399 CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix"); 400 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix); 401 sStream += ByteString::Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d, 402 m.e, m.f, sFormName.c_str()); 403 pNewXObject->SetDataAndRemoveFilter(sStream.raw_str(), sStream.GetLength()); 404 } 405 pPageDict->RemoveFor("Annots"); 406 return FLATTEN_SUCCESS; 407 } 408