1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "public/fpdf_flatten.h" 8 9 #include <algorithm> 10 #include <memory> 11 #include <utility> 12 #include <vector> 13 14 #include "core/fpdfapi/page/cpdf_page.h" 15 #include "core/fpdfapi/page/cpdf_pageobject.h" 16 #include "core/fpdfapi/parser/cpdf_array.h" 17 #include "core/fpdfapi/parser/cpdf_document.h" 18 #include "core/fpdfapi/parser/cpdf_name.h" 19 #include "core/fpdfapi/parser/cpdf_number.h" 20 #include "core/fpdfapi/parser/cpdf_reference.h" 21 #include "core/fpdfapi/parser/cpdf_stream.h" 22 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 23 #include "core/fpdfdoc/cpdf_annot.h" 24 #include "fpdfsdk/fsdk_define.h" 25 #include "third_party/base/stl_util.h" 26 27 enum FPDF_TYPE { MAX, MIN }; 28 enum FPDF_VALUE { TOP, LEFT, RIGHT, BOTTOM }; 29 30 namespace { 31 32 bool IsValiableRect(CFX_FloatRect rect, CFX_FloatRect rcPage) { 33 if (rect.left - rect.right > 0.000001f || rect.bottom - rect.top > 0.000001f) 34 return false; 35 36 if (rect.left == 0.0f && rect.top == 0.0f && rect.right == 0.0f && 37 rect.bottom == 0.0f) 38 return false; 39 40 if (!rcPage.IsEmpty()) { 41 if (rect.left - rcPage.left < -10.000001f || 42 rect.right - rcPage.right > 10.000001f || 43 rect.top - rcPage.top > 10.000001f || 44 rect.bottom - rcPage.bottom < -10.000001f) 45 return false; 46 } 47 48 return true; 49 } 50 51 void GetContentsRect(CPDF_Document* pDoc, 52 CPDF_Dictionary* pDict, 53 std::vector<CFX_FloatRect>* pRectArray) { 54 std::unique_ptr<CPDF_Page> pPDFPage(new CPDF_Page(pDoc, pDict, false)); 55 pPDFPage->ParseContent(); 56 57 for (const auto& pPageObject : *pPDFPage->GetPageObjectList()) { 58 CFX_FloatRect rc; 59 rc.left = pPageObject->m_Left; 60 rc.right = pPageObject->m_Right; 61 rc.bottom = pPageObject->m_Bottom; 62 rc.top = pPageObject->m_Top; 63 if (IsValiableRect(rc, pDict->GetRectFor("MediaBox"))) 64 pRectArray->push_back(rc); 65 } 66 } 67 68 void ParserStream(CPDF_Dictionary* pPageDic, 69 CPDF_Dictionary* pStream, 70 std::vector<CFX_FloatRect>* pRectArray, 71 std::vector<CPDF_Dictionary*>* pObjectArray) { 72 if (!pStream) 73 return; 74 CFX_FloatRect rect; 75 if (pStream->KeyExist("Rect")) 76 rect = pStream->GetRectFor("Rect"); 77 else if (pStream->KeyExist("BBox")) 78 rect = pStream->GetRectFor("BBox"); 79 80 if (IsValiableRect(rect, pPageDic->GetRectFor("MediaBox"))) 81 pRectArray->push_back(rect); 82 83 pObjectArray->push_back(pStream); 84 } 85 86 int ParserAnnots(CPDF_Document* pSourceDoc, 87 CPDF_Dictionary* pPageDic, 88 std::vector<CFX_FloatRect>* pRectArray, 89 std::vector<CPDF_Dictionary*>* pObjectArray, 90 int nUsage) { 91 if (!pSourceDoc || !pPageDic) 92 return FLATTEN_FAIL; 93 94 GetContentsRect(pSourceDoc, pPageDic, pRectArray); 95 CPDF_Array* pAnnots = pPageDic->GetArrayFor("Annots"); 96 if (!pAnnots) 97 return FLATTEN_NOTHINGTODO; 98 99 uint32_t dwSize = pAnnots->GetCount(); 100 for (int i = 0; i < (int)dwSize; i++) { 101 CPDF_Dictionary* pAnnotDic = ToDictionary(pAnnots->GetDirectObjectAt(i)); 102 if (!pAnnotDic) 103 continue; 104 105 CFX_ByteString sSubtype = pAnnotDic->GetStringFor("Subtype"); 106 if (sSubtype == "Popup") 107 continue; 108 109 int nAnnotFlag = pAnnotDic->GetIntegerFor("F"); 110 if (nAnnotFlag & ANNOTFLAG_HIDDEN) 111 continue; 112 113 if (nUsage == FLAT_NORMALDISPLAY) { 114 if (nAnnotFlag & ANNOTFLAG_INVISIBLE) 115 continue; 116 117 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray); 118 } else { 119 if (nAnnotFlag & ANNOTFLAG_PRINT) 120 ParserStream(pPageDic, pAnnotDic, pRectArray, pObjectArray); 121 } 122 } 123 return FLATTEN_SUCCESS; 124 } 125 126 FX_FLOAT GetMinMaxValue(const std::vector<CFX_FloatRect>& array, 127 FPDF_TYPE type, 128 FPDF_VALUE value) { 129 size_t nRects = array.size(); 130 if (nRects <= 0) 131 return 0.0f; 132 133 std::vector<FX_FLOAT> pArray(nRects); 134 switch (value) { 135 case LEFT: 136 for (size_t i = 0; i < nRects; i++) 137 pArray[i] = array[i].left; 138 break; 139 case TOP: 140 for (size_t i = 0; i < nRects; i++) 141 pArray[i] = array[i].top; 142 break; 143 case RIGHT: 144 for (size_t i = 0; i < nRects; i++) 145 pArray[i] = array[i].right; 146 break; 147 case BOTTOM: 148 for (size_t i = 0; i < nRects; i++) 149 pArray[i] = array[i].bottom; 150 break; 151 default: 152 // Not reachable. 153 return 0.0f; 154 } 155 156 FX_FLOAT fRet = pArray[0]; 157 if (type == MAX) { 158 for (size_t i = 1; i < nRects; i++) 159 fRet = std::max(fRet, pArray[i]); 160 } else { 161 for (size_t i = 1; i < nRects; i++) 162 fRet = std::min(fRet, pArray[i]); 163 } 164 return fRet; 165 } 166 167 CFX_FloatRect CalculateRect(std::vector<CFX_FloatRect>* pRectArray) { 168 CFX_FloatRect rcRet; 169 170 rcRet.left = GetMinMaxValue(*pRectArray, MIN, LEFT); 171 rcRet.top = GetMinMaxValue(*pRectArray, MAX, TOP); 172 rcRet.right = GetMinMaxValue(*pRectArray, MAX, RIGHT); 173 rcRet.bottom = GetMinMaxValue(*pRectArray, MIN, BOTTOM); 174 175 return rcRet; 176 } 177 178 uint32_t NewIndirectContentsStream(const CFX_ByteString& key, 179 CPDF_Document* pDocument) { 180 CPDF_Stream* pNewContents = pDocument->NewIndirect<CPDF_Stream>( 181 nullptr, 0, 182 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool())); 183 CFX_ByteString sStream; 184 sStream.Format("q 1 0 0 1 0 0 cm /%s Do Q", key.c_str()); 185 pNewContents->SetData(sStream.raw_str(), sStream.GetLength()); 186 return pNewContents->GetObjNum(); 187 } 188 189 void SetPageContents(const CFX_ByteString& key, 190 CPDF_Dictionary* pPage, 191 CPDF_Document* pDocument) { 192 CPDF_Array* pContentsArray = nullptr; 193 CPDF_Stream* pContentsStream = pPage->GetStreamFor("Contents"); 194 if (!pContentsStream) { 195 pContentsArray = pPage->GetArrayFor("Contents"); 196 if (!pContentsArray) { 197 if (!key.IsEmpty()) { 198 pPage->SetNewFor<CPDF_Reference>( 199 "Contents", pDocument, NewIndirectContentsStream(key, pDocument)); 200 } 201 return; 202 } 203 } 204 pPage->ConvertToIndirectObjectFor("Contents", pDocument); 205 if (!pContentsArray) { 206 pContentsArray = pDocument->NewIndirect<CPDF_Array>(); 207 CPDF_StreamAcc acc; 208 acc.LoadAllData(pContentsStream); 209 CFX_ByteString sStream = "q\n"; 210 CFX_ByteString sBody = 211 CFX_ByteString((const FX_CHAR*)acc.GetData(), acc.GetSize()); 212 sStream = sStream + sBody + "\nQ"; 213 pContentsStream->SetData(sStream.raw_str(), sStream.GetLength()); 214 pContentsArray->AddNew<CPDF_Reference>(pDocument, 215 pContentsStream->GetObjNum()); 216 pPage->SetNewFor<CPDF_Reference>("Contents", pDocument, 217 pContentsArray->GetObjNum()); 218 } 219 if (!key.IsEmpty()) { 220 pContentsArray->AddNew<CPDF_Reference>( 221 pDocument, NewIndirectContentsStream(key, pDocument)); 222 } 223 } 224 225 CFX_Matrix GetMatrix(CFX_FloatRect rcAnnot, 226 CFX_FloatRect rcStream, 227 const CFX_Matrix& matrix) { 228 if (rcStream.IsEmpty()) 229 return CFX_Matrix(); 230 231 matrix.TransformRect(rcStream); 232 rcStream.Normalize(); 233 234 FX_FLOAT a = rcAnnot.Width() / rcStream.Width(); 235 FX_FLOAT d = rcAnnot.Height() / rcStream.Height(); 236 237 FX_FLOAT e = rcAnnot.left - rcStream.left * a; 238 FX_FLOAT f = rcAnnot.bottom - rcStream.bottom * d; 239 return CFX_Matrix(a, 0, 0, d, e, f); 240 } 241 242 } // namespace 243 244 DLLEXPORT int STDCALL FPDFPage_Flatten(FPDF_PAGE page, int nFlag) { 245 CPDF_Page* pPage = CPDFPageFromFPDFPage(page); 246 if (!page) 247 return FLATTEN_FAIL; 248 249 CPDF_Document* pDocument = pPage->m_pDocument; 250 CPDF_Dictionary* pPageDict = pPage->m_pFormDict; 251 if (!pDocument || !pPageDict) 252 return FLATTEN_FAIL; 253 254 std::vector<CPDF_Dictionary*> ObjectArray; 255 std::vector<CFX_FloatRect> RectArray; 256 int iRet = 257 ParserAnnots(pDocument, pPageDict, &RectArray, &ObjectArray, nFlag); 258 if (iRet == FLATTEN_NOTHINGTODO || iRet == FLATTEN_FAIL) 259 return iRet; 260 261 CFX_FloatRect rcOriginalCB; 262 CFX_FloatRect rcMerger = CalculateRect(&RectArray); 263 CFX_FloatRect rcOriginalMB = pPageDict->GetRectFor("MediaBox"); 264 if (pPageDict->KeyExist("CropBox")) 265 rcOriginalMB = pPageDict->GetRectFor("CropBox"); 266 267 if (rcOriginalMB.IsEmpty()) 268 rcOriginalMB = CFX_FloatRect(0.0f, 0.0f, 612.0f, 792.0f); 269 270 rcMerger.left = std::max(rcMerger.left, rcOriginalMB.left); 271 rcMerger.right = std::min(rcMerger.right, rcOriginalMB.right); 272 rcMerger.bottom = std::max(rcMerger.bottom, rcOriginalMB.bottom); 273 rcMerger.top = std::min(rcMerger.top, rcOriginalMB.top); 274 if (pPageDict->KeyExist("ArtBox")) 275 rcOriginalCB = pPageDict->GetRectFor("ArtBox"); 276 else 277 rcOriginalCB = rcOriginalMB; 278 279 if (!rcOriginalMB.IsEmpty()) { 280 CPDF_Array* pMediaBox = pPageDict->SetNewFor<CPDF_Array>("MediaBox"); 281 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.left); 282 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.bottom); 283 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.right); 284 pMediaBox->AddNew<CPDF_Number>(rcOriginalMB.top); 285 } 286 287 if (!rcOriginalCB.IsEmpty()) { 288 CPDF_Array* pCropBox = pPageDict->SetNewFor<CPDF_Array>("ArtBox"); 289 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.left); 290 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.bottom); 291 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.right); 292 pCropBox->AddNew<CPDF_Number>(rcOriginalCB.top); 293 } 294 295 CPDF_Dictionary* pRes = pPageDict->GetDictFor("Resources"); 296 if (!pRes) 297 pRes = pPageDict->SetNewFor<CPDF_Dictionary>("Resources"); 298 299 CPDF_Stream* pNewXObject = pDocument->NewIndirect<CPDF_Stream>( 300 nullptr, 0, 301 pdfium::MakeUnique<CPDF_Dictionary>(pDocument->GetByteStringPool())); 302 303 uint32_t dwObjNum = pNewXObject->GetObjNum(); 304 CPDF_Dictionary* pPageXObject = pRes->GetDictFor("XObject"); 305 if (!pPageXObject) 306 pPageXObject = pRes->SetNewFor<CPDF_Dictionary>("XObject"); 307 308 CFX_ByteString key = ""; 309 int nStreams = pdfium::CollectionSize<int>(ObjectArray); 310 if (nStreams > 0) { 311 for (int iKey = 0; /*iKey < 100*/; iKey++) { 312 char sExtend[5] = {}; 313 FXSYS_itoa(iKey, sExtend, 10); 314 key = CFX_ByteString("FFT") + CFX_ByteString(sExtend); 315 if (!pPageXObject->KeyExist(key)) 316 break; 317 } 318 } 319 320 SetPageContents(key, pPageDict, pDocument); 321 322 CPDF_Dictionary* pNewXORes = nullptr; 323 if (!key.IsEmpty()) { 324 pPageXObject->SetNewFor<CPDF_Reference>(key, pDocument, dwObjNum); 325 CPDF_Dictionary* pNewOXbjectDic = pNewXObject->GetDict(); 326 pNewXORes = pNewOXbjectDic->SetNewFor<CPDF_Dictionary>("Resources"); 327 pNewOXbjectDic->SetNewFor<CPDF_Name>("Type", "XObject"); 328 pNewOXbjectDic->SetNewFor<CPDF_Name>("Subtype", "Form"); 329 pNewOXbjectDic->SetNewFor<CPDF_Number>("FormType", 1); 330 pNewOXbjectDic->SetNewFor<CPDF_Name>("Name", "FRM"); 331 CFX_FloatRect rcBBox = pPageDict->GetRectFor("ArtBox"); 332 pNewOXbjectDic->SetRectFor("BBox", rcBBox); 333 } 334 335 for (int i = 0; i < nStreams; i++) { 336 CPDF_Dictionary* pAnnotDic = ObjectArray[i]; 337 if (!pAnnotDic) 338 continue; 339 340 CFX_FloatRect rcAnnot = pAnnotDic->GetRectFor("Rect"); 341 rcAnnot.Normalize(); 342 343 CFX_ByteString sAnnotState = pAnnotDic->GetStringFor("AS"); 344 CPDF_Dictionary* pAnnotAP = pAnnotDic->GetDictFor("AP"); 345 if (!pAnnotAP) 346 continue; 347 348 CPDF_Stream* pAPStream = pAnnotAP->GetStreamFor("N"); 349 if (!pAPStream) { 350 CPDF_Dictionary* pAPDic = pAnnotAP->GetDictFor("N"); 351 if (!pAPDic) 352 continue; 353 354 if (!sAnnotState.IsEmpty()) { 355 pAPStream = pAPDic->GetStreamFor(sAnnotState); 356 } else { 357 auto it = pAPDic->begin(); 358 if (it != pAPDic->end()) { 359 CPDF_Object* pFirstObj = it->second.get(); 360 if (pFirstObj) { 361 if (pFirstObj->IsReference()) 362 pFirstObj = pFirstObj->GetDirect(); 363 if (!pFirstObj->IsStream()) 364 continue; 365 pAPStream = pFirstObj->AsStream(); 366 } 367 } 368 } 369 } 370 if (!pAPStream) 371 continue; 372 373 CPDF_Dictionary* pAPDic = pAPStream->GetDict(); 374 CFX_FloatRect rcStream; 375 if (pAPDic->KeyExist("Rect")) 376 rcStream = pAPDic->GetRectFor("Rect"); 377 else if (pAPDic->KeyExist("BBox")) 378 rcStream = pAPDic->GetRectFor("BBox"); 379 380 if (rcStream.IsEmpty()) 381 continue; 382 383 CPDF_Object* pObj = pAPStream; 384 if (pObj->IsInline()) { 385 std::unique_ptr<CPDF_Object> pNew = pObj->Clone(); 386 pObj = pNew.get(); 387 pDocument->AddIndirectObject(std::move(pNew)); 388 } 389 390 CPDF_Dictionary* pObjDic = pObj->GetDict(); 391 if (pObjDic) { 392 pObjDic->SetNewFor<CPDF_Name>("Type", "XObject"); 393 pObjDic->SetNewFor<CPDF_Name>("Subtype", "Form"); 394 } 395 396 CPDF_Dictionary* pXObject = pNewXORes->GetDictFor("XObject"); 397 if (!pXObject) 398 pXObject = pNewXORes->SetNewFor<CPDF_Dictionary>("XObject"); 399 400 CFX_ByteString sFormName; 401 sFormName.Format("F%d", i); 402 pXObject->SetNewFor<CPDF_Reference>(sFormName, pDocument, 403 pObj->GetObjNum()); 404 405 CPDF_StreamAcc acc; 406 acc.LoadAllData(pNewXObject); 407 408 const uint8_t* pData = acc.GetData(); 409 CFX_ByteString sStream(pData, acc.GetSize()); 410 CFX_Matrix matrix = pAPDic->GetMatrixFor("Matrix"); 411 if (matrix.IsIdentity()) { 412 matrix.a = 1.0f; 413 matrix.b = 0.0f; 414 matrix.c = 0.0f; 415 matrix.d = 1.0f; 416 matrix.e = 0.0f; 417 matrix.f = 0.0f; 418 } 419 420 CFX_ByteString sTemp; 421 CFX_Matrix m = GetMatrix(rcAnnot, rcStream, matrix); 422 sTemp.Format("q %f 0 0 %f %f %f cm /%s Do Q\n", m.a, m.d, m.e, m.f, 423 sFormName.c_str()); 424 sStream += sTemp; 425 pNewXObject->SetData(sStream.raw_str(), sStream.GetLength()); 426 } 427 pPageDict->RemoveFor("Annots"); 428 return FLATTEN_SUCCESS; 429 } 430