Home | History | Annotate | Download | only in fpdf_parser
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/include/fpdfapi/fpdf_parser.h"
      8 
      9 #include "core/include/fpdfapi/fpdf_module.h"
     10 
     11 CPDF_Document::CPDF_Document(CPDF_Parser* pParser)
     12     : CPDF_IndirectObjectHolder(pParser) {
     13   ASSERT(pParser);
     14   m_pRootDict = NULL;
     15   m_pInfoDict = NULL;
     16   m_bLinearized = FALSE;
     17   m_dwFirstPageNo = 0;
     18   m_dwFirstPageObjNum = 0;
     19   m_pDocPage = CPDF_ModuleMgr::Get()->GetPageModule()->CreateDocData(this);
     20   m_pDocRender = CPDF_ModuleMgr::Get()->GetRenderModule()->CreateDocData(this);
     21 }
     22 CPDF_DocPageData* CPDF_Document::GetValidatePageData() {
     23   if (m_pDocPage) {
     24     return m_pDocPage;
     25   }
     26   m_pDocPage = CPDF_ModuleMgr::Get()->GetPageModule()->CreateDocData(this);
     27   return m_pDocPage;
     28 }
     29 CPDF_DocRenderData* CPDF_Document::GetValidateRenderData() {
     30   if (m_pDocRender) {
     31     return m_pDocRender;
     32   }
     33   m_pDocRender = CPDF_ModuleMgr::Get()->GetRenderModule()->CreateDocData(this);
     34   return m_pDocRender;
     35 }
     36 void CPDF_Document::LoadDoc() {
     37   m_LastObjNum = m_pParser->GetLastObjNum();
     38   CPDF_Object* pRootObj =
     39       GetIndirectObject(m_pParser->GetRootObjNum(), nullptr);
     40   if (!pRootObj) {
     41     return;
     42   }
     43   m_pRootDict = pRootObj->GetDict();
     44   if (!m_pRootDict) {
     45     return;
     46   }
     47   CPDF_Object* pInfoObj =
     48       GetIndirectObject(m_pParser->GetInfoObjNum(), nullptr);
     49   if (pInfoObj) {
     50     m_pInfoDict = pInfoObj->GetDict();
     51   }
     52   CPDF_Array* pIDArray = m_pParser->GetIDArray();
     53   if (pIDArray) {
     54     m_ID1 = pIDArray->GetString(0);
     55     m_ID2 = pIDArray->GetString(1);
     56   }
     57   m_PageList.SetSize(_GetPageCount());
     58 }
     59 void CPDF_Document::LoadAsynDoc(CPDF_Dictionary* pLinearized) {
     60   m_bLinearized = TRUE;
     61   m_LastObjNum = m_pParser->GetLastObjNum();
     62   CPDF_Object* pIndirectObj =
     63       GetIndirectObject(m_pParser->GetRootObjNum(), nullptr);
     64   m_pRootDict = pIndirectObj ? pIndirectObj->GetDict() : nullptr;
     65   if (!m_pRootDict) {
     66     return;
     67   }
     68   pIndirectObj = GetIndirectObject(m_pParser->GetInfoObjNum(), nullptr);
     69   m_pInfoDict = pIndirectObj ? pIndirectObj->GetDict() : nullptr;
     70   CPDF_Array* pIDArray = m_pParser->GetIDArray();
     71   if (pIDArray) {
     72     m_ID1 = pIDArray->GetString(0);
     73     m_ID2 = pIDArray->GetString(1);
     74   }
     75   FX_DWORD dwPageCount = 0;
     76   CPDF_Object* pCount = pLinearized->GetElement("N");
     77   if (ToNumber(pCount))
     78     dwPageCount = pCount->GetInteger();
     79 
     80   m_PageList.SetSize(dwPageCount);
     81   CPDF_Object* pNo = pLinearized->GetElement("P");
     82   if (ToNumber(pNo))
     83     m_dwFirstPageNo = pNo->GetInteger();
     84 
     85   CPDF_Object* pObjNum = pLinearized->GetElement("O");
     86   if (ToNumber(pObjNum))
     87     m_dwFirstPageObjNum = pObjNum->GetInteger();
     88 }
     89 void CPDF_Document::LoadPages() {
     90   m_PageList.SetSize(_GetPageCount());
     91 }
     92 CPDF_Document::~CPDF_Document() {
     93   if (m_pDocPage) {
     94     CPDF_ModuleMgr::Get()->GetPageModule()->ReleaseDoc(this);
     95     CPDF_ModuleMgr::Get()->GetPageModule()->ClearStockFont(this);
     96   }
     97   if (m_pDocRender) {
     98     CPDF_ModuleMgr::Get()->GetRenderModule()->DestroyDocData(m_pDocRender);
     99   }
    100 }
    101 #define FX_MAX_PAGE_LEVEL 1024
    102 CPDF_Dictionary* CPDF_Document::_FindPDFPage(CPDF_Dictionary* pPages,
    103                                              int iPage,
    104                                              int nPagesToGo,
    105                                              int level) {
    106   CPDF_Array* pKidList = pPages->GetArray("Kids");
    107   if (!pKidList) {
    108     if (nPagesToGo == 0) {
    109       return pPages;
    110     }
    111     return NULL;
    112   }
    113   if (level >= FX_MAX_PAGE_LEVEL) {
    114     return NULL;
    115   }
    116   int nKids = pKidList->GetCount();
    117   for (int i = 0; i < nKids; i++) {
    118     CPDF_Dictionary* pKid = pKidList->GetDict(i);
    119     if (!pKid) {
    120       nPagesToGo--;
    121       continue;
    122     }
    123     if (pKid == pPages) {
    124       continue;
    125     }
    126     if (!pKid->KeyExist("Kids")) {
    127       if (nPagesToGo == 0) {
    128         return pKid;
    129       }
    130       m_PageList.SetAt(iPage - nPagesToGo, pKid->GetObjNum());
    131       nPagesToGo--;
    132     } else {
    133       int nPages = pKid->GetInteger("Count");
    134       if (nPagesToGo < nPages) {
    135         return _FindPDFPage(pKid, iPage, nPagesToGo, level + 1);
    136       }
    137       nPagesToGo -= nPages;
    138     }
    139   }
    140   return NULL;
    141 }
    142 
    143 CPDF_Dictionary* CPDF_Document::GetPage(int iPage) {
    144   if (iPage < 0 || iPage >= m_PageList.GetSize())
    145     return nullptr;
    146 
    147   if (m_bLinearized && (iPage == (int)m_dwFirstPageNo)) {
    148     if (CPDF_Dictionary* pDict =
    149             ToDictionary(GetIndirectObject(m_dwFirstPageObjNum, nullptr)))
    150       return pDict;
    151   }
    152 
    153   int objnum = m_PageList.GetAt(iPage);
    154   if (objnum) {
    155     if (CPDF_Dictionary* pDict =
    156             ToDictionary(GetIndirectObject(objnum, nullptr))) {
    157       return pDict;
    158     }
    159   }
    160 
    161   CPDF_Dictionary* pRoot = GetRoot();
    162   if (!pRoot)
    163     return nullptr;
    164 
    165   CPDF_Dictionary* pPages = pRoot->GetDict("Pages");
    166   if (!pPages)
    167     return nullptr;
    168 
    169   CPDF_Dictionary* pPage = _FindPDFPage(pPages, iPage, iPage, 0);
    170   if (!pPage)
    171     return nullptr;
    172 
    173   m_PageList.SetAt(iPage, pPage->GetObjNum());
    174   return pPage;
    175 }
    176 
    177 int CPDF_Document::_FindPageIndex(CPDF_Dictionary* pNode,
    178                                   FX_DWORD& skip_count,
    179                                   FX_DWORD objnum,
    180                                   int& index,
    181                                   int level) {
    182   if (pNode->KeyExist("Kids")) {
    183     CPDF_Array* pKidList = pNode->GetArray("Kids");
    184     if (!pKidList) {
    185       return -1;
    186     }
    187     if (level >= FX_MAX_PAGE_LEVEL) {
    188       return -1;
    189     }
    190     FX_DWORD count = pNode->GetInteger("Count");
    191     if (count <= skip_count) {
    192       skip_count -= count;
    193       index += count;
    194       return -1;
    195     }
    196     if (count && count == pKidList->GetCount()) {
    197       for (FX_DWORD i = 0; i < count; i++) {
    198         if (CPDF_Reference* pKid = ToReference(pKidList->GetElement(i))) {
    199           if (pKid->GetRefObjNum() == objnum) {
    200             m_PageList.SetAt(index + i, objnum);
    201             return index + i;
    202           }
    203         }
    204       }
    205     }
    206     for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) {
    207       CPDF_Dictionary* pKid = pKidList->GetDict(i);
    208       if (!pKid) {
    209         continue;
    210       }
    211       if (pKid == pNode) {
    212         continue;
    213       }
    214       int found_index =
    215           _FindPageIndex(pKid, skip_count, objnum, index, level + 1);
    216       if (found_index >= 0) {
    217         return found_index;
    218       }
    219     }
    220   } else {
    221     if (objnum == pNode->GetObjNum()) {
    222       return index;
    223     }
    224     if (skip_count) {
    225       skip_count--;
    226     }
    227     index++;
    228   }
    229   return -1;
    230 }
    231 int CPDF_Document::GetPageIndex(FX_DWORD objnum) {
    232   FX_DWORD nPages = m_PageList.GetSize();
    233   FX_DWORD skip_count = 0;
    234   FX_BOOL bSkipped = FALSE;
    235   for (FX_DWORD i = 0; i < nPages; i++) {
    236     FX_DWORD objnum1 = m_PageList.GetAt(i);
    237     if (objnum1 == objnum) {
    238       return i;
    239     }
    240     if (!bSkipped && objnum1 == 0) {
    241       skip_count = i;
    242       bSkipped = TRUE;
    243     }
    244   }
    245   CPDF_Dictionary* pRoot = GetRoot();
    246   if (!pRoot) {
    247     return -1;
    248   }
    249   CPDF_Dictionary* pPages = pRoot->GetDict("Pages");
    250   if (!pPages) {
    251     return -1;
    252   }
    253   int index = 0;
    254   return _FindPageIndex(pPages, skip_count, objnum, index);
    255 }
    256 int CPDF_Document::GetPageCount() const {
    257   return m_PageList.GetSize();
    258 }
    259 static int _CountPages(CPDF_Dictionary* pPages, int level) {
    260   if (level > 128) {
    261     return 0;
    262   }
    263   int count = pPages->GetInteger("Count");
    264   if (count > 0 && count < FPDF_PAGE_MAX_NUM) {
    265     return count;
    266   }
    267   CPDF_Array* pKidList = pPages->GetArray("Kids");
    268   if (!pKidList) {
    269     return 0;
    270   }
    271   count = 0;
    272   for (FX_DWORD i = 0; i < pKidList->GetCount(); i++) {
    273     CPDF_Dictionary* pKid = pKidList->GetDict(i);
    274     if (!pKid) {
    275       continue;
    276     }
    277     if (!pKid->KeyExist("Kids")) {
    278       count++;
    279     } else {
    280       count += _CountPages(pKid, level + 1);
    281     }
    282   }
    283   pPages->SetAtInteger("Count", count);
    284   return count;
    285 }
    286 int CPDF_Document::_GetPageCount() const {
    287   CPDF_Dictionary* pRoot = GetRoot();
    288   if (!pRoot) {
    289     return 0;
    290   }
    291   CPDF_Dictionary* pPages = pRoot->GetDict("Pages");
    292   if (!pPages) {
    293     return 0;
    294   }
    295   if (!pPages->KeyExist("Kids")) {
    296     return 1;
    297   }
    298   return _CountPages(pPages, 0);
    299 }
    300 FX_BOOL CPDF_Document::IsContentUsedElsewhere(FX_DWORD objnum,
    301                                               CPDF_Dictionary* pThisPageDict) {
    302   for (int i = 0; i < m_PageList.GetSize(); i++) {
    303     CPDF_Dictionary* pPageDict = GetPage(i);
    304     if (pPageDict == pThisPageDict) {
    305       continue;
    306     }
    307     CPDF_Object* pContents =
    308         pPageDict ? pPageDict->GetElement("Contents") : NULL;
    309     if (!pContents) {
    310       continue;
    311     }
    312     if (pContents->GetDirectType() == PDFOBJ_ARRAY) {
    313       CPDF_Array* pArray = pContents->GetDirect()->AsArray();
    314       for (FX_DWORD j = 0; j < pArray->GetCount(); j++) {
    315         CPDF_Reference* pRef = ToReference(pArray->GetElement(j));
    316         if (pRef && pRef->GetRefObjNum() == objnum)
    317           return TRUE;
    318       }
    319     } else if (pContents->GetObjNum() == objnum) {
    320       return TRUE;
    321     }
    322   }
    323   return FALSE;
    324 }
    325 FX_DWORD CPDF_Document::GetUserPermissions(FX_BOOL bCheckRevision) const {
    326   if (!m_pParser) {
    327     return (FX_DWORD)-1;
    328   }
    329   return m_pParser->GetPermissions(bCheckRevision);
    330 }
    331 FX_BOOL CPDF_Document::IsOwner() const {
    332   return !m_pParser || m_pParser->IsOwner();
    333 }
    334 FX_BOOL CPDF_Document::IsFormStream(FX_DWORD objnum, FX_BOOL& bForm) const {
    335   auto it = m_IndirectObjs.find(objnum);
    336   if (it != m_IndirectObjs.end()) {
    337     CPDF_Stream* pStream = it->second->AsStream();
    338     bForm = pStream && pStream->GetDict()->GetString("Subtype") == "Form";
    339     return TRUE;
    340   }
    341   if (!m_pParser) {
    342     bForm = FALSE;
    343     return TRUE;
    344   }
    345   return m_pParser->IsFormStream(objnum, bForm);
    346 }
    347 void CPDF_Document::ClearPageData() {
    348   if (m_pDocPage) {
    349     CPDF_ModuleMgr::Get()->GetPageModule()->ClearDoc(this);
    350   }
    351 }
    352 void CPDF_Document::ClearRenderData() {
    353   if (m_pDocRender) {
    354     CPDF_ModuleMgr::Get()->GetRenderModule()->ClearDocData(m_pDocRender);
    355   }
    356 }
    357