Home | History | Annotate | Download | only in fpdfdoc
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "../../include/fpdfapi/fpdf_parser.h"
      8 #include "../../include/fpdfapi/fpdf_page.h"
      9 #include "../../include/fpdfdoc/fpdf_tagged.h"
     10 #include "tagged_int.h"
     // Maximum nesting level tolerated by the recursive routines in this file:
     // AddPageNode, FindAttrDict and GetAttr give up once their level exceeds it.
     static const int nMaxRecursion = 32;
     12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
     13 {
     14     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
     15     CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
     16     return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
     17 }
     18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
     19 {
     20     if (!IsTagged(pDoc)) {
     21         return NULL;
     22     }
     23     CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
     24     if (pTree == NULL) {
     25         return NULL;
     26     }
     27     pTree->LoadPageTree(pPageDict);
     28     return pTree;
     29 }
     30 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
     31 {
     32     if (!IsTagged(pDoc)) {
     33         return NULL;
     34     }
     35     CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc);
     36     if (pTree == NULL) {
     37         return NULL;
     38     }
     39     pTree->LoadDocTree();
     40     return pTree;
     41 }
     42 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
     43 {
     44     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
     45     m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
     46     if (m_pTreeRoot == NULL) {
     47         return;
     48     }
     49     m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
     50 }
     51 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
     52 {
     53     for (int i = 0; i < m_Kids.GetSize(); i ++)
     54         if (m_Kids[i]) {
     55             m_Kids[i]->Release();
     56         }
     57 }
     58 void CPDF_StructTreeImpl::LoadDocTree()
     59 {
     60     m_pPage = NULL;
     61     if (m_pTreeRoot == NULL) {
     62         return;
     63     }
     64     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
     65     if (pKids == NULL) {
     66         return;
     67     }
     68     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
     69         CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
     70         if (pStructElementImpl == NULL) {
     71             return;
     72         }
     73         m_Kids.Add(pStructElementImpl);
     74         return;
     75     }
     76     if (pKids->GetType() != PDFOBJ_ARRAY) {
     77         return;
     78     }
     79     CPDF_Array* pArray = (CPDF_Array*)pKids;
     80     for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
     81         CPDF_Dictionary* pKid = pArray->GetDict(i);
     82         CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, pKid);
     83         if (pStructElementImpl == NULL) {
     84             return;
     85         }
     86         m_Kids.Add(pStructElementImpl);
     87     }
     88 }
     89 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
     90 {
     91     m_pPage = pPageDict;
     92     if (m_pTreeRoot == NULL) {
     93         return;
     94     }
     95     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
     96     if (pKids == NULL) {
     97         return;
     98     }
     99     FX_DWORD dwKids = 0;
    100     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
    101         dwKids = 1;
    102     } else if (pKids->GetType() == PDFOBJ_ARRAY) {
    103         dwKids = ((CPDF_Array*)pKids)->GetCount();
    104     } else {
    105         return;
    106     }
    107     FX_DWORD i;
    108     m_Kids.SetSize(dwKids);
    109     for (i = 0; i < dwKids; i ++) {
    110         m_Kids[i] = NULL;
    111     }
    112     CFX_MapPtrToPtr element_map;
    113     CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
    114     if (pParentTree == NULL) {
    115         return;
    116     }
    117     CPDF_NumberTree parent_tree(pParentTree);
    118     int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
    119     if (parents_id >= 0) {
    120         CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
    121         if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
    122             return;
    123         }
    124         CPDF_Array* pParentArray = (CPDF_Array*)pParents;
    125         for (i = 0; i < pParentArray->GetCount(); i ++) {
    126             CPDF_Dictionary* pParent = pParentArray->GetDict(i);
    127             if (pParent == NULL) {
    128                 continue;
    129             }
    130             AddPageNode(pParent, element_map);
    131         }
    132     }
    133 }
    134 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
    135 {
    136     if (nLevel > nMaxRecursion) {
    137         return NULL;
    138     }
    139     CPDF_StructElementImpl* pElement = NULL;
    140     if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
    141         return pElement;
    142     }
    143     pElement = FX_NEW CPDF_StructElementImpl(this, NULL, pDict);
    144     if (pElement == NULL) {
    145         return NULL;
    146     }
    147     map.SetAt(pDict, pElement);
    148     CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
    149     if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
    150         if (!AddTopLevelNode(pDict, pElement)) {
    151             pElement->Release();
    152             map.RemoveKey(pDict);
    153         }
    154     } else {
    155         CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
    156         FX_BOOL bSave = FALSE;
    157         for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
    158             if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
    159                 continue;
    160             }
    161             if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
    162                 continue;
    163             }
    164             pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
    165             bSave = TRUE;
    166         }
    167         if (!bSave) {
    168             pElement->Release();
    169             map.RemoveKey(pDict);
    170         }
    171     }
    172     return pElement;
    173 }
    174 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
    175 {
    176     CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
    177     if (!pObj) {
    178         return FALSE;
    179     }
    180     if (pObj->GetType() == PDFOBJ_DICTIONARY) {
    181         if (pObj->GetObjNum() == pDict->GetObjNum()) {
    182             if (m_Kids[0]) {
    183                 m_Kids[0]->Release();
    184             }
    185             m_Kids[0] = pElement->Retain();
    186         } else {
    187             return FALSE;
    188         }
    189     }
    190     if (pObj->GetType() == PDFOBJ_ARRAY) {
    191         CPDF_Array* pTopKids = (CPDF_Array*)pObj;
    192         FX_DWORD i;
    193         FX_BOOL bSave = FALSE;
    194         for (i = 0; i < pTopKids->GetCount(); i ++) {
    195             CPDF_Reference* pKidRef = (CPDF_Reference*)pTopKids->GetElement(i);
    196             if (pKidRef->GetType() != PDFOBJ_REFERENCE || pKidRef->GetRefObjNum() != pDict->GetObjNum()) {
    197                 continue;
    198             }
    199             if (m_Kids[i]) {
    200                 m_Kids[i]->Release();
    201             }
    202             m_Kids[i] = pElement->Retain();
    203             bSave = TRUE;
    204         }
    205         if (!bSave) {
    206             return FALSE;
    207         }
    208     }
    209     return TRUE;
    210 }
    211 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
    212     : m_RefCount(0)
    213 {
    214     m_pTree = pTree;
    215     m_pDict = pDict;
    216     m_Type = pDict->GetString(FX_BSTRC("S"));
    217     CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
    218     if (!mapped.IsEmpty()) {
    219         m_Type = mapped;
    220     }
    221     m_pParent = pParent;
    222     LoadKids(pDict);
    223 }
    224 CPDF_StructElementImpl::~CPDF_StructElementImpl()
    225 {
    226     for (int i = 0; i < m_Kids.GetSize(); i ++) {
    227         if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
    228             ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
    229         }
    230     }
    231 }
    232 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
    233 {
    234     m_RefCount++;
    235     return this;
    236 }
    237 void CPDF_StructElementImpl::Release()
    238 {
    239     if(--m_RefCount < 1) {
    240         delete this;
    241     }
    242 }
    243 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
    244 {
    245     CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
    246     FX_DWORD PageObjNum = 0;
    247     if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
    248         PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
    249     }
    250     CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
    251     if (pKids == NULL) {
    252         return;
    253     }
    254     if (pKids->GetType() == PDFOBJ_ARRAY) {
    255         CPDF_Array* pArray = (CPDF_Array*)pKids;
    256         m_Kids.SetSize(pArray->GetCount());
    257         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    258             CPDF_Object* pKid = pArray->GetElementValue(i);
    259             LoadKid(PageObjNum, pKid, &m_Kids[i]);
    260         }
    261     } else {
    262         m_Kids.SetSize(1);
    263         LoadKid(PageObjNum, pKids, &m_Kids[0]);
    264     }
    265 }
    266 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
    267 {
    268     pKid->m_Type = CPDF_StructKid::Invalid;
    269     if (pKidObj == NULL) {
    270         return;
    271     }
    272     if (pKidObj->GetType() == PDFOBJ_NUMBER) {
    273         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    274             return;
    275         }
    276         pKid->m_Type = CPDF_StructKid::PageContent;
    277         pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
    278         pKid->m_PageContent.m_PageObjNum = PageObjNum;
    279         return;
    280     }
    281     if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
    282         return;
    283     }
    284     CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
    285     CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
    286     if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
    287         PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
    288     }
    289     CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
    290     if (type == FX_BSTRC("MCR")) {
    291         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    292             return;
    293         }
    294         pKid->m_Type = CPDF_StructKid::StreamContent;
    295         CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
    296         if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
    297             pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
    298         } else {
    299             pKid->m_StreamContent.m_RefObjNum = 0;
    300         }
    301         pKid->m_StreamContent.m_PageObjNum = PageObjNum;
    302         pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
    303     } else if (type == FX_BSTRC("OBJR")) {
    304         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    305             return;
    306         }
    307         pKid->m_Type = CPDF_StructKid::Object;
    308         CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
    309         if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
    310             pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
    311         } else {
    312             pKid->m_Object.m_RefObjNum = 0;
    313         }
    314         pKid->m_Object.m_PageObjNum = PageObjNum;
    315     } else {
    316         pKid->m_Type = CPDF_StructKid::Element;
    317         pKid->m_Element.m_pDict = pKidDict;
    318         if (m_pTree->m_pPage == NULL) {
    319             pKid->m_Element.m_pElement = FX_NEW CPDF_StructElementImpl(m_pTree, this, pKidDict);
    320         } else {
    321             pKid->m_Element.m_pElement = NULL;
    322         }
    323     }
    324 }
    325 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
    326 {
    327     if (nLevel > nMaxRecursion) {
    328         return NULL;
    329     }
    330     if (pAttrs == NULL) {
    331         return NULL;
    332     }
    333     CPDF_Dictionary* pDict = NULL;
    334     if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
    335         pDict = (CPDF_Dictionary*)pAttrs;
    336     } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
    337         pDict = ((CPDF_Stream*)pAttrs)->GetDict();
    338     } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
    339         CPDF_Array* pArray = (CPDF_Array*)pAttrs;
    340         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    341             CPDF_Object* pElement = pArray->GetElementValue(i);
    342             pDict = FindAttrDict(pElement, owner, nLevel + 1);
    343             if (pDict) {
    344                 return pDict;
    345             }
    346         }
    347     }
    348     if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
    349         return pDict;
    350     }
    351     return NULL;
    352 }
    353 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
    354 {
    355     if (fLevel > nMaxRecursion) {
    356         return NULL;
    357     }
    358     if (bInheritable) {
    359         CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
    360         if (pAttr) {
    361             return pAttr;
    362         }
    363         if (m_pParent == NULL) {
    364             return NULL;
    365         }
    366         return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
    367     }
    368     CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
    369     if (pA) {
    370         CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
    371         if (pAttrDict) {
    372             CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
    373             if (pAttr) {
    374                 return pAttr;
    375             }
    376         }
    377     }
    378     CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
    379     if (pC == NULL) {
    380         return NULL;
    381     }
    382     CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
    383     if (pClassMap == NULL) {
    384         return NULL;
    385     }
    386     if (pC->GetType() == PDFOBJ_ARRAY) {
    387         CPDF_Array* pArray = (CPDF_Array*)pC;
    388         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    389             CFX_ByteString class_name = pArray->GetString(i);
    390             CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
    391             if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
    392                 return pClassDict->GetElementValue(name);
    393             }
    394         }
    395         return NULL;
    396     }
    397     CFX_ByteString class_name = pC->GetString();
    398     CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
    399     if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
    400         return pClassDict->GetElementValue(name);
    401     }
    402     return NULL;
    403 }
    404 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
    405 {
    406     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
    407     if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
    408         return pAttr;
    409     }
    410     CPDF_Array* pArray = (CPDF_Array*)pAttr;
    411     if (subindex >= (int)pArray->GetCount()) {
    412         return pAttr;
    413     }
    414     return pArray->GetElementValue(subindex);
    415 }
    416 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
    417 {
    418     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    419     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
    420         return default_value;
    421     }
    422     return pAttr->GetString();
    423 }
    424 FX_ARGB	CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
    425 {
    426     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    427     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
    428         return default_value;
    429     }
    430     CPDF_Array* pArray = (CPDF_Array*)pAttr;
    431     return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
    432 }
    433 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
    434 {
    435     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    436     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
    437         return default_value;
    438     }
    439     return pAttr->GetNumber();
    440 }
    441 int	CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
    442 {
    443     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    444     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
    445         return default_value;
    446     }
    447     return pAttr->GetInteger();
    448 }
    449