Home | History | Annotate | Download | only in fpdfdoc
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "../../include/fpdfapi/fpdf_parser.h"
      8 #include "../../include/fpdfapi/fpdf_page.h"
      9 #include "../../include/fpdfdoc/fpdf_tagged.h"
     10 #include "tagged_int.h"
     11 const int nMaxRecursion = 32;
     12 static FX_BOOL IsTagged(const CPDF_Document* pDoc)
     13 {
     14     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
     15     CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo"));
     16     return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked"));
     17 }
     18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict)
     19 {
     20     if (!IsTagged(pDoc)) {
     21         return NULL;
     22     }
     23     CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
     24     pTree->LoadPageTree(pPageDict);
     25     return pTree;
     26 }
     27 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc)
     28 {
     29     if (!IsTagged(pDoc)) {
     30         return NULL;
     31     }
     32     CPDF_StructTreeImpl* pTree = new CPDF_StructTreeImpl(pDoc);
     33     pTree->LoadDocTree();
     34     return pTree;
     35 }
     36 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc)
     37 {
     38     CPDF_Dictionary* pCatalog = pDoc->GetRoot();
     39     m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot"));
     40     if (m_pTreeRoot == NULL) {
     41         return;
     42     }
     43     m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap"));
     44 }
     45 CPDF_StructTreeImpl::~CPDF_StructTreeImpl()
     46 {
     47     for (int i = 0; i < m_Kids.GetSize(); i ++)
     48         if (m_Kids[i]) {
     49             m_Kids[i]->Release();
     50         }
     51 }
     52 void CPDF_StructTreeImpl::LoadDocTree()
     53 {
     54     m_pPage = NULL;
     55     if (m_pTreeRoot == NULL) {
     56         return;
     57     }
     58     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
     59     if (pKids == NULL) {
     60         return;
     61     }
     62     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
     63         CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids);
     64         m_Kids.Add(pStructElementImpl);
     65         return;
     66     }
     67     if (pKids->GetType() != PDFOBJ_ARRAY) {
     68         return;
     69     }
     70     CPDF_Array* pArray = (CPDF_Array*)pKids;
     71     for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
     72         CPDF_Dictionary* pKid = pArray->GetDict(i);
     73         CPDF_StructElementImpl* pStructElementImpl = new CPDF_StructElementImpl(this, NULL, pKid);
     74         m_Kids.Add(pStructElementImpl);
     75     }
     76 }
     77 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict)
     78 {
     79     m_pPage = pPageDict;
     80     if (m_pTreeRoot == NULL) {
     81         return;
     82     }
     83     CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
     84     if (pKids == NULL) {
     85         return;
     86     }
     87     FX_DWORD dwKids = 0;
     88     if (pKids->GetType() == PDFOBJ_DICTIONARY) {
     89         dwKids = 1;
     90     } else if (pKids->GetType() == PDFOBJ_ARRAY) {
     91         dwKids = ((CPDF_Array*)pKids)->GetCount();
     92     } else {
     93         return;
     94     }
     95     FX_DWORD i;
     96     m_Kids.SetSize(dwKids);
     97     for (i = 0; i < dwKids; i ++) {
     98         m_Kids[i] = NULL;
     99     }
    100     CFX_MapPtrToPtr element_map;
    101     CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree"));
    102     if (pParentTree == NULL) {
    103         return;
    104     }
    105     CPDF_NumberTree parent_tree(pParentTree);
    106     int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1);
    107     if (parents_id >= 0) {
    108         CPDF_Object* pParents = parent_tree.LookupValue(parents_id);
    109         if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) {
    110             return;
    111         }
    112         CPDF_Array* pParentArray = (CPDF_Array*)pParents;
    113         for (i = 0; i < pParentArray->GetCount(); i ++) {
    114             CPDF_Dictionary* pParent = pParentArray->GetDict(i);
    115             if (pParent == NULL) {
    116                 continue;
    117             }
    118             AddPageNode(pParent, element_map);
    119         }
    120     }
    121 }
    122 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel)
    123 {
    124     if (nLevel > nMaxRecursion) {
    125         return NULL;
    126     }
    127     CPDF_StructElementImpl* pElement = NULL;
    128     if (map.Lookup(pDict, (FX_LPVOID&)pElement)) {
    129         return pElement;
    130     }
    131     pElement = new CPDF_StructElementImpl(this, NULL, pDict);
    132     map.SetAt(pDict, pElement);
    133     CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P"));
    134     if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) {
    135         if (!AddTopLevelNode(pDict, pElement)) {
    136             pElement->Release();
    137             map.RemoveKey(pDict);
    138         }
    139     } else {
    140         CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1);
    141         FX_BOOL bSave = FALSE;
    142         for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) {
    143             if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) {
    144                 continue;
    145             }
    146             if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) {
    147                 continue;
    148             }
    149             pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain();
    150             bSave = TRUE;
    151         }
    152         if (!bSave) {
    153             pElement->Release();
    154             map.RemoveKey(pDict);
    155         }
    156     }
    157     return pElement;
    158 }
    159 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement)
    160 {
    161     CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K"));
    162     if (!pObj) {
    163         return FALSE;
    164     }
    165     if (pObj->GetType() == PDFOBJ_DICTIONARY) {
    166         if (pObj->GetObjNum() == pDict->GetObjNum()) {
    167             if (m_Kids[0]) {
    168                 m_Kids[0]->Release();
    169             }
    170             m_Kids[0] = pElement->Retain();
    171         } else {
    172             return FALSE;
    173         }
    174     }
    175     if (pObj->GetType() == PDFOBJ_ARRAY) {
    176         CPDF_Array* pTopKids = (CPDF_Array*)pObj;
    177         FX_DWORD i;
    178         FX_BOOL bSave = FALSE;
    179         for (i = 0; i < pTopKids->GetCount(); i ++) {
    180             CPDF_Object* pKidRef = pTopKids->GetElement(i);
    181             if (pKidRef == NULL || pKidRef->GetType() != PDFOBJ_REFERENCE) {
    182                 continue;
    183             }
    184             if (((CPDF_Reference*) pKidRef)->GetRefObjNum() != pDict->GetObjNum()) {
    185                 continue;
    186             }
    187             if (m_Kids[i]) {
    188                 m_Kids[i]->Release();
    189             }
    190             m_Kids[i] = pElement->Retain();
    191             bSave = TRUE;
    192         }
    193         if (!bSave) {
    194             return FALSE;
    195         }
    196     }
    197     return TRUE;
    198 }
    199 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict)
    200     : m_RefCount(0)
    201 {
    202     m_pTree = pTree;
    203     m_pDict = pDict;
    204     m_Type = pDict->GetString(FX_BSTRC("S"));
    205     if (pTree->m_pRoleMap) {
    206         CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type);
    207         if (!mapped.IsEmpty()) {
    208             m_Type = mapped;
    209         }
    210     }
    211     m_pParent = pParent;
    212     LoadKids(pDict);
    213 }
    214 CPDF_StructElementImpl::~CPDF_StructElementImpl()
    215 {
    216     for (int i = 0; i < m_Kids.GetSize(); i ++) {
    217         if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) {
    218             ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release();
    219         }
    220     }
    221 }
    222 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain()
    223 {
    224     m_RefCount++;
    225     return this;
    226 }
    227 void CPDF_StructElementImpl::Release()
    228 {
    229     if(--m_RefCount < 1) {
    230         delete this;
    231     }
    232 }
    233 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict)
    234 {
    235     CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg"));
    236     FX_DWORD PageObjNum = 0;
    237     if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
    238         PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
    239     }
    240     CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K"));
    241     if (pKids == NULL) {
    242         return;
    243     }
    244     if (pKids->GetType() == PDFOBJ_ARRAY) {
    245         CPDF_Array* pArray = (CPDF_Array*)pKids;
    246         m_Kids.SetSize(pArray->GetCount());
    247         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    248             CPDF_Object* pKid = pArray->GetElementValue(i);
    249             LoadKid(PageObjNum, pKid, &m_Kids[i]);
    250         }
    251     } else {
    252         m_Kids.SetSize(1);
    253         LoadKid(PageObjNum, pKids, &m_Kids[0]);
    254     }
    255 }
    256 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid)
    257 {
    258     pKid->m_Type = CPDF_StructKid::Invalid;
    259     if (pKidObj == NULL) {
    260         return;
    261     }
    262     if (pKidObj->GetType() == PDFOBJ_NUMBER) {
    263         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    264             return;
    265         }
    266         pKid->m_Type = CPDF_StructKid::PageContent;
    267         pKid->m_PageContent.m_ContentId = pKidObj->GetInteger();
    268         pKid->m_PageContent.m_PageObjNum = PageObjNum;
    269         return;
    270     }
    271     if (pKidObj->GetType() != PDFOBJ_DICTIONARY) {
    272         return;
    273     }
    274     CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj;
    275     CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg"));
    276     if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) {
    277         PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum();
    278     }
    279     CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type"));
    280     if (type == FX_BSTRC("MCR")) {
    281         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    282             return;
    283         }
    284         pKid->m_Type = CPDF_StructKid::StreamContent;
    285         CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm"));
    286         if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) {
    287             pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum();
    288         } else {
    289             pKid->m_StreamContent.m_RefObjNum = 0;
    290         }
    291         pKid->m_StreamContent.m_PageObjNum = PageObjNum;
    292         pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID"));
    293     } else if (type == FX_BSTRC("OBJR")) {
    294         if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    295             return;
    296         }
    297         pKid->m_Type = CPDF_StructKid::Object;
    298         CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj"));
    299         if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) {
    300             pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum();
    301         } else {
    302             pKid->m_Object.m_RefObjNum = 0;
    303         }
    304         pKid->m_Object.m_PageObjNum = PageObjNum;
    305     } else {
    306         pKid->m_Type = CPDF_StructKid::Element;
    307         pKid->m_Element.m_pDict = pKidDict;
    308         if (m_pTree->m_pPage == NULL) {
    309             pKid->m_Element.m_pElement = new CPDF_StructElementImpl(m_pTree, this, pKidDict);
    310         } else {
    311             pKid->m_Element.m_pElement = NULL;
    312         }
    313     }
    314 }
    315 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F)
    316 {
    317     if (nLevel > nMaxRecursion) {
    318         return NULL;
    319     }
    320     if (pAttrs == NULL) {
    321         return NULL;
    322     }
    323     CPDF_Dictionary* pDict = NULL;
    324     if (pAttrs->GetType() == PDFOBJ_DICTIONARY) {
    325         pDict = (CPDF_Dictionary*)pAttrs;
    326     } else if (pAttrs->GetType() == PDFOBJ_STREAM) {
    327         pDict = ((CPDF_Stream*)pAttrs)->GetDict();
    328     } else if (pAttrs->GetType() == PDFOBJ_ARRAY) {
    329         CPDF_Array* pArray = (CPDF_Array*)pAttrs;
    330         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    331             CPDF_Object* pElement = pArray->GetElementValue(i);
    332             pDict = FindAttrDict(pElement, owner, nLevel + 1);
    333             if (pDict) {
    334                 return pDict;
    335             }
    336         }
    337     }
    338     if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) {
    339         return pDict;
    340     }
    341     return NULL;
    342 }
    343 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel)
    344 {
    345     if (fLevel > nMaxRecursion) {
    346         return NULL;
    347     }
    348     if (bInheritable) {
    349         CPDF_Object* pAttr = GetAttr(owner, name, FALSE);
    350         if (pAttr) {
    351             return pAttr;
    352         }
    353         if (m_pParent == NULL) {
    354             return NULL;
    355         }
    356         return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1);
    357     }
    358     CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A"));
    359     if (pA) {
    360         CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
    361         if (pAttrDict) {
    362             CPDF_Object* pAttr = pAttrDict->GetElementValue(name);
    363             if (pAttr) {
    364                 return pAttr;
    365             }
    366         }
    367     }
    368     CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C"));
    369     if (pC == NULL) {
    370         return NULL;
    371     }
    372     CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap"));
    373     if (pClassMap == NULL) {
    374         return NULL;
    375     }
    376     if (pC->GetType() == PDFOBJ_ARRAY) {
    377         CPDF_Array* pArray = (CPDF_Array*)pC;
    378         for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) {
    379             CFX_ByteString class_name = pArray->GetString(i);
    380             CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
    381             if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
    382                 return pClassDict->GetElementValue(name);
    383             }
    384         }
    385         return NULL;
    386     }
    387     CFX_ByteString class_name = pC->GetString();
    388     CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name);
    389     if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) {
    390         return pClassDict->GetElementValue(name);
    391     }
    392     return NULL;
    393 }
    394 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex)
    395 {
    396     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
    397     if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) {
    398         return pAttr;
    399     }
    400     CPDF_Array* pArray = (CPDF_Array*)pAttr;
    401     if (subindex >= (int)pArray->GetCount()) {
    402         return pAttr;
    403     }
    404     return pArray->GetElementValue(subindex);
    405 }
    406 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex)
    407 {
    408     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    409     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) {
    410         return default_value;
    411     }
    412     return pAttr->GetString();
    413 }
    414 FX_ARGB	CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex)
    415 {
    416     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    417     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) {
    418         return default_value;
    419     }
    420     CPDF_Array* pArray = (CPDF_Array*)pAttr;
    421     return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255);
    422 }
    423 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex)
    424 {
    425     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    426     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
    427         return default_value;
    428     }
    429     return pAttr->GetNumber();
    430 }
    431 int	CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex)
    432 {
    433     CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    434     if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) {
    435         return default_value;
    436     }
    437     return pAttr->GetInteger();
    438 }
    439