Home | History | Annotate | Download | only in fpdfdoc
      1 // Copyright 2014 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include <map>
      8 #include <memory>
      9 #include <utility>
     10 
     11 #include "core/fpdfapi/parser/cpdf_array.h"
     12 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     13 #include "core/fpdfapi/parser/cpdf_document.h"
     14 #include "core/fpdfapi/parser/cpdf_name.h"
     15 #include "core/fpdfapi/parser/cpdf_number.h"
     16 #include "core/fpdfapi/parser/cpdf_reference.h"
     17 #include "core/fpdfapi/parser/cpdf_stream.h"
     18 #include "core/fpdfdoc/cpdf_numbertree.h"
     19 #include "core/fpdfdoc/fpdf_tagged.h"
     20 #include "core/fpdfdoc/tagged_int.h"
     21 #include "third_party/base/ptr_util.h"
     22 
     23 namespace {
     24 
     25 const int nMaxRecursion = 32;
     26 
     27 bool IsTagged(const CPDF_Document* pDoc) {
     28   CPDF_Dictionary* pCatalog = pDoc->GetRoot();
     29   CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo");
     30   return pMarkInfo && pMarkInfo->GetIntegerFor("Marked");
     31 }
     32 
     33 }  // namespace
     34 
     35 CPDF_StructKid::CPDF_StructKid()
     36     : m_Type(Invalid),
     37       m_pDict(nullptr),
     38       m_PageObjNum(0),
     39       m_RefObjNum(0),
     40       m_ContentId(0) {}
     41 
     42 CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default;
     43 
     44 CPDF_StructKid::~CPDF_StructKid() {}
     45 
     46 // static
     47 std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage(
     48     const CPDF_Document* pDoc,
     49     const CPDF_Dictionary* pPageDict) {
     50   if (!IsTagged(pDoc))
     51     return nullptr;
     52 
     53   auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc);
     54   pTree->LoadPageTree(pPageDict);
     55   return std::move(pTree);
     56 }
     57 
     58 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc)
     59     : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")),
     60       m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr),
     61       m_pPage(nullptr) {}
     62 
     63 CPDF_StructTree::~CPDF_StructTree() {}
     64 
     65 int CPDF_StructTree::CountTopElements() const {
     66   return pdfium::CollectionSize<int>(m_Kids);
     67 }
     68 
     69 IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const {
     70   return m_Kids[i].Get();
     71 }
     72 
     73 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) {
     74   m_pPage = pPageDict;
     75   if (!m_pTreeRoot)
     76     return;
     77 
     78   CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K");
     79   if (!pKids)
     80     return;
     81 
     82   uint32_t dwKids = 0;
     83   if (pKids->IsDictionary())
     84     dwKids = 1;
     85   else if (CPDF_Array* pArray = pKids->AsArray())
     86     dwKids = pArray->GetCount();
     87   else
     88     return;
     89 
     90   m_Kids.clear();
     91   m_Kids.resize(dwKids);
     92   CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree");
     93   if (!pParentTree)
     94     return;
     95 
     96   CPDF_NumberTree parent_tree(pParentTree);
     97   int parents_id = pPageDict->GetIntegerFor("StructParents", -1);
     98   if (parents_id < 0)
     99     return;
    100 
    101   CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id));
    102   if (!pParentArray)
    103     return;
    104 
    105   std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map;
    106   for (size_t i = 0; i < pParentArray->GetCount(); i++) {
    107     if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i))
    108       AddPageNode(pParent, &element_map);
    109   }
    110 }
    111 
    112 CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode(
    113     CPDF_Dictionary* pDict,
    114     std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map,
    115     int nLevel) {
    116   if (nLevel > nMaxRecursion)
    117     return nullptr;
    118 
    119   auto it = map->find(pDict);
    120   if (it != map->end())
    121     return it->second;
    122 
    123   auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict);
    124   (*map)[pDict] = pElement;
    125   CPDF_Dictionary* pParent = pDict->GetDictFor("P");
    126   if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") {
    127     if (!AddTopLevelNode(pDict, pElement))
    128       map->erase(pDict);
    129     return pElement;
    130   }
    131 
    132   CFX_RetainPtr<CPDF_StructElement> pParentElement =
    133       AddPageNode(pParent, map, nLevel + 1);
    134   bool bSave = false;
    135   for (CPDF_StructKid& kid : *pParentElement->GetKids()) {
    136     if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) {
    137       kid.m_pElement = pElement;
    138       bSave = true;
    139     }
    140   }
    141   if (!bSave)
    142     map->erase(pDict);
    143   return pElement;
    144 }
    145 
    146 bool CPDF_StructTree::AddTopLevelNode(
    147     CPDF_Dictionary* pDict,
    148     const CFX_RetainPtr<CPDF_StructElement>& pElement) {
    149   CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K");
    150   if (!pObj)
    151     return false;
    152 
    153   if (pObj->IsDictionary()) {
    154     if (pObj->GetObjNum() != pDict->GetObjNum())
    155       return false;
    156     m_Kids[0] = pElement;
    157   }
    158   if (CPDF_Array* pTopKids = pObj->AsArray()) {
    159     bool bSave = false;
    160     for (size_t i = 0; i < pTopKids->GetCount(); i++) {
    161       CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i));
    162       if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) {
    163         m_Kids[i] = pElement;
    164         bSave = true;
    165       }
    166     }
    167     if (!bSave)
    168       return false;
    169   }
    170   return true;
    171 }
    172 
    173 CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree,
    174                                        CPDF_StructElement* pParent,
    175                                        CPDF_Dictionary* pDict)
    176     : m_pTree(pTree),
    177       m_pParent(pParent),
    178       m_pDict(pDict),
    179       m_Type(pDict->GetStringFor("S")) {
    180   if (pTree->m_pRoleMap) {
    181     CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type);
    182     if (!mapped.IsEmpty())
    183       m_Type = mapped;
    184   }
    185   LoadKids(pDict);
    186 }
    187 
    188 IPDF_StructTree* CPDF_StructElement::GetTree() const {
    189   return m_pTree;
    190 }
    191 
    192 const CFX_ByteString& CPDF_StructElement::GetType() const {
    193   return m_Type;
    194 }
    195 
    196 IPDF_StructElement* CPDF_StructElement::GetParent() const {
    197   return m_pParent;
    198 }
    199 
    200 CPDF_Dictionary* CPDF_StructElement::GetDict() const {
    201   return m_pDict;
    202 }
    203 
    204 int CPDF_StructElement::CountKids() const {
    205   return pdfium::CollectionSize<int>(m_Kids);
    206 }
    207 
    208 IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const {
    209   if (m_Kids[index].m_Type != CPDF_StructKid::Element)
    210     return nullptr;
    211 
    212   return m_Kids[index].m_pElement.Get();
    213 }
    214 
    215 CPDF_StructElement::~CPDF_StructElement() {}
    216 
    217 void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) {
    218   CPDF_Object* pObj = pDict->GetObjectFor("Pg");
    219   uint32_t PageObjNum = 0;
    220   if (CPDF_Reference* pRef = ToReference(pObj))
    221     PageObjNum = pRef->GetRefObjNum();
    222 
    223   CPDF_Object* pKids = pDict->GetDirectObjectFor("K");
    224   if (!pKids)
    225     return;
    226 
    227   m_Kids.clear();
    228   if (CPDF_Array* pArray = pKids->AsArray()) {
    229     m_Kids.resize(pArray->GetCount());
    230     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
    231       CPDF_Object* pKid = pArray->GetDirectObjectAt(i);
    232       LoadKid(PageObjNum, pKid, &m_Kids[i]);
    233     }
    234   } else {
    235     m_Kids.resize(1);
    236     LoadKid(PageObjNum, pKids, &m_Kids[0]);
    237   }
    238 }
    239 void CPDF_StructElement::LoadKid(uint32_t PageObjNum,
    240                                  CPDF_Object* pKidObj,
    241                                  CPDF_StructKid* pKid) {
    242   pKid->m_Type = CPDF_StructKid::Invalid;
    243   if (!pKidObj)
    244     return;
    245 
    246   if (pKidObj->IsNumber()) {
    247     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    248       return;
    249     }
    250     pKid->m_Type = CPDF_StructKid::PageContent;
    251     pKid->m_ContentId = pKidObj->GetInteger();
    252     pKid->m_PageObjNum = PageObjNum;
    253     return;
    254   }
    255 
    256   CPDF_Dictionary* pKidDict = pKidObj->AsDictionary();
    257   if (!pKidDict)
    258     return;
    259 
    260   if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg")))
    261     PageObjNum = pRef->GetRefObjNum();
    262 
    263   CFX_ByteString type = pKidDict->GetStringFor("Type");
    264   if (type == "MCR") {
    265     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    266       return;
    267     }
    268     pKid->m_Type = CPDF_StructKid::StreamContent;
    269     CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm"));
    270     pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0;
    271     pKid->m_PageObjNum = PageObjNum;
    272     pKid->m_ContentId = pKidDict->GetIntegerFor("MCID");
    273   } else if (type == "OBJR") {
    274     if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) {
    275       return;
    276     }
    277     pKid->m_Type = CPDF_StructKid::Object;
    278     CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj"));
    279     pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0;
    280     pKid->m_PageObjNum = PageObjNum;
    281   } else {
    282     pKid->m_Type = CPDF_StructKid::Element;
    283     pKid->m_pDict = pKidDict;
    284     if (!m_pTree->m_pPage) {
    285       pKid->m_pElement =
    286           pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict);
    287     } else {
    288       pKid->m_pElement = nullptr;
    289     }
    290   }
    291 }
    292 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs,
    293                                      const CFX_ByteStringC& owner,
    294                                      FX_FLOAT nLevel = 0.0F) {
    295   if (nLevel > nMaxRecursion)
    296     return nullptr;
    297   if (!pAttrs)
    298     return nullptr;
    299 
    300   CPDF_Dictionary* pDict = nullptr;
    301   if (pAttrs->IsDictionary()) {
    302     pDict = pAttrs->AsDictionary();
    303   } else if (CPDF_Stream* pStream = pAttrs->AsStream()) {
    304     pDict = pStream->GetDict();
    305   } else if (CPDF_Array* pArray = pAttrs->AsArray()) {
    306     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
    307       CPDF_Object* pElement = pArray->GetDirectObjectAt(i);
    308       pDict = FindAttrDict(pElement, owner, nLevel + 1);
    309       if (pDict)
    310         return pDict;
    311     }
    312   }
    313   if (pDict && pDict->GetStringFor("O") == owner)
    314     return pDict;
    315   return nullptr;
    316 }
    317 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
    318                                          const CFX_ByteStringC& name,
    319                                          bool bInheritable,
    320                                          FX_FLOAT fLevel) {
    321   if (fLevel > nMaxRecursion) {
    322     return nullptr;
    323   }
    324   if (bInheritable) {
    325     CPDF_Object* pAttr = GetAttr(owner, name, false);
    326     if (pAttr) {
    327       return pAttr;
    328     }
    329     if (!m_pParent) {
    330       return nullptr;
    331     }
    332     return m_pParent->GetAttr(owner, name, true, fLevel + 1);
    333   }
    334   CPDF_Object* pA = m_pDict->GetDirectObjectFor("A");
    335   if (pA) {
    336     CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner);
    337     if (pAttrDict) {
    338       CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name));
    339       if (pAttr) {
    340         return pAttr;
    341       }
    342     }
    343   }
    344   CPDF_Object* pC = m_pDict->GetDirectObjectFor("C");
    345   if (!pC)
    346     return nullptr;
    347 
    348   CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap");
    349   if (!pClassMap)
    350     return nullptr;
    351 
    352   if (CPDF_Array* pArray = pC->AsArray()) {
    353     for (uint32_t i = 0; i < pArray->GetCount(); i++) {
    354       CFX_ByteString class_name = pArray->GetStringAt(i);
    355       CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
    356       if (pClassDict && pClassDict->GetStringFor("O") == owner)
    357         return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
    358     }
    359     return nullptr;
    360   }
    361   CFX_ByteString class_name = pC->GetString();
    362   CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name);
    363   if (pClassDict && pClassDict->GetStringFor("O") == owner)
    364     return pClassDict->GetDirectObjectFor(CFX_ByteString(name));
    365   return nullptr;
    366 }
    367 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner,
    368                                          const CFX_ByteStringC& name,
    369                                          bool bInheritable,
    370                                          int subindex) {
    371   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable);
    372   CPDF_Array* pArray = ToArray(pAttr);
    373   if (!pArray || subindex == -1)
    374     return pAttr;
    375 
    376   if (subindex >= static_cast<int>(pArray->GetCount()))
    377     return pAttr;
    378   return pArray->GetDirectObjectAt(subindex);
    379 }
    380 CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner,
    381                                            const CFX_ByteStringC& name,
    382                                            const CFX_ByteStringC& default_value,
    383                                            bool bInheritable,
    384                                            int subindex) {
    385   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    386   if (ToName(pAttr))
    387     return pAttr->GetString();
    388   return CFX_ByteString(default_value);
    389 }
    390 
    391 FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner,
    392                                      const CFX_ByteStringC& name,
    393                                      FX_ARGB default_value,
    394                                      bool bInheritable,
    395                                      int subindex) {
    396   CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex));
    397   if (!pArray)
    398     return default_value;
    399   return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) |
    400          ((int)(pArray->GetNumberAt(1) * 255) << 8) |
    401          (int)(pArray->GetNumberAt(2) * 255);
    402 }
    403 FX_FLOAT CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner,
    404                                        const CFX_ByteStringC& name,
    405                                        FX_FLOAT default_value,
    406                                        bool bInheritable,
    407                                        int subindex) {
    408   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    409   return ToNumber(pAttr) ? pAttr->GetNumber() : default_value;
    410 }
    411 int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner,
    412                                    const CFX_ByteStringC& name,
    413                                    int default_value,
    414                                    bool bInheritable,
    415                                    int subindex) {
    416   CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex);
    417   return ToNumber(pAttr) ? pAttr->GetInteger() : default_value;
    418 }
    419