1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include <map> 8 #include <memory> 9 #include <utility> 10 11 #include "core/fpdfapi/parser/cpdf_array.h" 12 #include "core/fpdfapi/parser/cpdf_dictionary.h" 13 #include "core/fpdfapi/parser/cpdf_document.h" 14 #include "core/fpdfapi/parser/cpdf_name.h" 15 #include "core/fpdfapi/parser/cpdf_number.h" 16 #include "core/fpdfapi/parser/cpdf_reference.h" 17 #include "core/fpdfapi/parser/cpdf_stream.h" 18 #include "core/fpdfdoc/cpdf_numbertree.h" 19 #include "core/fpdfdoc/fpdf_tagged.h" 20 #include "core/fpdfdoc/tagged_int.h" 21 #include "third_party/base/ptr_util.h" 22 23 namespace { 24 25 const int nMaxRecursion = 32; 26 27 bool IsTagged(const CPDF_Document* pDoc) { 28 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 29 CPDF_Dictionary* pMarkInfo = pCatalog->GetDictFor("MarkInfo"); 30 return pMarkInfo && pMarkInfo->GetIntegerFor("Marked"); 31 } 32 33 } // namespace 34 35 CPDF_StructKid::CPDF_StructKid() 36 : m_Type(Invalid), 37 m_pDict(nullptr), 38 m_PageObjNum(0), 39 m_RefObjNum(0), 40 m_ContentId(0) {} 41 42 CPDF_StructKid::CPDF_StructKid(const CPDF_StructKid& that) = default; 43 44 CPDF_StructKid::~CPDF_StructKid() {} 45 46 // static 47 std::unique_ptr<IPDF_StructTree> IPDF_StructTree::LoadPage( 48 const CPDF_Document* pDoc, 49 const CPDF_Dictionary* pPageDict) { 50 if (!IsTagged(pDoc)) 51 return nullptr; 52 53 auto pTree = pdfium::MakeUnique<CPDF_StructTree>(pDoc); 54 pTree->LoadPageTree(pPageDict); 55 return std::move(pTree); 56 } 57 58 CPDF_StructTree::CPDF_StructTree(const CPDF_Document* pDoc) 59 : m_pTreeRoot(pDoc->GetRoot()->GetDictFor("StructTreeRoot")), 60 m_pRoleMap(m_pTreeRoot ? m_pTreeRoot->GetDictFor("RoleMap") : nullptr), 61 m_pPage(nullptr) {} 62 63 CPDF_StructTree::~CPDF_StructTree() {} 64 65 int CPDF_StructTree::CountTopElements() const { 66 return pdfium::CollectionSize<int>(m_Kids); 67 } 68 69 IPDF_StructElement* CPDF_StructTree::GetTopElement(int i) const { 70 return m_Kids[i].Get(); 71 } 72 73 void CPDF_StructTree::LoadPageTree(const CPDF_Dictionary* pPageDict) { 74 m_pPage = pPageDict; 75 if (!m_pTreeRoot) 76 return; 77 78 CPDF_Object* pKids = m_pTreeRoot->GetDirectObjectFor("K"); 79 if (!pKids) 80 return; 81 82 uint32_t dwKids = 0; 83 if (pKids->IsDictionary()) 84 dwKids = 1; 85 else if (CPDF_Array* pArray = pKids->AsArray()) 86 dwKids = pArray->GetCount(); 87 else 88 return; 89 90 m_Kids.clear(); 91 m_Kids.resize(dwKids); 92 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDictFor("ParentTree"); 93 if (!pParentTree) 94 return; 95 96 CPDF_NumberTree parent_tree(pParentTree); 97 int parents_id = pPageDict->GetIntegerFor("StructParents", -1); 98 if (parents_id < 0) 99 return; 100 101 CPDF_Array* pParentArray = ToArray(parent_tree.LookupValue(parents_id)); 102 if (!pParentArray) 103 return; 104 105 std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>> element_map; 106 for (size_t i = 0; i < pParentArray->GetCount(); i++) { 107 if (CPDF_Dictionary* pParent = pParentArray->GetDictAt(i)) 108 AddPageNode(pParent, &element_map); 109 } 110 } 111 112 CFX_RetainPtr<CPDF_StructElement> CPDF_StructTree::AddPageNode( 113 CPDF_Dictionary* pDict, 114 std::map<CPDF_Dictionary*, CFX_RetainPtr<CPDF_StructElement>>* map, 115 int nLevel) { 116 if (nLevel > nMaxRecursion) 117 return nullptr; 118 119 auto it = map->find(pDict); 120 if (it != map->end()) 121 return it->second; 122 123 auto pElement = pdfium::MakeRetain<CPDF_StructElement>(this, nullptr, pDict); 124 (*map)[pDict] = pElement; 125 CPDF_Dictionary* pParent = pDict->GetDictFor("P"); 126 if (!pParent || pParent->GetStringFor("Type") == "StructTreeRoot") { 127 if (!AddTopLevelNode(pDict, pElement)) 128 map->erase(pDict); 129 return pElement; 130 } 131 132 CFX_RetainPtr<CPDF_StructElement> pParentElement = 133 AddPageNode(pParent, map, nLevel + 1); 134 bool bSave = false; 135 for (CPDF_StructKid& kid : *pParentElement->GetKids()) { 136 if (kid.m_Type == CPDF_StructKid::Element && kid.m_pDict == pDict) { 137 kid.m_pElement = pElement; 138 bSave = true; 139 } 140 } 141 if (!bSave) 142 map->erase(pDict); 143 return pElement; 144 } 145 146 bool CPDF_StructTree::AddTopLevelNode( 147 CPDF_Dictionary* pDict, 148 const CFX_RetainPtr<CPDF_StructElement>& pElement) { 149 CPDF_Object* pObj = m_pTreeRoot->GetDirectObjectFor("K"); 150 if (!pObj) 151 return false; 152 153 if (pObj->IsDictionary()) { 154 if (pObj->GetObjNum() != pDict->GetObjNum()) 155 return false; 156 m_Kids[0] = pElement; 157 } 158 if (CPDF_Array* pTopKids = pObj->AsArray()) { 159 bool bSave = false; 160 for (size_t i = 0; i < pTopKids->GetCount(); i++) { 161 CPDF_Reference* pKidRef = ToReference(pTopKids->GetObjectAt(i)); 162 if (pKidRef && pKidRef->GetRefObjNum() == pDict->GetObjNum()) { 163 m_Kids[i] = pElement; 164 bSave = true; 165 } 166 } 167 if (!bSave) 168 return false; 169 } 170 return true; 171 } 172 173 CPDF_StructElement::CPDF_StructElement(CPDF_StructTree* pTree, 174 CPDF_StructElement* pParent, 175 CPDF_Dictionary* pDict) 176 : m_pTree(pTree), 177 m_pParent(pParent), 178 m_pDict(pDict), 179 m_Type(pDict->GetStringFor("S")) { 180 if (pTree->m_pRoleMap) { 181 CFX_ByteString mapped = pTree->m_pRoleMap->GetStringFor(m_Type); 182 if (!mapped.IsEmpty()) 183 m_Type = mapped; 184 } 185 LoadKids(pDict); 186 } 187 188 IPDF_StructTree* CPDF_StructElement::GetTree() const { 189 return m_pTree; 190 } 191 192 const CFX_ByteString& CPDF_StructElement::GetType() const { 193 return m_Type; 194 } 195 196 IPDF_StructElement* CPDF_StructElement::GetParent() const { 197 return m_pParent; 198 } 199 200 CPDF_Dictionary* CPDF_StructElement::GetDict() const { 201 return m_pDict; 202 } 203 204 int CPDF_StructElement::CountKids() const { 205 return pdfium::CollectionSize<int>(m_Kids); 206 } 207 208 IPDF_StructElement* CPDF_StructElement::GetKidIfElement(int index) const { 209 if (m_Kids[index].m_Type != CPDF_StructKid::Element) 210 return nullptr; 211 212 return m_Kids[index].m_pElement.Get(); 213 } 214 215 CPDF_StructElement::~CPDF_StructElement() {} 216 217 void CPDF_StructElement::LoadKids(CPDF_Dictionary* pDict) { 218 CPDF_Object* pObj = pDict->GetObjectFor("Pg"); 219 uint32_t PageObjNum = 0; 220 if (CPDF_Reference* pRef = ToReference(pObj)) 221 PageObjNum = pRef->GetRefObjNum(); 222 223 CPDF_Object* pKids = pDict->GetDirectObjectFor("K"); 224 if (!pKids) 225 return; 226 227 m_Kids.clear(); 228 if (CPDF_Array* pArray = pKids->AsArray()) { 229 m_Kids.resize(pArray->GetCount()); 230 for (uint32_t i = 0; i < pArray->GetCount(); i++) { 231 CPDF_Object* pKid = pArray->GetDirectObjectAt(i); 232 LoadKid(PageObjNum, pKid, &m_Kids[i]); 233 } 234 } else { 235 m_Kids.resize(1); 236 LoadKid(PageObjNum, pKids, &m_Kids[0]); 237 } 238 } 239 void CPDF_StructElement::LoadKid(uint32_t PageObjNum, 240 CPDF_Object* pKidObj, 241 CPDF_StructKid* pKid) { 242 pKid->m_Type = CPDF_StructKid::Invalid; 243 if (!pKidObj) 244 return; 245 246 if (pKidObj->IsNumber()) { 247 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 248 return; 249 } 250 pKid->m_Type = CPDF_StructKid::PageContent; 251 pKid->m_ContentId = pKidObj->GetInteger(); 252 pKid->m_PageObjNum = PageObjNum; 253 return; 254 } 255 256 CPDF_Dictionary* pKidDict = pKidObj->AsDictionary(); 257 if (!pKidDict) 258 return; 259 260 if (CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Pg"))) 261 PageObjNum = pRef->GetRefObjNum(); 262 263 CFX_ByteString type = pKidDict->GetStringFor("Type"); 264 if (type == "MCR") { 265 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 266 return; 267 } 268 pKid->m_Type = CPDF_StructKid::StreamContent; 269 CPDF_Reference* pRef = ToReference(pKidDict->GetObjectFor("Stm")); 270 pKid->m_RefObjNum = pRef ? pRef->GetRefObjNum() : 0; 271 pKid->m_PageObjNum = PageObjNum; 272 pKid->m_ContentId = pKidDict->GetIntegerFor("MCID"); 273 } else if (type == "OBJR") { 274 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 275 return; 276 } 277 pKid->m_Type = CPDF_StructKid::Object; 278 CPDF_Reference* pObj = ToReference(pKidDict->GetObjectFor("Obj")); 279 pKid->m_RefObjNum = pObj ? pObj->GetRefObjNum() : 0; 280 pKid->m_PageObjNum = PageObjNum; 281 } else { 282 pKid->m_Type = CPDF_StructKid::Element; 283 pKid->m_pDict = pKidDict; 284 if (!m_pTree->m_pPage) { 285 pKid->m_pElement = 286 pdfium::MakeRetain<CPDF_StructElement>(m_pTree, this, pKidDict); 287 } else { 288 pKid->m_pElement = nullptr; 289 } 290 } 291 } 292 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, 293 const CFX_ByteStringC& owner, 294 FX_FLOAT nLevel = 0.0F) { 295 if (nLevel > nMaxRecursion) 296 return nullptr; 297 if (!pAttrs) 298 return nullptr; 299 300 CPDF_Dictionary* pDict = nullptr; 301 if (pAttrs->IsDictionary()) { 302 pDict = pAttrs->AsDictionary(); 303 } else if (CPDF_Stream* pStream = pAttrs->AsStream()) { 304 pDict = pStream->GetDict(); 305 } else if (CPDF_Array* pArray = pAttrs->AsArray()) { 306 for (uint32_t i = 0; i < pArray->GetCount(); i++) { 307 CPDF_Object* pElement = pArray->GetDirectObjectAt(i); 308 pDict = FindAttrDict(pElement, owner, nLevel + 1); 309 if (pDict) 310 return pDict; 311 } 312 } 313 if (pDict && pDict->GetStringFor("O") == owner) 314 return pDict; 315 return nullptr; 316 } 317 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, 318 const CFX_ByteStringC& name, 319 bool bInheritable, 320 FX_FLOAT fLevel) { 321 if (fLevel > nMaxRecursion) { 322 return nullptr; 323 } 324 if (bInheritable) { 325 CPDF_Object* pAttr = GetAttr(owner, name, false); 326 if (pAttr) { 327 return pAttr; 328 } 329 if (!m_pParent) { 330 return nullptr; 331 } 332 return m_pParent->GetAttr(owner, name, true, fLevel + 1); 333 } 334 CPDF_Object* pA = m_pDict->GetDirectObjectFor("A"); 335 if (pA) { 336 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); 337 if (pAttrDict) { 338 CPDF_Object* pAttr = pAttrDict->GetDirectObjectFor(CFX_ByteString(name)); 339 if (pAttr) { 340 return pAttr; 341 } 342 } 343 } 344 CPDF_Object* pC = m_pDict->GetDirectObjectFor("C"); 345 if (!pC) 346 return nullptr; 347 348 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDictFor("ClassMap"); 349 if (!pClassMap) 350 return nullptr; 351 352 if (CPDF_Array* pArray = pC->AsArray()) { 353 for (uint32_t i = 0; i < pArray->GetCount(); i++) { 354 CFX_ByteString class_name = pArray->GetStringAt(i); 355 CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name); 356 if (pClassDict && pClassDict->GetStringFor("O") == owner) 357 return pClassDict->GetDirectObjectFor(CFX_ByteString(name)); 358 } 359 return nullptr; 360 } 361 CFX_ByteString class_name = pC->GetString(); 362 CPDF_Dictionary* pClassDict = pClassMap->GetDictFor(class_name); 363 if (pClassDict && pClassDict->GetStringFor("O") == owner) 364 return pClassDict->GetDirectObjectFor(CFX_ByteString(name)); 365 return nullptr; 366 } 367 CPDF_Object* CPDF_StructElement::GetAttr(const CFX_ByteStringC& owner, 368 const CFX_ByteStringC& name, 369 bool bInheritable, 370 int subindex) { 371 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable); 372 CPDF_Array* pArray = ToArray(pAttr); 373 if (!pArray || subindex == -1) 374 return pAttr; 375 376 if (subindex >= static_cast<int>(pArray->GetCount())) 377 return pAttr; 378 return pArray->GetDirectObjectAt(subindex); 379 } 380 CFX_ByteString CPDF_StructElement::GetName(const CFX_ByteStringC& owner, 381 const CFX_ByteStringC& name, 382 const CFX_ByteStringC& default_value, 383 bool bInheritable, 384 int subindex) { 385 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 386 if (ToName(pAttr)) 387 return pAttr->GetString(); 388 return CFX_ByteString(default_value); 389 } 390 391 FX_ARGB CPDF_StructElement::GetColor(const CFX_ByteStringC& owner, 392 const CFX_ByteStringC& name, 393 FX_ARGB default_value, 394 bool bInheritable, 395 int subindex) { 396 CPDF_Array* pArray = ToArray(GetAttr(owner, name, bInheritable, subindex)); 397 if (!pArray) 398 return default_value; 399 return 0xff000000 | ((int)(pArray->GetNumberAt(0) * 255) << 16) | 400 ((int)(pArray->GetNumberAt(1) * 255) << 8) | 401 (int)(pArray->GetNumberAt(2) * 255); 402 } 403 FX_FLOAT CPDF_StructElement::GetNumber(const CFX_ByteStringC& owner, 404 const CFX_ByteStringC& name, 405 FX_FLOAT default_value, 406 bool bInheritable, 407 int subindex) { 408 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 409 return ToNumber(pAttr) ? pAttr->GetNumber() : default_value; 410 } 411 int CPDF_StructElement::GetInteger(const CFX_ByteStringC& owner, 412 const CFX_ByteStringC& name, 413 int default_value, 414 bool bInheritable, 415 int subindex) { 416 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 417 return ToNumber(pAttr) ? pAttr->GetInteger() : default_value; 418 } 419