1 // Copyright 2014 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "../../include/fpdfapi/fpdf_parser.h" 8 #include "../../include/fpdfapi/fpdf_page.h" 9 #include "../../include/fpdfdoc/fpdf_tagged.h" 10 #include "tagged_int.h" 11 const int nMaxRecursion = 32; 12 static FX_BOOL IsTagged(const CPDF_Document* pDoc) 13 { 14 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 15 CPDF_Dictionary* pMarkInfo = pCatalog->GetDict(FX_BSTRC("MarkInfo")); 16 return pMarkInfo != NULL && pMarkInfo->GetInteger(FX_BSTRC("Marked")); 17 } 18 CPDF_StructTree* CPDF_StructTree::LoadPage(const CPDF_Document* pDoc, const CPDF_Dictionary* pPageDict) 19 { 20 if (!IsTagged(pDoc)) { 21 return NULL; 22 } 23 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc); 24 if (pTree == NULL) { 25 return NULL; 26 } 27 pTree->LoadPageTree(pPageDict); 28 return pTree; 29 } 30 CPDF_StructTree* CPDF_StructTree::LoadDoc(const CPDF_Document* pDoc) 31 { 32 if (!IsTagged(pDoc)) { 33 return NULL; 34 } 35 CPDF_StructTreeImpl* pTree = FX_NEW CPDF_StructTreeImpl(pDoc); 36 if (pTree == NULL) { 37 return NULL; 38 } 39 pTree->LoadDocTree(); 40 return pTree; 41 } 42 CPDF_StructTreeImpl::CPDF_StructTreeImpl(const CPDF_Document* pDoc) 43 { 44 CPDF_Dictionary* pCatalog = pDoc->GetRoot(); 45 m_pTreeRoot = pCatalog->GetDict(FX_BSTRC("StructTreeRoot")); 46 if (m_pTreeRoot == NULL) { 47 return; 48 } 49 m_pRoleMap = m_pTreeRoot->GetDict(FX_BSTRC("RoleMap")); 50 } 51 CPDF_StructTreeImpl::~CPDF_StructTreeImpl() 52 { 53 for (int i = 0; i < m_Kids.GetSize(); i ++) 54 if (m_Kids[i]) { 55 m_Kids[i]->Release(); 56 } 57 } 58 void CPDF_StructTreeImpl::LoadDocTree() 59 { 60 m_pPage = NULL; 61 if (m_pTreeRoot == NULL) { 62 return; 63 } 64 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 65 if (pKids == NULL) { 66 return; 67 } 68 if (pKids->GetType() == PDFOBJ_DICTIONARY) { 69 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, (CPDF_Dictionary*)pKids); 70 if (pStructElementImpl == NULL) { 71 return; 72 } 73 m_Kids.Add(pStructElementImpl); 74 return; 75 } 76 if (pKids->GetType() != PDFOBJ_ARRAY) { 77 return; 78 } 79 CPDF_Array* pArray = (CPDF_Array*)pKids; 80 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 81 CPDF_Dictionary* pKid = pArray->GetDict(i); 82 CPDF_StructElementImpl* pStructElementImpl = FX_NEW CPDF_StructElementImpl(this, NULL, pKid); 83 if (pStructElementImpl == NULL) { 84 return; 85 } 86 m_Kids.Add(pStructElementImpl); 87 } 88 } 89 void CPDF_StructTreeImpl::LoadPageTree(const CPDF_Dictionary* pPageDict) 90 { 91 m_pPage = pPageDict; 92 if (m_pTreeRoot == NULL) { 93 return; 94 } 95 CPDF_Object* pKids = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 96 if (pKids == NULL) { 97 return; 98 } 99 FX_DWORD dwKids = 0; 100 if (pKids->GetType() == PDFOBJ_DICTIONARY) { 101 dwKids = 1; 102 } else if (pKids->GetType() == PDFOBJ_ARRAY) { 103 dwKids = ((CPDF_Array*)pKids)->GetCount(); 104 } else { 105 return; 106 } 107 FX_DWORD i; 108 m_Kids.SetSize(dwKids); 109 for (i = 0; i < dwKids; i ++) { 110 m_Kids[i] = NULL; 111 } 112 CFX_MapPtrToPtr element_map; 113 CPDF_Dictionary* pParentTree = m_pTreeRoot->GetDict(FX_BSTRC("ParentTree")); 114 if (pParentTree == NULL) { 115 return; 116 } 117 CPDF_NumberTree parent_tree(pParentTree); 118 int parents_id = pPageDict->GetInteger(FX_BSTRC("StructParents"), -1); 119 if (parents_id >= 0) { 120 CPDF_Object* pParents = parent_tree.LookupValue(parents_id); 121 if (pParents == NULL || pParents->GetType() != PDFOBJ_ARRAY) { 122 return; 123 } 124 CPDF_Array* pParentArray = (CPDF_Array*)pParents; 125 for (i = 0; i < pParentArray->GetCount(); i ++) { 126 CPDF_Dictionary* pParent = pParentArray->GetDict(i); 127 if (pParent == NULL) { 128 continue; 129 } 130 AddPageNode(pParent, element_map); 131 } 132 } 133 } 134 CPDF_StructElementImpl* CPDF_StructTreeImpl::AddPageNode(CPDF_Dictionary* pDict, CFX_MapPtrToPtr& map, int nLevel) 135 { 136 if (nLevel > nMaxRecursion) { 137 return NULL; 138 } 139 CPDF_StructElementImpl* pElement = NULL; 140 if (map.Lookup(pDict, (FX_LPVOID&)pElement)) { 141 return pElement; 142 } 143 pElement = FX_NEW CPDF_StructElementImpl(this, NULL, pDict); 144 if (pElement == NULL) { 145 return NULL; 146 } 147 map.SetAt(pDict, pElement); 148 CPDF_Dictionary* pParent = pDict->GetDict(FX_BSTRC("P")); 149 if (pParent == NULL || pParent->GetString(FX_BSTRC("Type")) == FX_BSTRC("StructTreeRoot")) { 150 if (!AddTopLevelNode(pDict, pElement)) { 151 pElement->Release(); 152 map.RemoveKey(pDict); 153 } 154 } else { 155 CPDF_StructElementImpl* pParentElement = AddPageNode(pParent, map, nLevel + 1); 156 FX_BOOL bSave = FALSE; 157 for (int i = 0; i < pParentElement->m_Kids.GetSize(); i ++) { 158 if (pParentElement->m_Kids[i].m_Type != CPDF_StructKid::Element) { 159 continue; 160 } 161 if (pParentElement->m_Kids[i].m_Element.m_pDict != pDict) { 162 continue; 163 } 164 pParentElement->m_Kids[i].m_Element.m_pElement = pElement->Retain(); 165 bSave = TRUE; 166 } 167 if (!bSave) { 168 pElement->Release(); 169 map.RemoveKey(pDict); 170 } 171 } 172 return pElement; 173 } 174 FX_BOOL CPDF_StructTreeImpl::AddTopLevelNode(CPDF_Dictionary* pDict, CPDF_StructElementImpl* pElement) 175 { 176 CPDF_Object *pObj = m_pTreeRoot->GetElementValue(FX_BSTRC("K")); 177 if (!pObj) { 178 return FALSE; 179 } 180 if (pObj->GetType() == PDFOBJ_DICTIONARY) { 181 if (pObj->GetObjNum() == pDict->GetObjNum()) { 182 if (m_Kids[0]) { 183 m_Kids[0]->Release(); 184 } 185 m_Kids[0] = pElement->Retain(); 186 } else { 187 return FALSE; 188 } 189 } 190 if (pObj->GetType() == PDFOBJ_ARRAY) { 191 CPDF_Array* pTopKids = (CPDF_Array*)pObj; 192 FX_DWORD i; 193 FX_BOOL bSave = FALSE; 194 for (i = 0; i < pTopKids->GetCount(); i ++) { 195 CPDF_Reference* pKidRef = (CPDF_Reference*)pTopKids->GetElement(i); 196 if (pKidRef->GetType() != PDFOBJ_REFERENCE || pKidRef->GetRefObjNum() != pDict->GetObjNum()) { 197 continue; 198 } 199 if (m_Kids[i]) { 200 m_Kids[i]->Release(); 201 } 202 m_Kids[i] = pElement->Retain(); 203 bSave = TRUE; 204 } 205 if (!bSave) { 206 return FALSE; 207 } 208 } 209 return TRUE; 210 } 211 CPDF_StructElementImpl::CPDF_StructElementImpl(CPDF_StructTreeImpl* pTree, CPDF_StructElementImpl* pParent, CPDF_Dictionary* pDict) 212 : m_RefCount(0) 213 { 214 m_pTree = pTree; 215 m_pDict = pDict; 216 m_Type = pDict->GetString(FX_BSTRC("S")); 217 CFX_ByteString mapped = pTree->m_pRoleMap->GetString(m_Type); 218 if (!mapped.IsEmpty()) { 219 m_Type = mapped; 220 } 221 m_pParent = pParent; 222 LoadKids(pDict); 223 } 224 CPDF_StructElementImpl::~CPDF_StructElementImpl() 225 { 226 for (int i = 0; i < m_Kids.GetSize(); i ++) { 227 if (m_Kids[i].m_Type == CPDF_StructKid::Element && m_Kids[i].m_Element.m_pElement) { 228 ((CPDF_StructElementImpl*)m_Kids[i].m_Element.m_pElement)->Release(); 229 } 230 } 231 } 232 CPDF_StructElementImpl* CPDF_StructElementImpl::Retain() 233 { 234 m_RefCount++; 235 return this; 236 } 237 void CPDF_StructElementImpl::Release() 238 { 239 if(--m_RefCount < 1) { 240 delete this; 241 } 242 } 243 void CPDF_StructElementImpl::LoadKids(CPDF_Dictionary* pDict) 244 { 245 CPDF_Object* pObj = pDict->GetElement(FX_BSTRC("Pg")); 246 FX_DWORD PageObjNum = 0; 247 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) { 248 PageObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum(); 249 } 250 CPDF_Object* pKids = pDict->GetElementValue(FX_BSTRC("K")); 251 if (pKids == NULL) { 252 return; 253 } 254 if (pKids->GetType() == PDFOBJ_ARRAY) { 255 CPDF_Array* pArray = (CPDF_Array*)pKids; 256 m_Kids.SetSize(pArray->GetCount()); 257 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 258 CPDF_Object* pKid = pArray->GetElementValue(i); 259 LoadKid(PageObjNum, pKid, &m_Kids[i]); 260 } 261 } else { 262 m_Kids.SetSize(1); 263 LoadKid(PageObjNum, pKids, &m_Kids[0]); 264 } 265 } 266 void CPDF_StructElementImpl::LoadKid(FX_DWORD PageObjNum, CPDF_Object* pKidObj, CPDF_StructKid* pKid) 267 { 268 pKid->m_Type = CPDF_StructKid::Invalid; 269 if (pKidObj == NULL) { 270 return; 271 } 272 if (pKidObj->GetType() == PDFOBJ_NUMBER) { 273 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 274 return; 275 } 276 pKid->m_Type = CPDF_StructKid::PageContent; 277 pKid->m_PageContent.m_ContentId = pKidObj->GetInteger(); 278 pKid->m_PageContent.m_PageObjNum = PageObjNum; 279 return; 280 } 281 if (pKidObj->GetType() != PDFOBJ_DICTIONARY) { 282 return; 283 } 284 CPDF_Dictionary* pKidDict = (CPDF_Dictionary*)pKidObj; 285 CPDF_Object* pPageObj = pKidDict->GetElement(FX_BSTRC("Pg")); 286 if (pPageObj && pPageObj->GetType() == PDFOBJ_REFERENCE) { 287 PageObjNum = ((CPDF_Reference*)pPageObj)->GetRefObjNum(); 288 } 289 CFX_ByteString type = pKidDict->GetString(FX_BSTRC("Type")); 290 if (type == FX_BSTRC("MCR")) { 291 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 292 return; 293 } 294 pKid->m_Type = CPDF_StructKid::StreamContent; 295 CPDF_Object* pStreamObj = pKidDict->GetElement(FX_BSTRC("Stm")); 296 if (pStreamObj && pStreamObj->GetType() == PDFOBJ_REFERENCE) { 297 pKid->m_StreamContent.m_RefObjNum = ((CPDF_Reference*)pStreamObj)->GetRefObjNum(); 298 } else { 299 pKid->m_StreamContent.m_RefObjNum = 0; 300 } 301 pKid->m_StreamContent.m_PageObjNum = PageObjNum; 302 pKid->m_StreamContent.m_ContentId = pKidDict->GetInteger(FX_BSTRC("MCID")); 303 } else if (type == FX_BSTRC("OBJR")) { 304 if (m_pTree->m_pPage && m_pTree->m_pPage->GetObjNum() != PageObjNum) { 305 return; 306 } 307 pKid->m_Type = CPDF_StructKid::Object; 308 CPDF_Object* pObj = pKidDict->GetElement(FX_BSTRC("Obj")); 309 if (pObj && pObj->GetType() == PDFOBJ_REFERENCE) { 310 pKid->m_Object.m_RefObjNum = ((CPDF_Reference*)pObj)->GetRefObjNum(); 311 } else { 312 pKid->m_Object.m_RefObjNum = 0; 313 } 314 pKid->m_Object.m_PageObjNum = PageObjNum; 315 } else { 316 pKid->m_Type = CPDF_StructKid::Element; 317 pKid->m_Element.m_pDict = pKidDict; 318 if (m_pTree->m_pPage == NULL) { 319 pKid->m_Element.m_pElement = FX_NEW CPDF_StructElementImpl(m_pTree, this, pKidDict); 320 } else { 321 pKid->m_Element.m_pElement = NULL; 322 } 323 } 324 } 325 static CPDF_Dictionary* FindAttrDict(CPDF_Object* pAttrs, FX_BSTR owner, FX_FLOAT nLevel = 0.0F) 326 { 327 if (nLevel > nMaxRecursion) { 328 return NULL; 329 } 330 if (pAttrs == NULL) { 331 return NULL; 332 } 333 CPDF_Dictionary* pDict = NULL; 334 if (pAttrs->GetType() == PDFOBJ_DICTIONARY) { 335 pDict = (CPDF_Dictionary*)pAttrs; 336 } else if (pAttrs->GetType() == PDFOBJ_STREAM) { 337 pDict = ((CPDF_Stream*)pAttrs)->GetDict(); 338 } else if (pAttrs->GetType() == PDFOBJ_ARRAY) { 339 CPDF_Array* pArray = (CPDF_Array*)pAttrs; 340 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 341 CPDF_Object* pElement = pArray->GetElementValue(i); 342 pDict = FindAttrDict(pElement, owner, nLevel + 1); 343 if (pDict) { 344 return pDict; 345 } 346 } 347 } 348 if (pDict && pDict->GetString(FX_BSTRC("O")) == owner) { 349 return pDict; 350 } 351 return NULL; 352 } 353 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, FX_FLOAT fLevel) 354 { 355 if (fLevel > nMaxRecursion) { 356 return NULL; 357 } 358 if (bInheritable) { 359 CPDF_Object* pAttr = GetAttr(owner, name, FALSE); 360 if (pAttr) { 361 return pAttr; 362 } 363 if (m_pParent == NULL) { 364 return NULL; 365 } 366 return m_pParent->GetAttr(owner, name, TRUE, fLevel + 1); 367 } 368 CPDF_Object* pA = m_pDict->GetElementValue(FX_BSTRC("A")); 369 if (pA) { 370 CPDF_Dictionary* pAttrDict = FindAttrDict(pA, owner); 371 if (pAttrDict) { 372 CPDF_Object* pAttr = pAttrDict->GetElementValue(name); 373 if (pAttr) { 374 return pAttr; 375 } 376 } 377 } 378 CPDF_Object* pC = m_pDict->GetElementValue(FX_BSTRC("C")); 379 if (pC == NULL) { 380 return NULL; 381 } 382 CPDF_Dictionary* pClassMap = m_pTree->m_pTreeRoot->GetDict(FX_BSTRC("ClassMap")); 383 if (pClassMap == NULL) { 384 return NULL; 385 } 386 if (pC->GetType() == PDFOBJ_ARRAY) { 387 CPDF_Array* pArray = (CPDF_Array*)pC; 388 for (FX_DWORD i = 0; i < pArray->GetCount(); i ++) { 389 CFX_ByteString class_name = pArray->GetString(i); 390 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 391 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) { 392 return pClassDict->GetElementValue(name); 393 } 394 } 395 return NULL; 396 } 397 CFX_ByteString class_name = pC->GetString(); 398 CPDF_Dictionary* pClassDict = pClassMap->GetDict(class_name); 399 if (pClassDict && pClassDict->GetString(FX_BSTRC("O")) == owner) { 400 return pClassDict->GetElementValue(name); 401 } 402 return NULL; 403 } 404 CPDF_Object* CPDF_StructElementImpl::GetAttr(FX_BSTR owner, FX_BSTR name, FX_BOOL bInheritable, int subindex) 405 { 406 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable); 407 if (pAttr == NULL || subindex == -1 || pAttr->GetType() != PDFOBJ_ARRAY) { 408 return pAttr; 409 } 410 CPDF_Array* pArray = (CPDF_Array*)pAttr; 411 if (subindex >= (int)pArray->GetCount()) { 412 return pAttr; 413 } 414 return pArray->GetElementValue(subindex); 415 } 416 CFX_ByteString CPDF_StructElementImpl::GetName(FX_BSTR owner, FX_BSTR name, FX_BSTR default_value, FX_BOOL bInheritable, int subindex) 417 { 418 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 419 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NAME) { 420 return default_value; 421 } 422 return pAttr->GetString(); 423 } 424 FX_ARGB CPDF_StructElementImpl::GetColor(FX_BSTR owner, FX_BSTR name, FX_ARGB default_value, FX_BOOL bInheritable, int subindex) 425 { 426 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 427 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_ARRAY) { 428 return default_value; 429 } 430 CPDF_Array* pArray = (CPDF_Array*)pAttr; 431 return 0xff000000 | ((int)(pArray->GetNumber(0) * 255) << 16) | ((int)(pArray->GetNumber(1) * 255) << 8) | (int)(pArray->GetNumber(2) * 255); 432 } 433 FX_FLOAT CPDF_StructElementImpl::GetNumber(FX_BSTR owner, FX_BSTR name, FX_FLOAT default_value, FX_BOOL bInheritable, int subindex) 434 { 435 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 436 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) { 437 return default_value; 438 } 439 return pAttr->GetNumber(); 440 } 441 int CPDF_StructElementImpl::GetInteger(FX_BSTR owner, FX_BSTR name, int default_value, FX_BOOL bInheritable, int subindex) 442 { 443 CPDF_Object* pAttr = GetAttr(owner, name, bInheritable, subindex); 444 if (pAttr == NULL || pAttr->GetType() != PDFOBJ_NUMBER) { 445 return default_value; 446 } 447 return pAttr->GetInteger(); 448 } 449