1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfdoc/cpdf_nametree.h" 8 9 #include <utility> 10 #include <vector> 11 12 #include "core/fpdfapi/parser/cpdf_array.h" 13 #include "core/fpdfapi/parser/cpdf_dictionary.h" 14 #include "core/fpdfapi/parser/cpdf_document.h" 15 #include "core/fpdfapi/parser/cpdf_string.h" 16 #include "core/fpdfapi/parser/fpdf_parser_decode.h" 17 18 namespace { 19 20 constexpr int kNameTreeMaxRecursion = 32; 21 22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) { 23 ASSERT(pLimits); 24 WideString csLeft = pLimits->GetUnicodeTextAt(0); 25 WideString csRight = pLimits->GetUnicodeTextAt(1); 26 // If the lower limit is greater than the upper limit, swap them. 27 if (csLeft.Compare(csRight) > 0) { 28 pLimits->SetNewAt<CPDF_String>(0, csRight); 29 pLimits->SetNewAt<CPDF_String>(1, csLeft); 30 csLeft = pLimits->GetUnicodeTextAt(0); 31 csRight = pLimits->GetUnicodeTextAt(1); 32 } 33 return {csLeft, csRight}; 34 } 35 36 // Get the limit arrays that leaf array |pFind| is under in the tree with root 37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before 38 // the root. Return true if successful. 39 bool GetNodeAncestorsLimits(const CPDF_Dictionary* pNode, 40 const CPDF_Array* pFind, 41 int nLevel, 42 std::vector<CPDF_Array*>* pLimits) { 43 if (nLevel > kNameTreeMaxRecursion) 44 return false; 45 46 if (pNode->GetArrayFor("Names") == pFind) { 47 pLimits->push_back(pNode->GetArrayFor("Limits")); 48 return true; 49 } 50 51 CPDF_Array* pKids = pNode->GetArrayFor("Kids"); 52 if (!pKids) 53 return false; 54 55 for (size_t i = 0; i < pKids->GetCount(); ++i) { 56 CPDF_Dictionary* pKid = pKids->GetDictAt(i); 57 if (!pKid) 58 continue; 59 60 if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) { 61 pLimits->push_back(pNode->GetArrayFor("Limits")); 62 return true; 63 } 64 } 65 return false; 66 } 67 68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors 69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated 70 // if needed, and any ancestors that are now empty will be removed. 71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode, 72 const CPDF_Array* pFind, 73 const WideString& csName, 74 int nLevel) { 75 if (nLevel > kNameTreeMaxRecursion) 76 return false; 77 78 CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); 79 WideString csLeft; 80 WideString csRight; 81 if (pLimits) 82 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits); 83 84 CPDF_Array* pNames = pNode->GetArrayFor("Names"); 85 if (pNames) { 86 if (pNames != pFind) 87 return false; 88 if (pNames->IsEmpty() || !pLimits) 89 return true; 90 if (csLeft != csName && csRight != csName) 91 return true; 92 93 // Since |csName| defines |pNode|'s limits, we need to loop through the 94 // names to find the new lower and upper limits. 95 WideString csNewLeft = csRight; 96 WideString csNewRight = csLeft; 97 for (size_t i = 0; i < pNames->GetCount() / 2; ++i) { 98 WideString wsName = pNames->GetUnicodeTextAt(i * 2); 99 if (wsName.Compare(csNewLeft) < 0) 100 csNewLeft = wsName; 101 if (wsName.Compare(csNewRight) > 0) 102 csNewRight = wsName; 103 } 104 pLimits->SetNewAt<CPDF_String>(0, csNewLeft); 105 pLimits->SetNewAt<CPDF_String>(1, csNewRight); 106 return true; 107 } 108 109 CPDF_Array* pKids = pNode->GetArrayFor("Kids"); 110 if (!pKids) 111 return false; 112 113 // Loop through the kids to find the leaf array |pFind|. 114 for (size_t i = 0; i < pKids->GetCount(); ++i) { 115 CPDF_Dictionary* pKid = pKids->GetDictAt(i); 116 if (!pKid) 117 continue; 118 if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1)) 119 continue; 120 121 // Remove this child node if it's empty. 122 if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) || 123 (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) { 124 pKids->RemoveAt(i); 125 } 126 if (pKids->IsEmpty() || !pLimits) 127 return true; 128 if (csLeft != csName && csRight != csName) 129 return true; 130 131 // Since |csName| defines |pNode|'s limits, we need to loop through the 132 // kids to find the new lower and upper limits. 133 WideString csNewLeft = csRight; 134 WideString csNewRight = csLeft; 135 for (size_t j = 0; j < pKids->GetCount(); ++j) { 136 CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits"); 137 ASSERT(pKidLimits); 138 if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0) 139 csNewLeft = pKidLimits->GetUnicodeTextAt(0); 140 if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0) 141 csNewRight = pKidLimits->GetUnicodeTextAt(1); 142 } 143 pLimits->SetNewAt<CPDF_String>(0, csNewLeft); 144 pLimits->SetNewAt<CPDF_String>(1, csNewRight); 145 return true; 146 } 147 return false; 148 } 149 150 // Search for |csName| in the tree with root |pNode|. If successful, return the 151 // value that |csName| points to; |nIndex| will be the index of |csName|, 152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex| 153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind| 154 // will be the leaf array that |csName| should be added to, and |pFindIndex| 155 // will be the index that it should be added at. 156 CPDF_Object* SearchNameNodeByName(const CPDF_Dictionary* pNode, 157 const WideString& csName, 158 int nLevel, 159 size_t* nIndex, 160 CPDF_Array** ppFind, 161 int* pFindIndex) { 162 if (nLevel > kNameTreeMaxRecursion) 163 return nullptr; 164 165 CPDF_Array* pLimits = pNode->GetArrayFor("Limits"); 166 CPDF_Array* pNames = pNode->GetArrayFor("Names"); 167 if (pLimits) { 168 WideString csLeft; 169 WideString csRight; 170 std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits); 171 // Skip this node if the name to look for is smaller than its lower limit. 172 if (csName.Compare(csLeft) < 0) 173 return nullptr; 174 175 // Skip this node if the name to look for is greater than its higher limit, 176 // and the node itself is a leaf node. 177 if (csName.Compare(csRight) > 0 && pNames) { 178 if (ppFind) 179 *ppFind = pNames; 180 if (pFindIndex) 181 *pFindIndex = pNames->GetCount() / 2 - 1; 182 183 return nullptr; 184 } 185 } 186 187 // If the node is a leaf node, look for the name in its names array. 188 if (pNames) { 189 size_t dwCount = pNames->GetCount() / 2; 190 for (size_t i = 0; i < dwCount; i++) { 191 WideString csValue = pNames->GetUnicodeTextAt(i * 2); 192 int32_t iCompare = csValue.Compare(csName); 193 if (iCompare > 0) 194 break; 195 if (ppFind) 196 *ppFind = pNames; 197 if (pFindIndex) 198 *pFindIndex = i; 199 if (iCompare < 0) 200 continue; 201 202 *nIndex += i; 203 return pNames->GetDirectObjectAt(i * 2 + 1); 204 } 205 *nIndex += dwCount; 206 return nullptr; 207 } 208 209 // Search through the node's children. 210 CPDF_Array* pKids = pNode->GetArrayFor("Kids"); 211 if (!pKids) 212 return nullptr; 213 214 for (size_t i = 0; i < pKids->GetCount(); i++) { 215 CPDF_Dictionary* pKid = pKids->GetDictAt(i); 216 if (!pKid) 217 continue; 218 219 CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex, 220 ppFind, pFindIndex); 221 if (pFound) 222 return pFound; 223 } 224 return nullptr; 225 } 226 227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If 228 // successful, return the value object; |csName| will be the key, |ppFind| 229 // will be the leaf array that this pair is in, and |pFindIndex| will be the 230 // index of the pair in |pFind|. 231 CPDF_Object* SearchNameNodeByIndex(const CPDF_Dictionary* pNode, 232 size_t nIndex, 233 int nLevel, 234 size_t* nCurIndex, 235 WideString* csName, 236 CPDF_Array** ppFind, 237 int* pFindIndex) { 238 if (nLevel > kNameTreeMaxRecursion) 239 return nullptr; 240 241 CPDF_Array* pNames = pNode->GetArrayFor("Names"); 242 if (pNames) { 243 size_t nCount = pNames->GetCount() / 2; 244 if (nIndex >= *nCurIndex + nCount) { 245 *nCurIndex += nCount; 246 return nullptr; 247 } 248 if (ppFind) 249 *ppFind = pNames; 250 if (pFindIndex) 251 *pFindIndex = nIndex - *nCurIndex; 252 253 *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2); 254 return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1); 255 } 256 257 CPDF_Array* pKids = pNode->GetArrayFor("Kids"); 258 if (!pKids) 259 return nullptr; 260 261 for (size_t i = 0; i < pKids->GetCount(); i++) { 262 CPDF_Dictionary* pKid = pKids->GetDictAt(i); 263 if (!pKid) 264 continue; 265 CPDF_Object* pFound = SearchNameNodeByIndex( 266 pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex); 267 if (pFound) 268 return pFound; 269 } 270 return nullptr; 271 } 272 273 // Get the total number of key-value pairs in the tree with root |pNode|. 274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) { 275 if (nLevel > kNameTreeMaxRecursion) 276 return 0; 277 278 CPDF_Array* pNames = pNode->GetArrayFor("Names"); 279 if (pNames) 280 return pNames->GetCount() / 2; 281 282 CPDF_Array* pKids = pNode->GetArrayFor("Kids"); 283 if (!pKids) 284 return 0; 285 286 size_t nCount = 0; 287 for (size_t i = 0; i < pKids->GetCount(); i++) { 288 CPDF_Dictionary* pKid = pKids->GetDictAt(i); 289 if (!pKid) 290 continue; 291 292 nCount += CountNamesInternal(pKid, nLevel + 1); 293 } 294 return nCount; 295 } 296 297 } // namespace 298 299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {} 300 301 CPDF_NameTree::CPDF_NameTree(const CPDF_Document* pDoc, 302 const ByteString& category) 303 : m_pRoot(nullptr) { 304 const CPDF_Dictionary* pRoot = pDoc->GetRoot(); 305 if (!pRoot) 306 return; 307 308 CPDF_Dictionary* pNames = pRoot->GetDictFor("Names"); 309 if (!pNames) 310 return; 311 312 m_pRoot = pNames->GetDictFor(category); 313 } 314 315 CPDF_NameTree::~CPDF_NameTree() {} 316 317 size_t CPDF_NameTree::GetCount() const { 318 return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0; 319 } 320 321 int CPDF_NameTree::GetIndex(const WideString& csName) const { 322 if (!m_pRoot) 323 return -1; 324 325 size_t nIndex = 0; 326 if (!SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr, 327 nullptr)) { 328 return -1; 329 } 330 return nIndex; 331 } 332 333 bool CPDF_NameTree::AddValueAndName(std::unique_ptr<CPDF_Object> pObj, 334 const WideString& name) { 335 if (!m_pRoot) 336 return false; 337 338 size_t nIndex = 0; 339 CPDF_Array* pFind = nullptr; 340 int nFindIndex = -1; 341 // Fail if the tree already contains this name or if the tree is too deep. 342 if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind, 343 &nFindIndex)) { 344 return false; 345 } 346 347 // If the returned |pFind| is a nullptr, then |name| is smaller than all 348 // existing entries in the tree, and we did not find a leaf array to place 349 // |name| into. We instead will find the leftmost leaf array in which to place 350 // |name| and |pObj|. 351 if (!pFind) { 352 size_t nCurIndex = 0; 353 WideString csName; 354 SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind, 355 nullptr); 356 } 357 ASSERT(pFind); 358 359 // Insert the name and the object into the leaf array found. Note that the 360 // insertion position is right after the key-value pair returned by |index|. 361 size_t nNameIndex = (nFindIndex + 1) * 2; 362 size_t nValueIndex = nNameIndex + 1; 363 pFind->InsertNewAt<CPDF_String>(nNameIndex, name); 364 pFind->InsertAt(nValueIndex, std::move(pObj)); 365 366 // Expand the limits that the newly added name is under, if the name falls 367 // outside of the limits of its leaf array or any arrays above it. 368 std::vector<CPDF_Array*> pLimits; 369 GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits); 370 for (auto* pLimit : pLimits) { 371 if (!pLimit) 372 continue; 373 374 if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0) 375 pLimit->SetNewAt<CPDF_String>(0, name); 376 377 if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0) 378 pLimit->SetNewAt<CPDF_String>(1, name); 379 } 380 return true; 381 } 382 383 bool CPDF_NameTree::DeleteValueAndName(int nIndex) { 384 if (!m_pRoot) 385 return false; 386 387 size_t nCurIndex = 0; 388 WideString csName; 389 CPDF_Array* pFind = nullptr; 390 int nFindIndex = -1; 391 // Fail if the tree does not contain |nIndex|. 392 if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName, 393 &pFind, &nFindIndex)) { 394 return false; 395 } 396 397 // Remove the name and the object from the leaf array |pFind|. 398 pFind->RemoveAt(nFindIndex * 2); 399 pFind->RemoveAt(nFindIndex * 2); 400 401 // Delete empty nodes and update the limits of |pFind|'s ancestors as needed. 402 UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0); 403 return true; 404 } 405 406 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex, 407 WideString* csName) const { 408 csName->clear(); 409 if (!m_pRoot) 410 return nullptr; 411 412 size_t nCurIndex = 0; 413 return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName, 414 nullptr, nullptr); 415 } 416 417 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const { 418 if (!m_pRoot) 419 return nullptr; 420 421 size_t nIndex = 0; 422 return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr, 423 nullptr); 424 } 425 426 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc, 427 const WideString& sName) { 428 CPDF_Object* pValue = LookupValue(sName); 429 if (!pValue) { 430 CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests"); 431 if (!pDests) 432 return nullptr; 433 pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName)); 434 } 435 if (!pValue) 436 return nullptr; 437 if (CPDF_Array* pArray = pValue->AsArray()) 438 return pArray; 439 if (CPDF_Dictionary* pDict = pValue->AsDictionary()) 440 return pDict->GetArrayFor("D"); 441 return nullptr; 442 } 443