Home | History | Annotate | Download | only in fpdfdoc
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfdoc/cpdf_nametree.h"
      8 
      9 #include <utility>
     10 #include <vector>
     11 
     12 #include "core/fpdfapi/parser/cpdf_array.h"
     13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     14 #include "core/fpdfapi/parser/cpdf_document.h"
     15 #include "core/fpdfapi/parser/cpdf_string.h"
     16 #include "core/fpdfapi/parser/fpdf_parser_decode.h"
     17 
     18 namespace {
     19 
     20 constexpr int kNameTreeMaxRecursion = 32;
     21 
     22 std::pair<WideString, WideString> GetNodeLimitsMaybeSwap(CPDF_Array* pLimits) {
     23   ASSERT(pLimits);
     24   WideString csLeft = pLimits->GetUnicodeTextAt(0);
     25   WideString csRight = pLimits->GetUnicodeTextAt(1);
     26   // If the lower limit is greater than the upper limit, swap them.
     27   if (csLeft.Compare(csRight) > 0) {
     28     pLimits->SetNewAt<CPDF_String>(0, csRight);
     29     pLimits->SetNewAt<CPDF_String>(1, csLeft);
     30     csLeft = pLimits->GetUnicodeTextAt(0);
     31     csRight = pLimits->GetUnicodeTextAt(1);
     32   }
     33   return {csLeft, csRight};
     34 }
     35 
     36 // Get the limit arrays that leaf array |pFind| is under in the tree with root
     37 // |pNode|. |pLimits| will hold all the limit arrays from the leaf up to before
     38 // the root. Return true if successful.
     39 bool GetNodeAncestorsLimits(const CPDF_Dictionary* pNode,
     40                             const CPDF_Array* pFind,
     41                             int nLevel,
     42                             std::vector<CPDF_Array*>* pLimits) {
     43   if (nLevel > kNameTreeMaxRecursion)
     44     return false;
     45 
     46   if (pNode->GetArrayFor("Names") == pFind) {
     47     pLimits->push_back(pNode->GetArrayFor("Limits"));
     48     return true;
     49   }
     50 
     51   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
     52   if (!pKids)
     53     return false;
     54 
     55   for (size_t i = 0; i < pKids->GetCount(); ++i) {
     56     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
     57     if (!pKid)
     58       continue;
     59 
     60     if (GetNodeAncestorsLimits(pKid, pFind, nLevel + 1, pLimits)) {
     61       pLimits->push_back(pNode->GetArrayFor("Limits"));
     62       return true;
     63     }
     64   }
     65   return false;
     66 }
     67 
     68 // Upon the deletion of |csName| from leaf array |pFind|, update the ancestors
     69 // of |pFind|. Specifically, the limits of |pFind|'s ancestors will be updated
     70 // if needed, and any ancestors that are now empty will be removed.
     71 bool UpdateNodesAndLimitsUponDeletion(CPDF_Dictionary* pNode,
     72                                       const CPDF_Array* pFind,
     73                                       const WideString& csName,
     74                                       int nLevel) {
     75   if (nLevel > kNameTreeMaxRecursion)
     76     return false;
     77 
     78   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
     79   WideString csLeft;
     80   WideString csRight;
     81   if (pLimits)
     82     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
     83 
     84   CPDF_Array* pNames = pNode->GetArrayFor("Names");
     85   if (pNames) {
     86     if (pNames != pFind)
     87       return false;
     88     if (pNames->IsEmpty() || !pLimits)
     89       return true;
     90     if (csLeft != csName && csRight != csName)
     91       return true;
     92 
     93     // Since |csName| defines |pNode|'s limits, we need to loop through the
     94     // names to find the new lower and upper limits.
     95     WideString csNewLeft = csRight;
     96     WideString csNewRight = csLeft;
     97     for (size_t i = 0; i < pNames->GetCount() / 2; ++i) {
     98       WideString wsName = pNames->GetUnicodeTextAt(i * 2);
     99       if (wsName.Compare(csNewLeft) < 0)
    100         csNewLeft = wsName;
    101       if (wsName.Compare(csNewRight) > 0)
    102         csNewRight = wsName;
    103     }
    104     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
    105     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
    106     return true;
    107   }
    108 
    109   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
    110   if (!pKids)
    111     return false;
    112 
    113   // Loop through the kids to find the leaf array |pFind|.
    114   for (size_t i = 0; i < pKids->GetCount(); ++i) {
    115     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
    116     if (!pKid)
    117       continue;
    118     if (!UpdateNodesAndLimitsUponDeletion(pKid, pFind, csName, nLevel + 1))
    119       continue;
    120 
    121     // Remove this child node if it's empty.
    122     if ((pKid->KeyExist("Names") && pKid->GetArrayFor("Names")->IsEmpty()) ||
    123         (pKid->KeyExist("Kids") && pKid->GetArrayFor("Kids")->IsEmpty())) {
    124       pKids->RemoveAt(i);
    125     }
    126     if (pKids->IsEmpty() || !pLimits)
    127       return true;
    128     if (csLeft != csName && csRight != csName)
    129       return true;
    130 
    131     // Since |csName| defines |pNode|'s limits, we need to loop through the
    132     // kids to find the new lower and upper limits.
    133     WideString csNewLeft = csRight;
    134     WideString csNewRight = csLeft;
    135     for (size_t j = 0; j < pKids->GetCount(); ++j) {
    136       CPDF_Array* pKidLimits = pKids->GetDictAt(j)->GetArrayFor("Limits");
    137       ASSERT(pKidLimits);
    138       if (pKidLimits->GetUnicodeTextAt(0).Compare(csNewLeft) < 0)
    139         csNewLeft = pKidLimits->GetUnicodeTextAt(0);
    140       if (pKidLimits->GetUnicodeTextAt(1).Compare(csNewRight) > 0)
    141         csNewRight = pKidLimits->GetUnicodeTextAt(1);
    142     }
    143     pLimits->SetNewAt<CPDF_String>(0, csNewLeft);
    144     pLimits->SetNewAt<CPDF_String>(1, csNewRight);
    145     return true;
    146   }
    147   return false;
    148 }
    149 
    150 // Search for |csName| in the tree with root |pNode|. If successful, return the
    151 // value that |csName| points to; |nIndex| will be the index of |csName|,
    152 // |ppFind| will be the leaf array that |csName| is found in, and |pFindIndex|
    153 // will be the index of |csName| in |ppFind|. If |csName| is not found, |ppFind|
    154 // will be the leaf array that |csName| should be added to, and |pFindIndex|
    155 // will be the index that it should be added at.
    156 CPDF_Object* SearchNameNodeByName(const CPDF_Dictionary* pNode,
    157                                   const WideString& csName,
    158                                   int nLevel,
    159                                   size_t* nIndex,
    160                                   CPDF_Array** ppFind,
    161                                   int* pFindIndex) {
    162   if (nLevel > kNameTreeMaxRecursion)
    163     return nullptr;
    164 
    165   CPDF_Array* pLimits = pNode->GetArrayFor("Limits");
    166   CPDF_Array* pNames = pNode->GetArrayFor("Names");
    167   if (pLimits) {
    168     WideString csLeft;
    169     WideString csRight;
    170     std::tie(csLeft, csRight) = GetNodeLimitsMaybeSwap(pLimits);
    171     // Skip this node if the name to look for is smaller than its lower limit.
    172     if (csName.Compare(csLeft) < 0)
    173       return nullptr;
    174 
    175     // Skip this node if the name to look for is greater than its higher limit,
    176     // and the node itself is a leaf node.
    177     if (csName.Compare(csRight) > 0 && pNames) {
    178       if (ppFind)
    179         *ppFind = pNames;
    180       if (pFindIndex)
    181         *pFindIndex = pNames->GetCount() / 2 - 1;
    182 
    183       return nullptr;
    184     }
    185   }
    186 
    187   // If the node is a leaf node, look for the name in its names array.
    188   if (pNames) {
    189     size_t dwCount = pNames->GetCount() / 2;
    190     for (size_t i = 0; i < dwCount; i++) {
    191       WideString csValue = pNames->GetUnicodeTextAt(i * 2);
    192       int32_t iCompare = csValue.Compare(csName);
    193       if (iCompare > 0)
    194         break;
    195       if (ppFind)
    196         *ppFind = pNames;
    197       if (pFindIndex)
    198         *pFindIndex = i;
    199       if (iCompare < 0)
    200         continue;
    201 
    202       *nIndex += i;
    203       return pNames->GetDirectObjectAt(i * 2 + 1);
    204     }
    205     *nIndex += dwCount;
    206     return nullptr;
    207   }
    208 
    209   // Search through the node's children.
    210   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
    211   if (!pKids)
    212     return nullptr;
    213 
    214   for (size_t i = 0; i < pKids->GetCount(); i++) {
    215     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
    216     if (!pKid)
    217       continue;
    218 
    219     CPDF_Object* pFound = SearchNameNodeByName(pKid, csName, nLevel + 1, nIndex,
    220                                                ppFind, pFindIndex);
    221     if (pFound)
    222       return pFound;
    223   }
    224   return nullptr;
    225 }
    226 
    227 // Get the key-value pair at |nIndex| in the tree with root |pNode|. If
    228 // successful, return the value object; |csName| will be the key, |ppFind|
    229 // will be the leaf array that this pair is in, and |pFindIndex| will be the
    230 // index of the pair in |pFind|.
    231 CPDF_Object* SearchNameNodeByIndex(const CPDF_Dictionary* pNode,
    232                                    size_t nIndex,
    233                                    int nLevel,
    234                                    size_t* nCurIndex,
    235                                    WideString* csName,
    236                                    CPDF_Array** ppFind,
    237                                    int* pFindIndex) {
    238   if (nLevel > kNameTreeMaxRecursion)
    239     return nullptr;
    240 
    241   CPDF_Array* pNames = pNode->GetArrayFor("Names");
    242   if (pNames) {
    243     size_t nCount = pNames->GetCount() / 2;
    244     if (nIndex >= *nCurIndex + nCount) {
    245       *nCurIndex += nCount;
    246       return nullptr;
    247     }
    248     if (ppFind)
    249       *ppFind = pNames;
    250     if (pFindIndex)
    251       *pFindIndex = nIndex - *nCurIndex;
    252 
    253     *csName = pNames->GetUnicodeTextAt((nIndex - *nCurIndex) * 2);
    254     return pNames->GetDirectObjectAt((nIndex - *nCurIndex) * 2 + 1);
    255   }
    256 
    257   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
    258   if (!pKids)
    259     return nullptr;
    260 
    261   for (size_t i = 0; i < pKids->GetCount(); i++) {
    262     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
    263     if (!pKid)
    264       continue;
    265     CPDF_Object* pFound = SearchNameNodeByIndex(
    266         pKid, nIndex, nLevel + 1, nCurIndex, csName, ppFind, pFindIndex);
    267     if (pFound)
    268       return pFound;
    269   }
    270   return nullptr;
    271 }
    272 
    273 // Get the total number of key-value pairs in the tree with root |pNode|.
    274 size_t CountNamesInternal(CPDF_Dictionary* pNode, int nLevel) {
    275   if (nLevel > kNameTreeMaxRecursion)
    276     return 0;
    277 
    278   CPDF_Array* pNames = pNode->GetArrayFor("Names");
    279   if (pNames)
    280     return pNames->GetCount() / 2;
    281 
    282   CPDF_Array* pKids = pNode->GetArrayFor("Kids");
    283   if (!pKids)
    284     return 0;
    285 
    286   size_t nCount = 0;
    287   for (size_t i = 0; i < pKids->GetCount(); i++) {
    288     CPDF_Dictionary* pKid = pKids->GetDictAt(i);
    289     if (!pKid)
    290       continue;
    291 
    292     nCount += CountNamesInternal(pKid, nLevel + 1);
    293   }
    294   return nCount;
    295 }
    296 
    297 }  // namespace
    298 
    299 CPDF_NameTree::CPDF_NameTree(CPDF_Dictionary* pRoot) : m_pRoot(pRoot) {}
    300 
    301 CPDF_NameTree::CPDF_NameTree(const CPDF_Document* pDoc,
    302                              const ByteString& category)
    303     : m_pRoot(nullptr) {
    304   const CPDF_Dictionary* pRoot = pDoc->GetRoot();
    305   if (!pRoot)
    306     return;
    307 
    308   CPDF_Dictionary* pNames = pRoot->GetDictFor("Names");
    309   if (!pNames)
    310     return;
    311 
    312   m_pRoot = pNames->GetDictFor(category);
    313 }
    314 
    315 CPDF_NameTree::~CPDF_NameTree() {}
    316 
    317 size_t CPDF_NameTree::GetCount() const {
    318   return m_pRoot ? CountNamesInternal(m_pRoot.Get(), 0) : 0;
    319 }
    320 
    321 int CPDF_NameTree::GetIndex(const WideString& csName) const {
    322   if (!m_pRoot)
    323     return -1;
    324 
    325   size_t nIndex = 0;
    326   if (!SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
    327                             nullptr)) {
    328     return -1;
    329   }
    330   return nIndex;
    331 }
    332 
    333 bool CPDF_NameTree::AddValueAndName(std::unique_ptr<CPDF_Object> pObj,
    334                                     const WideString& name) {
    335   if (!m_pRoot)
    336     return false;
    337 
    338   size_t nIndex = 0;
    339   CPDF_Array* pFind = nullptr;
    340   int nFindIndex = -1;
    341   // Fail if the tree already contains this name or if the tree is too deep.
    342   if (SearchNameNodeByName(m_pRoot.Get(), name, 0, &nIndex, &pFind,
    343                            &nFindIndex)) {
    344     return false;
    345   }
    346 
    347   // If the returned |pFind| is a nullptr, then |name| is smaller than all
    348   // existing entries in the tree, and we did not find a leaf array to place
    349   // |name| into. We instead will find the leftmost leaf array in which to place
    350   // |name| and |pObj|.
    351   if (!pFind) {
    352     size_t nCurIndex = 0;
    353     WideString csName;
    354     SearchNameNodeByIndex(m_pRoot.Get(), 0, 0, &nCurIndex, &csName, &pFind,
    355                           nullptr);
    356   }
    357   ASSERT(pFind);
    358 
    359   // Insert the name and the object into the leaf array found. Note that the
    360   // insertion position is right after the key-value pair returned by |index|.
    361   size_t nNameIndex = (nFindIndex + 1) * 2;
    362   size_t nValueIndex = nNameIndex + 1;
    363   pFind->InsertNewAt<CPDF_String>(nNameIndex, name);
    364   pFind->InsertAt(nValueIndex, std::move(pObj));
    365 
    366   // Expand the limits that the newly added name is under, if the name falls
    367   // outside of the limits of its leaf array or any arrays above it.
    368   std::vector<CPDF_Array*> pLimits;
    369   GetNodeAncestorsLimits(m_pRoot.Get(), pFind, 0, &pLimits);
    370   for (auto* pLimit : pLimits) {
    371     if (!pLimit)
    372       continue;
    373 
    374     if (name.Compare(pLimit->GetUnicodeTextAt(0)) < 0)
    375       pLimit->SetNewAt<CPDF_String>(0, name);
    376 
    377     if (name.Compare(pLimit->GetUnicodeTextAt(1)) > 0)
    378       pLimit->SetNewAt<CPDF_String>(1, name);
    379   }
    380   return true;
    381 }
    382 
    383 bool CPDF_NameTree::DeleteValueAndName(int nIndex) {
    384   if (!m_pRoot)
    385     return false;
    386 
    387   size_t nCurIndex = 0;
    388   WideString csName;
    389   CPDF_Array* pFind = nullptr;
    390   int nFindIndex = -1;
    391   // Fail if the tree does not contain |nIndex|.
    392   if (!SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, &csName,
    393                              &pFind, &nFindIndex)) {
    394     return false;
    395   }
    396 
    397   // Remove the name and the object from the leaf array |pFind|.
    398   pFind->RemoveAt(nFindIndex * 2);
    399   pFind->RemoveAt(nFindIndex * 2);
    400 
    401   // Delete empty nodes and update the limits of |pFind|'s ancestors as needed.
    402   UpdateNodesAndLimitsUponDeletion(m_pRoot.Get(), pFind, csName, 0);
    403   return true;
    404 }
    405 
    406 CPDF_Object* CPDF_NameTree::LookupValueAndName(int nIndex,
    407                                                WideString* csName) const {
    408   csName->clear();
    409   if (!m_pRoot)
    410     return nullptr;
    411 
    412   size_t nCurIndex = 0;
    413   return SearchNameNodeByIndex(m_pRoot.Get(), nIndex, 0, &nCurIndex, csName,
    414                                nullptr, nullptr);
    415 }
    416 
    417 CPDF_Object* CPDF_NameTree::LookupValue(const WideString& csName) const {
    418   if (!m_pRoot)
    419     return nullptr;
    420 
    421   size_t nIndex = 0;
    422   return SearchNameNodeByName(m_pRoot.Get(), csName, 0, &nIndex, nullptr,
    423                               nullptr);
    424 }
    425 
    426 CPDF_Array* CPDF_NameTree::LookupNamedDest(CPDF_Document* pDoc,
    427                                            const WideString& sName) {
    428   CPDF_Object* pValue = LookupValue(sName);
    429   if (!pValue) {
    430     CPDF_Dictionary* pDests = pDoc->GetRoot()->GetDictFor("Dests");
    431     if (!pDests)
    432       return nullptr;
    433     pValue = pDests->GetDirectObjectFor(PDF_EncodeText(sName));
    434   }
    435   if (!pValue)
    436     return nullptr;
    437   if (CPDF_Array* pArray = pValue->AsArray())
    438     return pArray;
    439   if (CPDF_Dictionary* pDict = pValue->AsDictionary())
    440     return pDict->GetArrayFor("D");
    441   return nullptr;
    442 }
    443