// Copyright 2016 PDFium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com

      7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
      8 
      9 #include <limits>
     10 
     11 #include "core/fpdfapi/parser/cpdf_array.h"
     12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
     13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     14 #include "core/fpdfapi/parser/cpdf_document.h"
     15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     16 #include "core/fpdfapi/parser/cpdf_read_validator.h"
     17 #include "core/fpdfapi/parser/cpdf_stream.h"
     18 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     19 #include "core/fxcrt/cfx_bitstream.h"
     20 #include "core/fxcrt/fx_safe_types.h"
     21 #include "third_party/base/numerics/safe_conversions.h"
     22 
     23 namespace {
     24 
     25 bool CanReadFromBitStream(const CFX_BitStream* hStream,
     26                           const FX_SAFE_UINT32& bits) {
     27   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
     28 }
     29 
     30 // Sanity check values from the page table header. The note in the PDF 1.7
     31 // reference for Table F.3 says the valid range is only 0 through 32. Though 0
     32 // is not useful either.
     33 bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
     34   return bits > 0 && bits <= 32;
     35 }
     36 
     37 }  // namespace
     38 
     39 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator,
     40                                  CPDF_LinearizedHeader* pLinearized)
     41     : m_pValidator(pValidator),
     42       m_pLinearized(pLinearized),
     43       m_nFirstPageSharedObjs(0),
     44       m_szFirstPageObjOffset(0) {
     45   ASSERT(m_pLinearized);
     46 }
     47 
     48 CPDF_HintTables::~CPDF_HintTables() {}
     49 
     50 uint32_t CPDF_HintTables::GetItemLength(
     51     uint32_t index,
     52     const std::vector<FX_FILESIZE>& szArray) const {
     53   if (szArray.size() < 2 || index > szArray.size() - 2 ||
     54       szArray[index] > szArray[index + 1]) {
     55     return 0;
     56   }
     57   return szArray[index + 1] - szArray[index];
     58 }
     59 
     60 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
     61   if (!hStream || hStream->IsEOF())
     62     return false;
     63 
     64   int nStreamOffset = ReadPrimaryHintStreamOffset();
     65   if (nStreamOffset < 0)
     66     return false;
     67 
     68   int nStreamLen = ReadPrimaryHintStreamLength();
     69   if (nStreamLen < 1 ||
     70       !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) {
     71     return false;
     72   }
     73 
     74   const uint32_t kHeaderSize = 288;
     75   if (hStream->BitsRemaining() < kHeaderSize)
     76     return false;
     77 
     78   // Item 1: The least number of objects in a page.
     79   const uint32_t dwObjLeastNum = hStream->GetBits(32);
     80   if (!dwObjLeastNum)
     81     return false;
     82 
     83   // Item 2: The location of the first page's page object.
     84   const uint32_t dwFirstObjLoc = hStream->GetBits(32);
     85   if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) {
     86     FX_SAFE_FILESIZE safeLoc = nStreamLen;
     87     safeLoc += dwFirstObjLoc;
     88     if (!safeLoc.IsValid())
     89       return false;
     90     m_szFirstPageObjOffset = safeLoc.ValueOrDie();
     91   } else {
     92     if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc))
     93       return false;
     94     m_szFirstPageObjOffset = dwFirstObjLoc;
     95   }
     96 
     97   // Item 3: The number of bits needed to represent the difference
     98   // between the greatest and least number of objects in a page.
     99   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
    100   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
    101     return false;
    102 
    103   // Item 4: The least length of a page in bytes.
    104   const uint32_t dwPageLeastLen = hStream->GetBits(32);
    105   if (!dwPageLeastLen)
    106     return false;
    107 
    108   // Item 5: The number of bits needed to represent the difference
    109   // between the greatest and least length of a page, in bytes.
    110   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
    111   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
    112     return false;
    113 
    114   // Skip Item 6, 7, 8, 9 total 96 bits.
    115   hStream->SkipBits(96);
    116 
    117   // Item 10: The number of bits needed to represent the greatest
    118   // number of shared object references.
    119   const uint32_t dwSharedObjBits = hStream->GetBits(16);
    120   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
    121     return false;
    122 
    123   // Item 11: The number of bits needed to represent the numerically
    124   // greatest shared object identifier used by the pages.
    125   const uint32_t dwSharedIdBits = hStream->GetBits(16);
    126   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
    127     return false;
    128 
    129   // Item 12: The number of bits needed to represent the numerator of
    130   // the fractional position for each shared object reference. For each
    131   // shared object referenced from a page, there is an indication of
    132   // where in the page's content stream the object is first referenced.
    133   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
    134   if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits))
    135     return false;
    136 
    137   // Item 13: Skip Item 13 which has 16 bits.
    138   hStream->SkipBits(16);
    139 
    140   const int nPages = GetNumberOfPages();
    141   if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM)
    142     return false;
    143 
    144   const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages);
    145   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
    146   required_bits *= dwPages;
    147   if (!CanReadFromBitStream(hStream, required_bits))
    148     return false;
    149 
    150   for (int i = 0; i < nPages; ++i) {
    151     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
    152     safeDeltaObj += dwObjLeastNum;
    153     if (!safeDeltaObj.IsValid())
    154       return false;
    155     m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie());
    156   }
    157   hStream->ByteAlign();
    158 
    159   required_bits = dwDeltaPageLenBits;
    160   required_bits *= dwPages;
    161   if (!CanReadFromBitStream(hStream, required_bits))
    162     return false;
    163 
    164   std::vector<uint32_t> dwPageLenArray;
    165   for (int i = 0; i < nPages; ++i) {
    166     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
    167     safePageLen += dwPageLeastLen;
    168     if (!safePageLen.IsValid())
    169       return false;
    170 
    171     dwPageLenArray.push_back(safePageLen.ValueOrDie());
    172   }
    173 
    174   int nOffsetE = GetEndOfFirstPageOffset();
    175   if (nOffsetE < 0)
    176     return false;
    177 
    178   int nFirstPageNum = GetFirstPageNumber();
    179   if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1)
    180     return false;
    181 
    182   for (int i = 0; i < nPages; ++i) {
    183     if (i == nFirstPageNum) {
    184       m_szPageOffsetArray.push_back(m_szFirstPageObjOffset);
    185     } else if (i == nFirstPageNum + 1) {
    186       if (i == 1) {
    187         m_szPageOffsetArray.push_back(nOffsetE);
    188       } else {
    189         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] +
    190                                       dwPageLenArray[i - 2]);
    191       }
    192     } else {
    193       if (i == 0) {
    194         m_szPageOffsetArray.push_back(nOffsetE);
    195       } else {
    196         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] +
    197                                       dwPageLenArray[i - 1]);
    198       }
    199     }
    200   }
    201 
    202   m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] +
    203                                 dwPageLenArray[nPages - 1]);
    204   hStream->ByteAlign();
    205 
    206   // Number of shared objects.
    207   required_bits = dwSharedObjBits;
    208   required_bits *= dwPages;
    209   if (!CanReadFromBitStream(hStream, required_bits))
    210     return false;
    211 
    212   for (int i = 0; i < nPages; i++)
    213     m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits));
    214   hStream->ByteAlign();
    215 
    216   // Array of identifiers, size = nshared_objects.
    217   for (int i = 0; i < nPages; i++) {
    218     required_bits = dwSharedIdBits;
    219     required_bits *= m_dwNSharedObjsArray[i];
    220     if (!CanReadFromBitStream(hStream, required_bits))
    221       return false;
    222 
    223     for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++)
    224       m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits));
    225   }
    226   hStream->ByteAlign();
    227 
    228   for (int i = 0; i < nPages; i++) {
    229     FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i];
    230     safeSize *= dwSharedNumeratorBits;
    231     if (!CanReadFromBitStream(hStream, safeSize))
    232       return false;
    233 
    234     hStream->SkipBits(safeSize.ValueOrDie());
    235   }
    236   hStream->ByteAlign();
    237 
    238   FX_SAFE_UINT32 safeTotalPageLen = dwPages;
    239   safeTotalPageLen *= dwDeltaPageLenBits;
    240   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
    241     return false;
    242 
    243   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
    244   hStream->ByteAlign();
    245   return true;
    246 }
    247 
    248 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
    249                                              uint32_t offset) {
    250   if (!hStream || hStream->IsEOF())
    251     return false;
    252 
    253   int nStreamOffset = ReadPrimaryHintStreamOffset();
    254   int nStreamLen = ReadPrimaryHintStreamLength();
    255   if (nStreamOffset < 0 || nStreamLen < 1)
    256     return false;
    257 
    258   FX_SAFE_UINT32 bit_offset = offset;
    259   bit_offset *= 8;
    260   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
    261     return false;
    262   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
    263 
    264   const uint32_t kHeaderSize = 192;
    265   if (hStream->BitsRemaining() < kHeaderSize)
    266     return false;
    267 
    268   // Item 1: The object number of the first object in the shared objects
    269   // section.
    270   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
    271 
    272   // Item 2: The location of the first object in the shared objects section.
    273   uint32_t dwFirstSharedObjLoc = hStream->GetBits(32);
    274   if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset))
    275     dwFirstSharedObjLoc += nStreamLen;
    276 
    277   // Item 3: The number of shared object entries for the first page.
    278   m_nFirstPageSharedObjs = hStream->GetBits(32);
    279 
    280   // Item 4: The number of shared object entries for the shared objects
    281   // section, including the number of shared object entries for the first page.
    282   uint32_t dwSharedObjTotal = hStream->GetBits(32);
    283 
    284   // Item 5: The number of bits needed to represent the greatest number of
    285   // objects in a shared object group. Skipped.
    286   hStream->SkipBits(16);
    287 
    288   // Item 6: The least length of a shared object group in bytes.
    289   uint32_t dwGroupLeastLen = hStream->GetBits(32);
    290 
    291   // Item 7: The number of bits needed to represent the difference between the
    292   // greatest and least length of a shared object group, in bytes.
    293   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
    294 
    295   // Trying to decode more than 32 bits isn't going to work when we write into
    296   // a uint32_t.
    297   if (dwDeltaGroupLen > 31)
    298     return false;
    299 
    300   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
    301       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
    302       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
    303     return false;
    304   }
    305 
    306   int nFirstPageObjNum = GetFirstPageObjectNumber();
    307   if (nFirstPageObjNum < 0)
    308     return false;
    309 
    310   uint32_t dwPrevObjLen = 0;
    311   uint32_t dwCurObjLen = 0;
    312   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
    313   required_bits *= dwDeltaGroupLen;
    314   if (!CanReadFromBitStream(hStream, required_bits))
    315     return false;
    316 
    317   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
    318     dwPrevObjLen = dwCurObjLen;
    319     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
    320     safeObjLen += dwGroupLeastLen;
    321     if (!safeObjLen.IsValid())
    322       return false;
    323 
    324     dwCurObjLen = safeObjLen.ValueOrDie();
    325     if (i < m_nFirstPageSharedObjs) {
    326       m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i);
    327       if (i == 0)
    328         m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset);
    329     } else {
    330       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
    331       safeObjNum += i - m_nFirstPageSharedObjs;
    332       if (!safeObjNum.IsValid())
    333         return false;
    334 
    335       m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie());
    336       if (i == m_nFirstPageSharedObjs) {
    337         FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc;
    338         if (!safeLoc.IsValid())
    339           return false;
    340 
    341         m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    342       }
    343     }
    344 
    345     if (i != 0 && i != m_nFirstPageSharedObjs) {
    346       FX_SAFE_FILESIZE safeLoc = dwPrevObjLen;
    347       safeLoc += m_szSharedObjOffsetArray[i - 1];
    348       if (!safeLoc.IsValid())
    349         return false;
    350 
    351       m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    352     }
    353   }
    354 
    355   if (dwSharedObjTotal > 0) {
    356     FX_SAFE_FILESIZE safeLoc = dwCurObjLen;
    357     safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
    358     if (!safeLoc.IsValid())
    359       return false;
    360 
    361     m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    362   }
    363 
    364   hStream->ByteAlign();
    365   if (hStream->BitsRemaining() < dwSharedObjTotal)
    366     return false;
    367 
    368   hStream->SkipBits(dwSharedObjTotal);
    369   hStream->ByteAlign();
    370   return true;
    371 }
    372 
    373 bool CPDF_HintTables::GetPagePos(uint32_t index,
    374                                  FX_FILESIZE* szPageStartPos,
    375                                  FX_FILESIZE* szPageLength,
    376                                  uint32_t* dwObjNum) const {
    377   if (index >= m_pLinearized->GetPageCount())
    378     return false;
    379 
    380   *szPageStartPos = m_szPageOffsetArray[index];
    381   *szPageLength = GetItemLength(index, m_szPageOffsetArray);
    382 
    383   int nFirstPageObjNum = GetFirstPageObjectNumber();
    384   if (nFirstPageObjNum < 0)
    385     return false;
    386 
    387   int nFirstPageNum = GetFirstPageNumber();
    388   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
    389     return false;
    390 
    391   uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum);
    392   if (index == dwFirstPageNum) {
    393     *dwObjNum = nFirstPageObjNum;
    394     return true;
    395   }
    396 
    397   // The object number of remaining pages starts from 1.
    398   *dwObjNum = 1;
    399   for (uint32_t i = 0; i < index; ++i) {
    400     if (i == dwFirstPageNum)
    401       continue;
    402     *dwObjNum += m_dwDeltaNObjsArray[i];
    403   }
    404   return true;
    405 }
    406 
    407 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) {
    408   int nFirstPageNum = GetFirstPageNumber();
    409   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
    410     return CPDF_DataAvail::DataError;
    411 
    412   if (index == static_cast<uint32_t>(nFirstPageNum))
    413     return CPDF_DataAvail::DataAvailable;
    414 
    415   uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray);
    416   // If two pages have the same offset, it should be treated as an error.
    417   if (!dwLength)
    418     return CPDF_DataAvail::DataError;
    419 
    420   if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
    421           m_szPageOffsetArray[index], dwLength))
    422     return CPDF_DataAvail::DataNotAvailable;
    423 
    424   // Download data of shared objects in the page.
    425   uint32_t offset = 0;
    426   for (uint32_t i = 0; i < index; ++i)
    427     offset += m_dwNSharedObjsArray[i];
    428 
    429   int nFirstPageObjNum = GetFirstPageObjectNumber();
    430   if (nFirstPageObjNum < 0)
    431     return CPDF_DataAvail::DataError;
    432 
    433   uint32_t dwIndex = 0;
    434   uint32_t dwObjNum = 0;
    435   for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
    436     dwIndex = m_dwIdentifierArray[offset + j];
    437     if (dwIndex >= m_dwSharedObjNumArray.size())
    438       return CPDF_DataAvail::DataNotAvailable;
    439 
    440     dwObjNum = m_dwSharedObjNumArray[dwIndex];
    441     if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) &&
    442         dwObjNum <
    443             static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) {
    444       continue;
    445     }
    446 
    447     dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
    448     // If two objects have the same offset, it should be treated as an error.
    449     if (!dwLength)
    450       return CPDF_DataAvail::DataError;
    451 
    452     if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable(
    453             m_szSharedObjOffsetArray[dwIndex], dwLength)) {
    454       return CPDF_DataAvail::DataNotAvailable;
    455     }
    456   }
    457   return CPDF_DataAvail::DataAvailable;
    458 }
    459 
    460 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
    461   if (!pHintStream)
    462     return false;
    463 
    464   CPDF_Dictionary* pDict = pHintStream->GetDict();
    465   CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
    466   if (!pOffset || !pOffset->IsNumber())
    467     return false;
    468 
    469   int shared_hint_table_offset = pOffset->GetInteger();
    470   if (shared_hint_table_offset <= 0)
    471     return false;
    472 
    473   auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream);
    474   pAcc->LoadAllDataFiltered();
    475 
    476   uint32_t size = pAcc->GetSize();
    477   // The header section of page offset hint table is 36 bytes.
    478   // The header section of shared object hint table is 24 bytes.
    479   // Hint table has at least 60 bytes.
    480   const uint32_t kMinStreamLength = 60;
    481   if (size < kMinStreamLength)
    482     return false;
    483 
    484   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
    485   if (!safe_shared_hint_table_offset.IsValid() ||
    486       size < safe_shared_hint_table_offset.ValueOrDie()) {
    487     return false;
    488   }
    489 
    490   CFX_BitStream bs(pAcc->GetData(), size);
    491   return ReadPageHintTable(&bs) &&
    492          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
    493 }
    494 
    495 int CPDF_HintTables::GetEndOfFirstPageOffset() const {
    496   return static_cast<int>(m_pLinearized->GetFirstPageEndOffset());
    497 }
    498 
    499 int CPDF_HintTables::GetNumberOfPages() const {
    500   return static_cast<int>(m_pLinearized->GetPageCount());
    501 }
    502 
    503 int CPDF_HintTables::GetFirstPageObjectNumber() const {
    504   return static_cast<int>(m_pLinearized->GetFirstPageObjNum());
    505 }
    506 
    507 int CPDF_HintTables::GetFirstPageNumber() const {
    508   return static_cast<int>(m_pLinearized->GetFirstPageNo());
    509 }
    510 
    511 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
    512   return static_cast<int>(m_pLinearized->GetHintStart());
    513 }
    514 
    515 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
    516   return static_cast<int>(m_pLinearized->GetHintLength());
    517 }
    518