Home | History | Annotate | Download | only in parser
      1 // Copyright 2016 PDFium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
      6 
      7 #include "core/fpdfapi/parser/cpdf_hint_tables.h"
      8 
      9 #include <limits>
     10 
     11 #include "core/fpdfapi/parser/cpdf_array.h"
     12 #include "core/fpdfapi/parser/cpdf_data_avail.h"
     13 #include "core/fpdfapi/parser/cpdf_dictionary.h"
     14 #include "core/fpdfapi/parser/cpdf_document.h"
     15 #include "core/fpdfapi/parser/cpdf_linearized_header.h"
     16 #include "core/fpdfapi/parser/cpdf_stream.h"
     17 #include "core/fpdfapi/parser/cpdf_stream_acc.h"
     18 #include "core/fxcrt/fx_safe_types.h"
     19 #include "third_party/base/numerics/safe_conversions.h"
     20 
     21 namespace {
     22 
     23 bool CanReadFromBitStream(const CFX_BitStream* hStream,
     24                           const FX_SAFE_UINT32& bits) {
     25   return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie();
     26 }
     27 
     28 // Sanity check values from the page table header. The note in the PDF 1.7
     29 // reference for Table F.3 says the valid range is only 0 through 32. Though 0
     30 // is not useful either.
     31 bool IsValidPageOffsetHintTableBitCount(uint32_t bits) {
     32   return bits > 0 && bits <= 32;
     33 }
     34 
     35 }  // namespace
     36 
     37 CPDF_HintTables::CPDF_HintTables(CPDF_DataAvail* pDataAvail,
     38                                  CPDF_LinearizedHeader* pLinearized)
     39     : m_pDataAvail(pDataAvail),
     40       m_pLinearized(pLinearized),
     41       m_nFirstPageSharedObjs(0),
     42       m_szFirstPageObjOffset(0) {
     43   ASSERT(m_pLinearized);
     44 }
     45 
     46 CPDF_HintTables::~CPDF_HintTables() {}
     47 
     48 uint32_t CPDF_HintTables::GetItemLength(
     49     uint32_t index,
     50     const std::vector<FX_FILESIZE>& szArray) {
     51   if (szArray.size() < 2 || index > szArray.size() - 2 ||
     52       szArray[index] > szArray[index + 1]) {
     53     return 0;
     54   }
     55   return szArray[index + 1] - szArray[index];
     56 }
     57 
     58 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) {
     59   if (!hStream || hStream->IsEOF())
     60     return false;
     61 
     62   int nStreamOffset = ReadPrimaryHintStreamOffset();
     63   if (nStreamOffset < 0)
     64     return false;
     65 
     66   int nStreamLen = ReadPrimaryHintStreamLength();
     67   if (nStreamLen < 1 ||
     68       !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) {
     69     return false;
     70   }
     71 
     72   const uint32_t kHeaderSize = 288;
     73   if (hStream->BitsRemaining() < kHeaderSize)
     74     return false;
     75 
     76   // Item 1: The least number of objects in a page.
     77   const uint32_t dwObjLeastNum = hStream->GetBits(32);
     78   if (!dwObjLeastNum)
     79     return false;
     80 
     81   // Item 2: The location of the first page's page object.
     82   const uint32_t dwFirstObjLoc = hStream->GetBits(32);
     83   if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) {
     84     FX_SAFE_FILESIZE safeLoc = nStreamLen;
     85     safeLoc += dwFirstObjLoc;
     86     if (!safeLoc.IsValid())
     87       return false;
     88     m_szFirstPageObjOffset = safeLoc.ValueOrDie();
     89   } else {
     90     if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc))
     91       return false;
     92     m_szFirstPageObjOffset = dwFirstObjLoc;
     93   }
     94 
     95   // Item 3: The number of bits needed to represent the difference
     96   // between the greatest and least number of objects in a page.
     97   const uint32_t dwDeltaObjectsBits = hStream->GetBits(16);
     98   if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits))
     99     return false;
    100 
    101   // Item 4: The least length of a page in bytes.
    102   const uint32_t dwPageLeastLen = hStream->GetBits(32);
    103   if (!dwPageLeastLen)
    104     return false;
    105 
    106   // Item 5: The number of bits needed to represent the difference
    107   // between the greatest and least length of a page, in bytes.
    108   const uint32_t dwDeltaPageLenBits = hStream->GetBits(16);
    109   if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits))
    110     return false;
    111 
    112   // Skip Item 6, 7, 8, 9 total 96 bits.
    113   hStream->SkipBits(96);
    114 
    115   // Item 10: The number of bits needed to represent the greatest
    116   // number of shared object references.
    117   const uint32_t dwSharedObjBits = hStream->GetBits(16);
    118   if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits))
    119     return false;
    120 
    121   // Item 11: The number of bits needed to represent the numerically
    122   // greatest shared object identifier used by the pages.
    123   const uint32_t dwSharedIdBits = hStream->GetBits(16);
    124   if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits))
    125     return false;
    126 
    127   // Item 12: The number of bits needed to represent the numerator of
    128   // the fractional position for each shared object reference. For each
    129   // shared object referenced from a page, there is an indication of
    130   // where in the page's content stream the object is first referenced.
    131   const uint32_t dwSharedNumeratorBits = hStream->GetBits(16);
    132   if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits))
    133     return false;
    134 
    135   // Item 13: Skip Item 13 which has 16 bits.
    136   hStream->SkipBits(16);
    137 
    138   const int nPages = GetNumberOfPages();
    139   if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM)
    140     return false;
    141 
    142   const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages);
    143   FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits;
    144   required_bits *= dwPages;
    145   if (!CanReadFromBitStream(hStream, required_bits))
    146     return false;
    147 
    148   for (int i = 0; i < nPages; ++i) {
    149     FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits);
    150     safeDeltaObj += dwObjLeastNum;
    151     if (!safeDeltaObj.IsValid())
    152       return false;
    153     m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie());
    154   }
    155   hStream->ByteAlign();
    156 
    157   required_bits = dwDeltaPageLenBits;
    158   required_bits *= dwPages;
    159   if (!CanReadFromBitStream(hStream, required_bits))
    160     return false;
    161 
    162   std::vector<uint32_t> dwPageLenArray;
    163   for (int i = 0; i < nPages; ++i) {
    164     FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits);
    165     safePageLen += dwPageLeastLen;
    166     if (!safePageLen.IsValid())
    167       return false;
    168 
    169     dwPageLenArray.push_back(safePageLen.ValueOrDie());
    170   }
    171 
    172   int nOffsetE = GetEndOfFirstPageOffset();
    173   if (nOffsetE < 0)
    174     return false;
    175 
    176   int nFirstPageNum = GetFirstPageNumber();
    177   if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1)
    178     return false;
    179 
    180   for (int i = 0; i < nPages; ++i) {
    181     if (i == nFirstPageNum) {
    182       m_szPageOffsetArray.push_back(m_szFirstPageObjOffset);
    183     } else if (i == nFirstPageNum + 1) {
    184       if (i == 1) {
    185         m_szPageOffsetArray.push_back(nOffsetE);
    186       } else {
    187         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] +
    188                                       dwPageLenArray[i - 2]);
    189       }
    190     } else {
    191       if (i == 0) {
    192         m_szPageOffsetArray.push_back(nOffsetE);
    193       } else {
    194         m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] +
    195                                       dwPageLenArray[i - 1]);
    196       }
    197     }
    198   }
    199 
    200   m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] +
    201                                 dwPageLenArray[nPages - 1]);
    202   hStream->ByteAlign();
    203 
    204   // Number of shared objects.
    205   required_bits = dwSharedObjBits;
    206   required_bits *= dwPages;
    207   if (!CanReadFromBitStream(hStream, required_bits))
    208     return false;
    209 
    210   for (int i = 0; i < nPages; i++)
    211     m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits));
    212   hStream->ByteAlign();
    213 
    214   // Array of identifiers, size = nshared_objects.
    215   for (int i = 0; i < nPages; i++) {
    216     required_bits = dwSharedIdBits;
    217     required_bits *= m_dwNSharedObjsArray[i];
    218     if (!CanReadFromBitStream(hStream, required_bits))
    219       return false;
    220 
    221     for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++)
    222       m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits));
    223   }
    224   hStream->ByteAlign();
    225 
    226   for (int i = 0; i < nPages; i++) {
    227     FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i];
    228     safeSize *= dwSharedNumeratorBits;
    229     if (!CanReadFromBitStream(hStream, safeSize))
    230       return false;
    231 
    232     hStream->SkipBits(safeSize.ValueOrDie());
    233   }
    234   hStream->ByteAlign();
    235 
    236   FX_SAFE_UINT32 safeTotalPageLen = dwPages;
    237   safeTotalPageLen *= dwDeltaPageLenBits;
    238   if (!CanReadFromBitStream(hStream, safeTotalPageLen))
    239     return false;
    240 
    241   hStream->SkipBits(safeTotalPageLen.ValueOrDie());
    242   hStream->ByteAlign();
    243   return true;
    244 }
    245 
    246 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream,
    247                                              uint32_t offset) {
    248   if (!hStream || hStream->IsEOF())
    249     return false;
    250 
    251   int nStreamOffset = ReadPrimaryHintStreamOffset();
    252   int nStreamLen = ReadPrimaryHintStreamLength();
    253   if (nStreamOffset < 0 || nStreamLen < 1)
    254     return false;
    255 
    256   FX_SAFE_UINT32 bit_offset = offset;
    257   bit_offset *= 8;
    258   if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie())
    259     return false;
    260   hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie());
    261 
    262   const uint32_t kHeaderSize = 192;
    263   if (hStream->BitsRemaining() < kHeaderSize)
    264     return false;
    265 
    266   // Item 1: The object number of the first object in the shared objects
    267   // section.
    268   uint32_t dwFirstSharedObjNum = hStream->GetBits(32);
    269 
    270   // Item 2: The location of the first object in the shared objects section.
    271   uint32_t dwFirstSharedObjLoc = hStream->GetBits(32);
    272   if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset))
    273     dwFirstSharedObjLoc += nStreamLen;
    274 
    275   // Item 3: The number of shared object entries for the first page.
    276   m_nFirstPageSharedObjs = hStream->GetBits(32);
    277 
    278   // Item 4: The number of shared object entries for the shared objects
    279   // section, including the number of shared object entries for the first page.
    280   uint32_t dwSharedObjTotal = hStream->GetBits(32);
    281 
    282   // Item 5: The number of bits needed to represent the greatest number of
    283   // objects in a shared object group. Skipped.
    284   hStream->SkipBits(16);
    285 
    286   // Item 6: The least length of a shared object group in bytes.
    287   uint32_t dwGroupLeastLen = hStream->GetBits(32);
    288 
    289   // Item 7: The number of bits needed to represent the difference between the
    290   // greatest and least length of a shared object group, in bytes.
    291   uint32_t dwDeltaGroupLen = hStream->GetBits(16);
    292 
    293   if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber ||
    294       m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber ||
    295       dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) {
    296     return false;
    297   }
    298 
    299   int nFirstPageObjNum = GetFirstPageObjectNumber();
    300   if (nFirstPageObjNum < 0)
    301     return false;
    302 
    303   uint32_t dwPrevObjLen = 0;
    304   uint32_t dwCurObjLen = 0;
    305   FX_SAFE_UINT32 required_bits = dwSharedObjTotal;
    306   required_bits *= dwDeltaGroupLen;
    307   if (!CanReadFromBitStream(hStream, required_bits))
    308     return false;
    309 
    310   for (uint32_t i = 0; i < dwSharedObjTotal; ++i) {
    311     dwPrevObjLen = dwCurObjLen;
    312     FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen);
    313     safeObjLen += dwGroupLeastLen;
    314     if (!safeObjLen.IsValid())
    315       return false;
    316 
    317     dwCurObjLen = safeObjLen.ValueOrDie();
    318     if (i < m_nFirstPageSharedObjs) {
    319       m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i);
    320       if (i == 0)
    321         m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset);
    322     } else {
    323       FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum;
    324       safeObjNum += i - m_nFirstPageSharedObjs;
    325       if (!safeObjNum.IsValid())
    326         return false;
    327 
    328       m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie());
    329       if (i == m_nFirstPageSharedObjs) {
    330         FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc;
    331         if (!safeLoc.IsValid())
    332           return false;
    333 
    334         m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    335       }
    336     }
    337 
    338     if (i != 0 && i != m_nFirstPageSharedObjs) {
    339       FX_SAFE_FILESIZE safeLoc = dwPrevObjLen;
    340       safeLoc += m_szSharedObjOffsetArray[i - 1];
    341       if (!safeLoc.IsValid())
    342         return false;
    343 
    344       m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    345     }
    346   }
    347 
    348   if (dwSharedObjTotal > 0) {
    349     FX_SAFE_FILESIZE safeLoc = dwCurObjLen;
    350     safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1];
    351     if (!safeLoc.IsValid())
    352       return false;
    353 
    354     m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie());
    355   }
    356 
    357   hStream->ByteAlign();
    358   if (hStream->BitsRemaining() < dwSharedObjTotal)
    359     return false;
    360 
    361   hStream->SkipBits(dwSharedObjTotal);
    362   hStream->ByteAlign();
    363   return true;
    364 }
    365 
    366 bool CPDF_HintTables::GetPagePos(uint32_t index,
    367                                  FX_FILESIZE* szPageStartPos,
    368                                  FX_FILESIZE* szPageLength,
    369                                  uint32_t* dwObjNum) {
    370   *szPageStartPos = m_szPageOffsetArray[index];
    371   *szPageLength = GetItemLength(index, m_szPageOffsetArray);
    372 
    373   int nFirstPageObjNum = GetFirstPageObjectNumber();
    374   if (nFirstPageObjNum < 0)
    375     return false;
    376 
    377   int nFirstPageNum = GetFirstPageNumber();
    378   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
    379     return false;
    380 
    381   uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum);
    382   if (index == dwFirstPageNum) {
    383     *dwObjNum = nFirstPageObjNum;
    384     return true;
    385   }
    386 
    387   // The object number of remaining pages starts from 1.
    388   *dwObjNum = 1;
    389   for (uint32_t i = 0; i < index; ++i) {
    390     if (i == dwFirstPageNum)
    391       continue;
    392     *dwObjNum += m_dwDeltaNObjsArray[i];
    393   }
    394   return true;
    395 }
    396 
    397 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(
    398     uint32_t index,
    399     CPDF_DataAvail::DownloadHints* pHints) {
    400   if (!pHints)
    401     return CPDF_DataAvail::DataError;
    402 
    403   int nFirstPageNum = GetFirstPageNumber();
    404   if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum))
    405     return CPDF_DataAvail::DataError;
    406 
    407   if (index == static_cast<uint32_t>(nFirstPageNum))
    408     return CPDF_DataAvail::DataAvailable;
    409 
    410   uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray);
    411   // If two pages have the same offset, it should be treated as an error.
    412   if (!dwLength)
    413     return CPDF_DataAvail::DataError;
    414 
    415   if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints))
    416     return CPDF_DataAvail::DataNotAvailable;
    417 
    418   // Download data of shared objects in the page.
    419   uint32_t offset = 0;
    420   for (uint32_t i = 0; i < index; ++i)
    421     offset += m_dwNSharedObjsArray[i];
    422 
    423   int nFirstPageObjNum = GetFirstPageObjectNumber();
    424   if (nFirstPageObjNum < 0)
    425     return CPDF_DataAvail::DataError;
    426 
    427   uint32_t dwIndex = 0;
    428   uint32_t dwObjNum = 0;
    429   for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) {
    430     dwIndex = m_dwIdentifierArray[offset + j];
    431     if (dwIndex >= m_dwSharedObjNumArray.size())
    432       return CPDF_DataAvail::DataNotAvailable;
    433 
    434     dwObjNum = m_dwSharedObjNumArray[dwIndex];
    435     if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) &&
    436         dwObjNum <
    437             static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) {
    438       continue;
    439     }
    440 
    441     dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray);
    442     // If two objects have the same offset, it should be treated as an error.
    443     if (!dwLength)
    444       return CPDF_DataAvail::DataError;
    445 
    446     if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength,
    447                                    pHints)) {
    448       return CPDF_DataAvail::DataNotAvailable;
    449     }
    450   }
    451   return CPDF_DataAvail::DataAvailable;
    452 }
    453 
    454 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) {
    455   if (!pHintStream)
    456     return false;
    457 
    458   CPDF_Dictionary* pDict = pHintStream->GetDict();
    459   CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr;
    460   if (!pOffset || !pOffset->IsNumber())
    461     return false;
    462 
    463   int shared_hint_table_offset = pOffset->GetInteger();
    464   if (shared_hint_table_offset <= 0)
    465     return false;
    466 
    467   CPDF_StreamAcc acc;
    468   acc.LoadAllData(pHintStream);
    469 
    470   uint32_t size = acc.GetSize();
    471   // The header section of page offset hint table is 36 bytes.
    472   // The header section of shared object hint table is 24 bytes.
    473   // Hint table has at least 60 bytes.
    474   const uint32_t kMinStreamLength = 60;
    475   if (size < kMinStreamLength)
    476     return false;
    477 
    478   FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset;
    479   if (!safe_shared_hint_table_offset.IsValid() ||
    480       size < safe_shared_hint_table_offset.ValueOrDie()) {
    481     return false;
    482   }
    483 
    484   CFX_BitStream bs;
    485   bs.Init(acc.GetData(), size);
    486   return ReadPageHintTable(&bs) &&
    487          ReadSharedObjHintTable(&bs, shared_hint_table_offset);
    488 }
    489 
    490 int CPDF_HintTables::GetEndOfFirstPageOffset() const {
    491   return static_cast<int>(m_pLinearized->GetFirstPageEndOffset());
    492 }
    493 
    494 int CPDF_HintTables::GetNumberOfPages() const {
    495   return static_cast<int>(m_pLinearized->GetPageCount());
    496 }
    497 
    498 int CPDF_HintTables::GetFirstPageObjectNumber() const {
    499   return static_cast<int>(m_pLinearized->GetFirstPageObjNum());
    500 }
    501 
    502 int CPDF_HintTables::GetFirstPageNumber() const {
    503   return static_cast<int>(m_pLinearized->GetFirstPageNo());
    504 }
    505 
    506 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const {
    507   return static_cast<int>(m_pLinearized->GetHintStart());
    508 }
    509 
    510 int CPDF_HintTables::ReadPrimaryHintStreamLength() const {
    511   return static_cast<int>(m_pLinearized->GetHintLength());
    512 }
    513