1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_hint_tables.h" 8 9 #include <limits> 10 11 #include "core/fpdfapi/parser/cpdf_array.h" 12 #include "core/fpdfapi/parser/cpdf_data_avail.h" 13 #include "core/fpdfapi/parser/cpdf_dictionary.h" 14 #include "core/fpdfapi/parser/cpdf_document.h" 15 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 16 #include "core/fpdfapi/parser/cpdf_stream.h" 17 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 18 #include "core/fxcrt/fx_safe_types.h" 19 #include "third_party/base/numerics/safe_conversions.h" 20 21 namespace { 22 23 bool CanReadFromBitStream(const CFX_BitStream* hStream, 24 const FX_SAFE_UINT32& bits) { 25 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie(); 26 } 27 28 // Sanity check values from the page table header. The note in the PDF 1.7 29 // reference for Table F.3 says the valid range is only 0 through 32. Though 0 30 // is not useful either. 31 bool IsValidPageOffsetHintTableBitCount(uint32_t bits) { 32 return bits > 0 && bits <= 32; 33 } 34 35 } // namespace 36 37 CPDF_HintTables::CPDF_HintTables(CPDF_DataAvail* pDataAvail, 38 CPDF_LinearizedHeader* pLinearized) 39 : m_pDataAvail(pDataAvail), 40 m_pLinearized(pLinearized), 41 m_nFirstPageSharedObjs(0), 42 m_szFirstPageObjOffset(0) { 43 ASSERT(m_pLinearized); 44 } 45 46 CPDF_HintTables::~CPDF_HintTables() {} 47 48 uint32_t CPDF_HintTables::GetItemLength( 49 uint32_t index, 50 const std::vector<FX_FILESIZE>& szArray) { 51 if (szArray.size() < 2 || index > szArray.size() - 2 || 52 szArray[index] > szArray[index + 1]) { 53 return 0; 54 } 55 return szArray[index + 1] - szArray[index]; 56 } 57 58 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { 59 if (!hStream || hStream->IsEOF()) 60 return false; 61 62 int nStreamOffset = ReadPrimaryHintStreamOffset(); 63 if (nStreamOffset < 0) 64 return false; 65 66 int nStreamLen = ReadPrimaryHintStreamLength(); 67 if (nStreamLen < 1 || 68 !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) { 69 return false; 70 } 71 72 const uint32_t kHeaderSize = 288; 73 if (hStream->BitsRemaining() < kHeaderSize) 74 return false; 75 76 // Item 1: The least number of objects in a page. 77 const uint32_t dwObjLeastNum = hStream->GetBits(32); 78 if (!dwObjLeastNum) 79 return false; 80 81 // Item 2: The location of the first page's page object. 82 const uint32_t dwFirstObjLoc = hStream->GetBits(32); 83 if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) { 84 FX_SAFE_FILESIZE safeLoc = nStreamLen; 85 safeLoc += dwFirstObjLoc; 86 if (!safeLoc.IsValid()) 87 return false; 88 m_szFirstPageObjOffset = safeLoc.ValueOrDie(); 89 } else { 90 if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc)) 91 return false; 92 m_szFirstPageObjOffset = dwFirstObjLoc; 93 } 94 95 // Item 3: The number of bits needed to represent the difference 96 // between the greatest and least number of objects in a page. 97 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16); 98 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits)) 99 return false; 100 101 // Item 4: The least length of a page in bytes. 102 const uint32_t dwPageLeastLen = hStream->GetBits(32); 103 if (!dwPageLeastLen) 104 return false; 105 106 // Item 5: The number of bits needed to represent the difference 107 // between the greatest and least length of a page, in bytes. 108 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16); 109 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits)) 110 return false; 111 112 // Skip Item 6, 7, 8, 9 total 96 bits. 113 hStream->SkipBits(96); 114 115 // Item 10: The number of bits needed to represent the greatest 116 // number of shared object references. 117 const uint32_t dwSharedObjBits = hStream->GetBits(16); 118 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits)) 119 return false; 120 121 // Item 11: The number of bits needed to represent the numerically 122 // greatest shared object identifier used by the pages. 123 const uint32_t dwSharedIdBits = hStream->GetBits(16); 124 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits)) 125 return false; 126 127 // Item 12: The number of bits needed to represent the numerator of 128 // the fractional position for each shared object reference. For each 129 // shared object referenced from a page, there is an indication of 130 // where in the page's content stream the object is first referenced. 131 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16); 132 if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits)) 133 return false; 134 135 // Item 13: Skip Item 13 which has 16 bits. 136 hStream->SkipBits(16); 137 138 const int nPages = GetNumberOfPages(); 139 if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM) 140 return false; 141 142 const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages); 143 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits; 144 required_bits *= dwPages; 145 if (!CanReadFromBitStream(hStream, required_bits)) 146 return false; 147 148 for (int i = 0; i < nPages; ++i) { 149 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); 150 safeDeltaObj += dwObjLeastNum; 151 if (!safeDeltaObj.IsValid()) 152 return false; 153 m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie()); 154 } 155 hStream->ByteAlign(); 156 157 required_bits = dwDeltaPageLenBits; 158 required_bits *= dwPages; 159 if (!CanReadFromBitStream(hStream, required_bits)) 160 return false; 161 162 std::vector<uint32_t> dwPageLenArray; 163 for (int i = 0; i < nPages; ++i) { 164 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits); 165 safePageLen += dwPageLeastLen; 166 if (!safePageLen.IsValid()) 167 return false; 168 169 dwPageLenArray.push_back(safePageLen.ValueOrDie()); 170 } 171 172 int nOffsetE = GetEndOfFirstPageOffset(); 173 if (nOffsetE < 0) 174 return false; 175 176 int nFirstPageNum = GetFirstPageNumber(); 177 if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1) 178 return false; 179 180 for (int i = 0; i < nPages; ++i) { 181 if (i == nFirstPageNum) { 182 m_szPageOffsetArray.push_back(m_szFirstPageObjOffset); 183 } else if (i == nFirstPageNum + 1) { 184 if (i == 1) { 185 m_szPageOffsetArray.push_back(nOffsetE); 186 } else { 187 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] + 188 dwPageLenArray[i - 2]); 189 } 190 } else { 191 if (i == 0) { 192 m_szPageOffsetArray.push_back(nOffsetE); 193 } else { 194 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] + 195 dwPageLenArray[i - 1]); 196 } 197 } 198 } 199 200 m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] + 201 dwPageLenArray[nPages - 1]); 202 hStream->ByteAlign(); 203 204 // Number of shared objects. 205 required_bits = dwSharedObjBits; 206 required_bits *= dwPages; 207 if (!CanReadFromBitStream(hStream, required_bits)) 208 return false; 209 210 for (int i = 0; i < nPages; i++) 211 m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits)); 212 hStream->ByteAlign(); 213 214 // Array of identifiers, size = nshared_objects. 215 for (int i = 0; i < nPages; i++) { 216 required_bits = dwSharedIdBits; 217 required_bits *= m_dwNSharedObjsArray[i]; 218 if (!CanReadFromBitStream(hStream, required_bits)) 219 return false; 220 221 for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++) 222 m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits)); 223 } 224 hStream->ByteAlign(); 225 226 for (int i = 0; i < nPages; i++) { 227 FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i]; 228 safeSize *= dwSharedNumeratorBits; 229 if (!CanReadFromBitStream(hStream, safeSize)) 230 return false; 231 232 hStream->SkipBits(safeSize.ValueOrDie()); 233 } 234 hStream->ByteAlign(); 235 236 FX_SAFE_UINT32 safeTotalPageLen = dwPages; 237 safeTotalPageLen *= dwDeltaPageLenBits; 238 if (!CanReadFromBitStream(hStream, safeTotalPageLen)) 239 return false; 240 241 hStream->SkipBits(safeTotalPageLen.ValueOrDie()); 242 hStream->ByteAlign(); 243 return true; 244 } 245 246 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream, 247 uint32_t offset) { 248 if (!hStream || hStream->IsEOF()) 249 return false; 250 251 int nStreamOffset = ReadPrimaryHintStreamOffset(); 252 int nStreamLen = ReadPrimaryHintStreamLength(); 253 if (nStreamOffset < 0 || nStreamLen < 1) 254 return false; 255 256 FX_SAFE_UINT32 bit_offset = offset; 257 bit_offset *= 8; 258 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie()) 259 return false; 260 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie()); 261 262 const uint32_t kHeaderSize = 192; 263 if (hStream->BitsRemaining() < kHeaderSize) 264 return false; 265 266 // Item 1: The object number of the first object in the shared objects 267 // section. 268 uint32_t dwFirstSharedObjNum = hStream->GetBits(32); 269 270 // Item 2: The location of the first object in the shared objects section. 271 uint32_t dwFirstSharedObjLoc = hStream->GetBits(32); 272 if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset)) 273 dwFirstSharedObjLoc += nStreamLen; 274 275 // Item 3: The number of shared object entries for the first page. 276 m_nFirstPageSharedObjs = hStream->GetBits(32); 277 278 // Item 4: The number of shared object entries for the shared objects 279 // section, including the number of shared object entries for the first page. 280 uint32_t dwSharedObjTotal = hStream->GetBits(32); 281 282 // Item 5: The number of bits needed to represent the greatest number of 283 // objects in a shared object group. Skipped. 284 hStream->SkipBits(16); 285 286 // Item 6: The least length of a shared object group in bytes. 287 uint32_t dwGroupLeastLen = hStream->GetBits(32); 288 289 // Item 7: The number of bits needed to represent the difference between the 290 // greatest and least length of a shared object group, in bytes. 291 uint32_t dwDeltaGroupLen = hStream->GetBits(16); 292 293 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber || 294 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber || 295 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) { 296 return false; 297 } 298 299 int nFirstPageObjNum = GetFirstPageObjectNumber(); 300 if (nFirstPageObjNum < 0) 301 return false; 302 303 uint32_t dwPrevObjLen = 0; 304 uint32_t dwCurObjLen = 0; 305 FX_SAFE_UINT32 required_bits = dwSharedObjTotal; 306 required_bits *= dwDeltaGroupLen; 307 if (!CanReadFromBitStream(hStream, required_bits)) 308 return false; 309 310 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) { 311 dwPrevObjLen = dwCurObjLen; 312 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen); 313 safeObjLen += dwGroupLeastLen; 314 if (!safeObjLen.IsValid()) 315 return false; 316 317 dwCurObjLen = safeObjLen.ValueOrDie(); 318 if (i < m_nFirstPageSharedObjs) { 319 m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i); 320 if (i == 0) 321 m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset); 322 } else { 323 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum; 324 safeObjNum += i - m_nFirstPageSharedObjs; 325 if (!safeObjNum.IsValid()) 326 return false; 327 328 m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie()); 329 if (i == m_nFirstPageSharedObjs) { 330 FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc; 331 if (!safeLoc.IsValid()) 332 return false; 333 334 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 335 } 336 } 337 338 if (i != 0 && i != m_nFirstPageSharedObjs) { 339 FX_SAFE_FILESIZE safeLoc = dwPrevObjLen; 340 safeLoc += m_szSharedObjOffsetArray[i - 1]; 341 if (!safeLoc.IsValid()) 342 return false; 343 344 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 345 } 346 } 347 348 if (dwSharedObjTotal > 0) { 349 FX_SAFE_FILESIZE safeLoc = dwCurObjLen; 350 safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1]; 351 if (!safeLoc.IsValid()) 352 return false; 353 354 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 355 } 356 357 hStream->ByteAlign(); 358 if (hStream->BitsRemaining() < dwSharedObjTotal) 359 return false; 360 361 hStream->SkipBits(dwSharedObjTotal); 362 hStream->ByteAlign(); 363 return true; 364 } 365 366 bool CPDF_HintTables::GetPagePos(uint32_t index, 367 FX_FILESIZE* szPageStartPos, 368 FX_FILESIZE* szPageLength, 369 uint32_t* dwObjNum) { 370 *szPageStartPos = m_szPageOffsetArray[index]; 371 *szPageLength = GetItemLength(index, m_szPageOffsetArray); 372 373 int nFirstPageObjNum = GetFirstPageObjectNumber(); 374 if (nFirstPageObjNum < 0) 375 return false; 376 377 int nFirstPageNum = GetFirstPageNumber(); 378 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum)) 379 return false; 380 381 uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum); 382 if (index == dwFirstPageNum) { 383 *dwObjNum = nFirstPageObjNum; 384 return true; 385 } 386 387 // The object number of remaining pages starts from 1. 388 *dwObjNum = 1; 389 for (uint32_t i = 0; i < index; ++i) { 390 if (i == dwFirstPageNum) 391 continue; 392 *dwObjNum += m_dwDeltaNObjsArray[i]; 393 } 394 return true; 395 } 396 397 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage( 398 uint32_t index, 399 CPDF_DataAvail::DownloadHints* pHints) { 400 if (!pHints) 401 return CPDF_DataAvail::DataError; 402 403 int nFirstPageNum = GetFirstPageNumber(); 404 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum)) 405 return CPDF_DataAvail::DataError; 406 407 if (index == static_cast<uint32_t>(nFirstPageNum)) 408 return CPDF_DataAvail::DataAvailable; 409 410 uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray); 411 // If two pages have the same offset, it should be treated as an error. 412 if (!dwLength) 413 return CPDF_DataAvail::DataError; 414 415 if (!m_pDataAvail->IsDataAvail(m_szPageOffsetArray[index], dwLength, pHints)) 416 return CPDF_DataAvail::DataNotAvailable; 417 418 // Download data of shared objects in the page. 419 uint32_t offset = 0; 420 for (uint32_t i = 0; i < index; ++i) 421 offset += m_dwNSharedObjsArray[i]; 422 423 int nFirstPageObjNum = GetFirstPageObjectNumber(); 424 if (nFirstPageObjNum < 0) 425 return CPDF_DataAvail::DataError; 426 427 uint32_t dwIndex = 0; 428 uint32_t dwObjNum = 0; 429 for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) { 430 dwIndex = m_dwIdentifierArray[offset + j]; 431 if (dwIndex >= m_dwSharedObjNumArray.size()) 432 return CPDF_DataAvail::DataNotAvailable; 433 434 dwObjNum = m_dwSharedObjNumArray[dwIndex]; 435 if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) && 436 dwObjNum < 437 static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) { 438 continue; 439 } 440 441 dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray); 442 // If two objects have the same offset, it should be treated as an error. 443 if (!dwLength) 444 return CPDF_DataAvail::DataError; 445 446 if (!m_pDataAvail->IsDataAvail(m_szSharedObjOffsetArray[dwIndex], dwLength, 447 pHints)) { 448 return CPDF_DataAvail::DataNotAvailable; 449 } 450 } 451 return CPDF_DataAvail::DataAvailable; 452 } 453 454 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { 455 if (!pHintStream) 456 return false; 457 458 CPDF_Dictionary* pDict = pHintStream->GetDict(); 459 CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr; 460 if (!pOffset || !pOffset->IsNumber()) 461 return false; 462 463 int shared_hint_table_offset = pOffset->GetInteger(); 464 if (shared_hint_table_offset <= 0) 465 return false; 466 467 CPDF_StreamAcc acc; 468 acc.LoadAllData(pHintStream); 469 470 uint32_t size = acc.GetSize(); 471 // The header section of page offset hint table is 36 bytes. 472 // The header section of shared object hint table is 24 bytes. 473 // Hint table has at least 60 bytes. 474 const uint32_t kMinStreamLength = 60; 475 if (size < kMinStreamLength) 476 return false; 477 478 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset; 479 if (!safe_shared_hint_table_offset.IsValid() || 480 size < safe_shared_hint_table_offset.ValueOrDie()) { 481 return false; 482 } 483 484 CFX_BitStream bs; 485 bs.Init(acc.GetData(), size); 486 return ReadPageHintTable(&bs) && 487 ReadSharedObjHintTable(&bs, shared_hint_table_offset); 488 } 489 490 int CPDF_HintTables::GetEndOfFirstPageOffset() const { 491 return static_cast<int>(m_pLinearized->GetFirstPageEndOffset()); 492 } 493 494 int CPDF_HintTables::GetNumberOfPages() const { 495 return static_cast<int>(m_pLinearized->GetPageCount()); 496 } 497 498 int CPDF_HintTables::GetFirstPageObjectNumber() const { 499 return static_cast<int>(m_pLinearized->GetFirstPageObjNum()); 500 } 501 502 int CPDF_HintTables::GetFirstPageNumber() const { 503 return static_cast<int>(m_pLinearized->GetFirstPageNo()); 504 } 505 506 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { 507 return static_cast<int>(m_pLinearized->GetHintStart()); 508 } 509 510 int CPDF_HintTables::ReadPrimaryHintStreamLength() const { 511 return static_cast<int>(m_pLinearized->GetHintLength()); 512 } 513