1 // Copyright 2016 PDFium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com 6 7 #include "core/fpdfapi/parser/cpdf_hint_tables.h" 8 9 #include <limits> 10 11 #include "core/fpdfapi/parser/cpdf_array.h" 12 #include "core/fpdfapi/parser/cpdf_data_avail.h" 13 #include "core/fpdfapi/parser/cpdf_dictionary.h" 14 #include "core/fpdfapi/parser/cpdf_document.h" 15 #include "core/fpdfapi/parser/cpdf_linearized_header.h" 16 #include "core/fpdfapi/parser/cpdf_read_validator.h" 17 #include "core/fpdfapi/parser/cpdf_stream.h" 18 #include "core/fpdfapi/parser/cpdf_stream_acc.h" 19 #include "core/fxcrt/cfx_bitstream.h" 20 #include "core/fxcrt/fx_safe_types.h" 21 #include "third_party/base/numerics/safe_conversions.h" 22 23 namespace { 24 25 bool CanReadFromBitStream(const CFX_BitStream* hStream, 26 const FX_SAFE_UINT32& bits) { 27 return bits.IsValid() && hStream->BitsRemaining() >= bits.ValueOrDie(); 28 } 29 30 // Sanity check values from the page table header. The note in the PDF 1.7 31 // reference for Table F.3 says the valid range is only 0 through 32. Though 0 32 // is not useful either. 33 bool IsValidPageOffsetHintTableBitCount(uint32_t bits) { 34 return bits > 0 && bits <= 32; 35 } 36 37 } // namespace 38 39 CPDF_HintTables::CPDF_HintTables(CPDF_ReadValidator* pValidator, 40 CPDF_LinearizedHeader* pLinearized) 41 : m_pValidator(pValidator), 42 m_pLinearized(pLinearized), 43 m_nFirstPageSharedObjs(0), 44 m_szFirstPageObjOffset(0) { 45 ASSERT(m_pLinearized); 46 } 47 48 CPDF_HintTables::~CPDF_HintTables() {} 49 50 uint32_t CPDF_HintTables::GetItemLength( 51 uint32_t index, 52 const std::vector<FX_FILESIZE>& szArray) const { 53 if (szArray.size() < 2 || index > szArray.size() - 2 || 54 szArray[index] > szArray[index + 1]) { 55 return 0; 56 } 57 return szArray[index + 1] - szArray[index]; 58 } 59 60 bool CPDF_HintTables::ReadPageHintTable(CFX_BitStream* hStream) { 61 if (!hStream || hStream->IsEOF()) 62 return false; 63 64 int nStreamOffset = ReadPrimaryHintStreamOffset(); 65 if (nStreamOffset < 0) 66 return false; 67 68 int nStreamLen = ReadPrimaryHintStreamLength(); 69 if (nStreamLen < 1 || 70 !pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(nStreamLen)) { 71 return false; 72 } 73 74 const uint32_t kHeaderSize = 288; 75 if (hStream->BitsRemaining() < kHeaderSize) 76 return false; 77 78 // Item 1: The least number of objects in a page. 79 const uint32_t dwObjLeastNum = hStream->GetBits(32); 80 if (!dwObjLeastNum) 81 return false; 82 83 // Item 2: The location of the first page's page object. 84 const uint32_t dwFirstObjLoc = hStream->GetBits(32); 85 if (dwFirstObjLoc > static_cast<uint32_t>(nStreamOffset)) { 86 FX_SAFE_FILESIZE safeLoc = nStreamLen; 87 safeLoc += dwFirstObjLoc; 88 if (!safeLoc.IsValid()) 89 return false; 90 m_szFirstPageObjOffset = safeLoc.ValueOrDie(); 91 } else { 92 if (!pdfium::base::IsValueInRangeForNumericType<FX_FILESIZE>(dwFirstObjLoc)) 93 return false; 94 m_szFirstPageObjOffset = dwFirstObjLoc; 95 } 96 97 // Item 3: The number of bits needed to represent the difference 98 // between the greatest and least number of objects in a page. 99 const uint32_t dwDeltaObjectsBits = hStream->GetBits(16); 100 if (!IsValidPageOffsetHintTableBitCount(dwDeltaObjectsBits)) 101 return false; 102 103 // Item 4: The least length of a page in bytes. 104 const uint32_t dwPageLeastLen = hStream->GetBits(32); 105 if (!dwPageLeastLen) 106 return false; 107 108 // Item 5: The number of bits needed to represent the difference 109 // between the greatest and least length of a page, in bytes. 110 const uint32_t dwDeltaPageLenBits = hStream->GetBits(16); 111 if (!IsValidPageOffsetHintTableBitCount(dwDeltaPageLenBits)) 112 return false; 113 114 // Skip Item 6, 7, 8, 9 total 96 bits. 115 hStream->SkipBits(96); 116 117 // Item 10: The number of bits needed to represent the greatest 118 // number of shared object references. 119 const uint32_t dwSharedObjBits = hStream->GetBits(16); 120 if (!IsValidPageOffsetHintTableBitCount(dwSharedObjBits)) 121 return false; 122 123 // Item 11: The number of bits needed to represent the numerically 124 // greatest shared object identifier used by the pages. 125 const uint32_t dwSharedIdBits = hStream->GetBits(16); 126 if (!IsValidPageOffsetHintTableBitCount(dwSharedIdBits)) 127 return false; 128 129 // Item 12: The number of bits needed to represent the numerator of 130 // the fractional position for each shared object reference. For each 131 // shared object referenced from a page, there is an indication of 132 // where in the page's content stream the object is first referenced. 133 const uint32_t dwSharedNumeratorBits = hStream->GetBits(16); 134 if (!IsValidPageOffsetHintTableBitCount(dwSharedNumeratorBits)) 135 return false; 136 137 // Item 13: Skip Item 13 which has 16 bits. 138 hStream->SkipBits(16); 139 140 const int nPages = GetNumberOfPages(); 141 if (nPages < 1 || nPages >= FPDF_PAGE_MAX_NUM) 142 return false; 143 144 const uint32_t dwPages = pdfium::base::checked_cast<uint32_t>(nPages); 145 FX_SAFE_UINT32 required_bits = dwDeltaObjectsBits; 146 required_bits *= dwPages; 147 if (!CanReadFromBitStream(hStream, required_bits)) 148 return false; 149 150 for (int i = 0; i < nPages; ++i) { 151 FX_SAFE_UINT32 safeDeltaObj = hStream->GetBits(dwDeltaObjectsBits); 152 safeDeltaObj += dwObjLeastNum; 153 if (!safeDeltaObj.IsValid()) 154 return false; 155 m_dwDeltaNObjsArray.push_back(safeDeltaObj.ValueOrDie()); 156 } 157 hStream->ByteAlign(); 158 159 required_bits = dwDeltaPageLenBits; 160 required_bits *= dwPages; 161 if (!CanReadFromBitStream(hStream, required_bits)) 162 return false; 163 164 std::vector<uint32_t> dwPageLenArray; 165 for (int i = 0; i < nPages; ++i) { 166 FX_SAFE_UINT32 safePageLen = hStream->GetBits(dwDeltaPageLenBits); 167 safePageLen += dwPageLeastLen; 168 if (!safePageLen.IsValid()) 169 return false; 170 171 dwPageLenArray.push_back(safePageLen.ValueOrDie()); 172 } 173 174 int nOffsetE = GetEndOfFirstPageOffset(); 175 if (nOffsetE < 0) 176 return false; 177 178 int nFirstPageNum = GetFirstPageNumber(); 179 if (nFirstPageNum < 0 || nFirstPageNum > std::numeric_limits<int>::max() - 1) 180 return false; 181 182 for (int i = 0; i < nPages; ++i) { 183 if (i == nFirstPageNum) { 184 m_szPageOffsetArray.push_back(m_szFirstPageObjOffset); 185 } else if (i == nFirstPageNum + 1) { 186 if (i == 1) { 187 m_szPageOffsetArray.push_back(nOffsetE); 188 } else { 189 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 2] + 190 dwPageLenArray[i - 2]); 191 } 192 } else { 193 if (i == 0) { 194 m_szPageOffsetArray.push_back(nOffsetE); 195 } else { 196 m_szPageOffsetArray.push_back(m_szPageOffsetArray[i - 1] + 197 dwPageLenArray[i - 1]); 198 } 199 } 200 } 201 202 m_szPageOffsetArray.push_back(m_szPageOffsetArray[nPages - 1] + 203 dwPageLenArray[nPages - 1]); 204 hStream->ByteAlign(); 205 206 // Number of shared objects. 207 required_bits = dwSharedObjBits; 208 required_bits *= dwPages; 209 if (!CanReadFromBitStream(hStream, required_bits)) 210 return false; 211 212 for (int i = 0; i < nPages; i++) 213 m_dwNSharedObjsArray.push_back(hStream->GetBits(dwSharedObjBits)); 214 hStream->ByteAlign(); 215 216 // Array of identifiers, size = nshared_objects. 217 for (int i = 0; i < nPages; i++) { 218 required_bits = dwSharedIdBits; 219 required_bits *= m_dwNSharedObjsArray[i]; 220 if (!CanReadFromBitStream(hStream, required_bits)) 221 return false; 222 223 for (uint32_t j = 0; j < m_dwNSharedObjsArray[i]; j++) 224 m_dwIdentifierArray.push_back(hStream->GetBits(dwSharedIdBits)); 225 } 226 hStream->ByteAlign(); 227 228 for (int i = 0; i < nPages; i++) { 229 FX_SAFE_UINT32 safeSize = m_dwNSharedObjsArray[i]; 230 safeSize *= dwSharedNumeratorBits; 231 if (!CanReadFromBitStream(hStream, safeSize)) 232 return false; 233 234 hStream->SkipBits(safeSize.ValueOrDie()); 235 } 236 hStream->ByteAlign(); 237 238 FX_SAFE_UINT32 safeTotalPageLen = dwPages; 239 safeTotalPageLen *= dwDeltaPageLenBits; 240 if (!CanReadFromBitStream(hStream, safeTotalPageLen)) 241 return false; 242 243 hStream->SkipBits(safeTotalPageLen.ValueOrDie()); 244 hStream->ByteAlign(); 245 return true; 246 } 247 248 bool CPDF_HintTables::ReadSharedObjHintTable(CFX_BitStream* hStream, 249 uint32_t offset) { 250 if (!hStream || hStream->IsEOF()) 251 return false; 252 253 int nStreamOffset = ReadPrimaryHintStreamOffset(); 254 int nStreamLen = ReadPrimaryHintStreamLength(); 255 if (nStreamOffset < 0 || nStreamLen < 1) 256 return false; 257 258 FX_SAFE_UINT32 bit_offset = offset; 259 bit_offset *= 8; 260 if (!bit_offset.IsValid() || hStream->GetPos() > bit_offset.ValueOrDie()) 261 return false; 262 hStream->SkipBits((bit_offset - hStream->GetPos()).ValueOrDie()); 263 264 const uint32_t kHeaderSize = 192; 265 if (hStream->BitsRemaining() < kHeaderSize) 266 return false; 267 268 // Item 1: The object number of the first object in the shared objects 269 // section. 270 uint32_t dwFirstSharedObjNum = hStream->GetBits(32); 271 272 // Item 2: The location of the first object in the shared objects section. 273 uint32_t dwFirstSharedObjLoc = hStream->GetBits(32); 274 if (dwFirstSharedObjLoc > static_cast<uint32_t>(nStreamOffset)) 275 dwFirstSharedObjLoc += nStreamLen; 276 277 // Item 3: The number of shared object entries for the first page. 278 m_nFirstPageSharedObjs = hStream->GetBits(32); 279 280 // Item 4: The number of shared object entries for the shared objects 281 // section, including the number of shared object entries for the first page. 282 uint32_t dwSharedObjTotal = hStream->GetBits(32); 283 284 // Item 5: The number of bits needed to represent the greatest number of 285 // objects in a shared object group. Skipped. 286 hStream->SkipBits(16); 287 288 // Item 6: The least length of a shared object group in bytes. 289 uint32_t dwGroupLeastLen = hStream->GetBits(32); 290 291 // Item 7: The number of bits needed to represent the difference between the 292 // greatest and least length of a shared object group, in bytes. 293 uint32_t dwDeltaGroupLen = hStream->GetBits(16); 294 295 // Trying to decode more than 32 bits isn't going to work when we write into 296 // a uint32_t. 297 if (dwDeltaGroupLen > 31) 298 return false; 299 300 if (dwFirstSharedObjNum >= CPDF_Parser::kMaxObjectNumber || 301 m_nFirstPageSharedObjs >= CPDF_Parser::kMaxObjectNumber || 302 dwSharedObjTotal >= CPDF_Parser::kMaxObjectNumber) { 303 return false; 304 } 305 306 int nFirstPageObjNum = GetFirstPageObjectNumber(); 307 if (nFirstPageObjNum < 0) 308 return false; 309 310 uint32_t dwPrevObjLen = 0; 311 uint32_t dwCurObjLen = 0; 312 FX_SAFE_UINT32 required_bits = dwSharedObjTotal; 313 required_bits *= dwDeltaGroupLen; 314 if (!CanReadFromBitStream(hStream, required_bits)) 315 return false; 316 317 for (uint32_t i = 0; i < dwSharedObjTotal; ++i) { 318 dwPrevObjLen = dwCurObjLen; 319 FX_SAFE_UINT32 safeObjLen = hStream->GetBits(dwDeltaGroupLen); 320 safeObjLen += dwGroupLeastLen; 321 if (!safeObjLen.IsValid()) 322 return false; 323 324 dwCurObjLen = safeObjLen.ValueOrDie(); 325 if (i < m_nFirstPageSharedObjs) { 326 m_dwSharedObjNumArray.push_back(nFirstPageObjNum + i); 327 if (i == 0) 328 m_szSharedObjOffsetArray.push_back(m_szFirstPageObjOffset); 329 } else { 330 FX_SAFE_UINT32 safeObjNum = dwFirstSharedObjNum; 331 safeObjNum += i - m_nFirstPageSharedObjs; 332 if (!safeObjNum.IsValid()) 333 return false; 334 335 m_dwSharedObjNumArray.push_back(safeObjNum.ValueOrDie()); 336 if (i == m_nFirstPageSharedObjs) { 337 FX_SAFE_FILESIZE safeLoc = dwFirstSharedObjLoc; 338 if (!safeLoc.IsValid()) 339 return false; 340 341 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 342 } 343 } 344 345 if (i != 0 && i != m_nFirstPageSharedObjs) { 346 FX_SAFE_FILESIZE safeLoc = dwPrevObjLen; 347 safeLoc += m_szSharedObjOffsetArray[i - 1]; 348 if (!safeLoc.IsValid()) 349 return false; 350 351 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 352 } 353 } 354 355 if (dwSharedObjTotal > 0) { 356 FX_SAFE_FILESIZE safeLoc = dwCurObjLen; 357 safeLoc += m_szSharedObjOffsetArray[dwSharedObjTotal - 1]; 358 if (!safeLoc.IsValid()) 359 return false; 360 361 m_szSharedObjOffsetArray.push_back(safeLoc.ValueOrDie()); 362 } 363 364 hStream->ByteAlign(); 365 if (hStream->BitsRemaining() < dwSharedObjTotal) 366 return false; 367 368 hStream->SkipBits(dwSharedObjTotal); 369 hStream->ByteAlign(); 370 return true; 371 } 372 373 bool CPDF_HintTables::GetPagePos(uint32_t index, 374 FX_FILESIZE* szPageStartPos, 375 FX_FILESIZE* szPageLength, 376 uint32_t* dwObjNum) const { 377 if (index >= m_pLinearized->GetPageCount()) 378 return false; 379 380 *szPageStartPos = m_szPageOffsetArray[index]; 381 *szPageLength = GetItemLength(index, m_szPageOffsetArray); 382 383 int nFirstPageObjNum = GetFirstPageObjectNumber(); 384 if (nFirstPageObjNum < 0) 385 return false; 386 387 int nFirstPageNum = GetFirstPageNumber(); 388 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum)) 389 return false; 390 391 uint32_t dwFirstPageNum = static_cast<uint32_t>(nFirstPageNum); 392 if (index == dwFirstPageNum) { 393 *dwObjNum = nFirstPageObjNum; 394 return true; 395 } 396 397 // The object number of remaining pages starts from 1. 398 *dwObjNum = 1; 399 for (uint32_t i = 0; i < index; ++i) { 400 if (i == dwFirstPageNum) 401 continue; 402 *dwObjNum += m_dwDeltaNObjsArray[i]; 403 } 404 return true; 405 } 406 407 CPDF_DataAvail::DocAvailStatus CPDF_HintTables::CheckPage(uint32_t index) { 408 int nFirstPageNum = GetFirstPageNumber(); 409 if (!pdfium::base::IsValueInRangeForNumericType<uint32_t>(nFirstPageNum)) 410 return CPDF_DataAvail::DataError; 411 412 if (index == static_cast<uint32_t>(nFirstPageNum)) 413 return CPDF_DataAvail::DataAvailable; 414 415 uint32_t dwLength = GetItemLength(index, m_szPageOffsetArray); 416 // If two pages have the same offset, it should be treated as an error. 417 if (!dwLength) 418 return CPDF_DataAvail::DataError; 419 420 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable( 421 m_szPageOffsetArray[index], dwLength)) 422 return CPDF_DataAvail::DataNotAvailable; 423 424 // Download data of shared objects in the page. 425 uint32_t offset = 0; 426 for (uint32_t i = 0; i < index; ++i) 427 offset += m_dwNSharedObjsArray[i]; 428 429 int nFirstPageObjNum = GetFirstPageObjectNumber(); 430 if (nFirstPageObjNum < 0) 431 return CPDF_DataAvail::DataError; 432 433 uint32_t dwIndex = 0; 434 uint32_t dwObjNum = 0; 435 for (uint32_t j = 0; j < m_dwNSharedObjsArray[index]; ++j) { 436 dwIndex = m_dwIdentifierArray[offset + j]; 437 if (dwIndex >= m_dwSharedObjNumArray.size()) 438 return CPDF_DataAvail::DataNotAvailable; 439 440 dwObjNum = m_dwSharedObjNumArray[dwIndex]; 441 if (dwObjNum >= static_cast<uint32_t>(nFirstPageObjNum) && 442 dwObjNum < 443 static_cast<uint32_t>(nFirstPageObjNum) + m_nFirstPageSharedObjs) { 444 continue; 445 } 446 447 dwLength = GetItemLength(dwIndex, m_szSharedObjOffsetArray); 448 // If two objects have the same offset, it should be treated as an error. 449 if (!dwLength) 450 return CPDF_DataAvail::DataError; 451 452 if (!m_pValidator->CheckDataRangeAndRequestIfUnavailable( 453 m_szSharedObjOffsetArray[dwIndex], dwLength)) { 454 return CPDF_DataAvail::DataNotAvailable; 455 } 456 } 457 return CPDF_DataAvail::DataAvailable; 458 } 459 460 bool CPDF_HintTables::LoadHintStream(CPDF_Stream* pHintStream) { 461 if (!pHintStream) 462 return false; 463 464 CPDF_Dictionary* pDict = pHintStream->GetDict(); 465 CPDF_Object* pOffset = pDict ? pDict->GetObjectFor("S") : nullptr; 466 if (!pOffset || !pOffset->IsNumber()) 467 return false; 468 469 int shared_hint_table_offset = pOffset->GetInteger(); 470 if (shared_hint_table_offset <= 0) 471 return false; 472 473 auto pAcc = pdfium::MakeRetain<CPDF_StreamAcc>(pHintStream); 474 pAcc->LoadAllDataFiltered(); 475 476 uint32_t size = pAcc->GetSize(); 477 // The header section of page offset hint table is 36 bytes. 478 // The header section of shared object hint table is 24 bytes. 479 // Hint table has at least 60 bytes. 480 const uint32_t kMinStreamLength = 60; 481 if (size < kMinStreamLength) 482 return false; 483 484 FX_SAFE_UINT32 safe_shared_hint_table_offset = shared_hint_table_offset; 485 if (!safe_shared_hint_table_offset.IsValid() || 486 size < safe_shared_hint_table_offset.ValueOrDie()) { 487 return false; 488 } 489 490 CFX_BitStream bs(pAcc->GetData(), size); 491 return ReadPageHintTable(&bs) && 492 ReadSharedObjHintTable(&bs, shared_hint_table_offset); 493 } 494 495 int CPDF_HintTables::GetEndOfFirstPageOffset() const { 496 return static_cast<int>(m_pLinearized->GetFirstPageEndOffset()); 497 } 498 499 int CPDF_HintTables::GetNumberOfPages() const { 500 return static_cast<int>(m_pLinearized->GetPageCount()); 501 } 502 503 int CPDF_HintTables::GetFirstPageObjectNumber() const { 504 return static_cast<int>(m_pLinearized->GetFirstPageObjNum()); 505 } 506 507 int CPDF_HintTables::GetFirstPageNumber() const { 508 return static_cast<int>(m_pLinearized->GetFirstPageNo()); 509 } 510 511 int CPDF_HintTables::ReadPrimaryHintStreamOffset() const { 512 return static_cast<int>(m_pLinearized->GetHintStart()); 513 } 514 515 int CPDF_HintTables::ReadPrimaryHintStreamLength() const { 516 return static_cast<int>(m_pLinearized->GetHintLength()); 517 } 518