1 // Copyright 2017 The Chromium OS Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "puffin/src/huffman_table.h" 6 7 #include <algorithm> 8 #include <vector> 9 10 #include "puffin/src/include/puffin/errors.h" 11 #include "puffin/src/set_errors.h" 12 13 namespace puffin { 14 15 // clang-format off 16 // Permutations of input Huffman code lengths (used only to read code lengths 17 // necessary for reading Huffman table.) 18 const uint8_t kPermutations[19] = { 19 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}; 20 21 // The bases of each alphabet which is added to the integer value of extra 22 // bits that comes after the Huffman code in the input to create the given 23 // length value. The last element is a guard. 24 const uint16_t kLengthBases[30] = { 25 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 26 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0xFFFF}; 27 28 // Number of extra bits that comes after the associating Huffman code. 29 const uint8_t kLengthExtraBits[29] = { 30 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 31 5, 5, 0}; 32 33 // Same as |kLengthBases| but for the distances instead of lengths. The last 34 // element is a guard. 35 const uint16_t kDistanceBases[31] = { 36 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, 769, 37 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577, 0xFFFF}; 38 39 // Same as |kLengthExtraBits| but for distances instead of lengths. 40 const uint8_t kDistanceExtraBits[30] = { 41 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 42 9, 10, 10, 11, 11, 12, 12, 13, 13}; 43 // clang-format on 44 45 // 288 is the maximum number of needed huffman codes for an alphabet. Fixed 46 // huffman table needs 288 and dynamic huffman table needs maximum 286. 47 // 286 = 256 (coding a byte) + 48 // 1 (coding the end of block symbole) + 49 // 29 (coding the lengths) 50 HuffmanTable::HuffmanTable() : codeindexpairs_(288), initialized_(false) {} 51 52 bool HuffmanTable::InitHuffmanCodes(const Buffer& lens, size_t* max_bits) { 53 // Temporary buffers used in |InitHuffmanCodes|. 54 uint16_t len_count_[kMaxHuffmanBits + 1] = {0}; 55 uint16_t next_code_[kMaxHuffmanBits + 1] = {0}; 56 57 // 1. Count the number of codes for each length; 58 for (auto len : lens) { 59 len_count_[len]++; 60 } 61 62 for (*max_bits = kMaxHuffmanBits; *max_bits >= 1; (*max_bits)--) { 63 if (len_count_[*max_bits] != 0) { 64 break; 65 } 66 } 67 68 // Check for oversubscribed code lengths. (A code with length 'L' cannot have 69 // more than 2^L items. 70 for (size_t idx = 1; idx <= *max_bits; idx++) { 71 if (len_count_[idx] > (1 << idx)) { 72 LOG(ERROR) << "Oversubscribed code lengths error!"; 73 return false; 74 } 75 } 76 77 // 2. Compute the coding of the first element for each length. 78 uint16_t code = 0; 79 len_count_[0] = 0; 80 for (size_t bits = 1; bits <= kMaxHuffmanBits; bits++) { 81 code = (code + len_count_[bits - 1]) << 1; 82 next_code_[bits] = code; 83 } 84 85 codeindexpairs_.clear(); 86 // 3. Calculate all the code values. 87 for (size_t idx = 0; idx < lens.size(); idx++) { 88 auto len = lens[idx]; 89 if (len == 0) { 90 continue; 91 } 92 93 // Reverse the code 94 CodeIndexPair cip; 95 cip.code = 0; 96 auto tmp_code = next_code_[len]; 97 for (size_t r = 0; r < len; r++) { 98 cip.code <<= 1; 99 cip.code |= tmp_code & 1U; 100 tmp_code >>= 1; 101 } 102 cip.index = idx; 103 codeindexpairs_.push_back(cip); 104 next_code_[len]++; 105 } 106 return true; 107 } 108 109 bool HuffmanTable::BuildHuffmanCodes(const Buffer& lens, 110 std::vector<uint16_t>* hcodes, 111 size_t* max_bits) { 112 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits)); 113 // Sort descending based on the bit-length of the code. 114 std::sort(codeindexpairs_.begin(), 115 codeindexpairs_.end(), 116 [&lens](const CodeIndexPair& a, const CodeIndexPair& b) { 117 return lens[a.index] > lens[b.index]; 118 }); 119 120 // Only zero out the part of hcodes which is valuable. 121 memset(hcodes->data(), 0, (1 << *max_bits) * sizeof(uint16_t)); 122 for (const auto& cip : codeindexpairs_) { 123 // The MSB bit of the code in hcodes is set if it is a valid code and its 124 // code exists in the input Huffman table. 125 (*hcodes)[cip.code] = cip.index | 0x8000; 126 auto fill_bits = *max_bits - lens[cip.index]; 127 for (auto idx = 1; idx < (1 << fill_bits); idx++) { 128 auto location = (idx << lens[cip.index]) | cip.code; 129 if (!((*hcodes)[location] & 0x8000)) { 130 (*hcodes)[location] = cip.index | 0x8000; 131 } 132 } 133 } 134 return true; 135 } 136 137 bool HuffmanTable::BuildHuffmanReverseCodes(const Buffer& lens, 138 std::vector<uint16_t>* rcodes, 139 size_t* max_bits) { 140 TEST_AND_RETURN_FALSE(InitHuffmanCodes(lens, max_bits)); 141 // Sort ascending based on the index. 142 std::sort(codeindexpairs_.begin(), 143 codeindexpairs_.end(), 144 [](const CodeIndexPair& a, const CodeIndexPair& b) -> bool { 145 return a.index < b.index; 146 }); 147 148 size_t index = 0; 149 for (size_t idx = 0; idx < rcodes->size(); idx++) { 150 if (index < codeindexpairs_.size() && idx == codeindexpairs_[index].index) { 151 (*rcodes)[idx] = codeindexpairs_[index].code; 152 index++; 153 } else { 154 (*rcodes)[idx] = 0; 155 } 156 } 157 return true; 158 } 159 160 bool HuffmanTable::BuildFixedHuffmanTable() { 161 if (!initialized_) { 162 // For all the vectors used in this class, we set the size in the 163 // constructor and we do not change the size later. This is for optimization 164 // purposes. The total size of data in this class is approximately 165 // 2KB. Because it is a constructor return values cannot be checked. 166 lit_len_lens_.resize(288); 167 lit_len_rcodes_.resize(288); 168 lit_len_hcodes_.resize(1 << 9); 169 170 distance_lens_.resize(30); 171 distance_rcodes_.resize(30); 172 distance_hcodes_.resize(1 << 5); 173 174 size_t i = 0; 175 while (i < 144) { 176 lit_len_lens_[i++] = 8; 177 } 178 while (i < 256) { 179 lit_len_lens_[i++] = 9; 180 } 181 while (i < 280) { 182 lit_len_lens_[i++] = 7; 183 } 184 while (i < 288) { 185 lit_len_lens_[i++] = 8; 186 } 187 188 i = 0; 189 while (i < 30) { 190 distance_lens_[i++] = 5; 191 } 192 193 TEST_AND_RETURN_FALSE( 194 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_)); 195 196 TEST_AND_RETURN_FALSE(BuildHuffmanCodes( 197 distance_lens_, &distance_hcodes_, &distance_max_bits_)); 198 199 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( 200 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_)); 201 202 TEST_AND_RETURN_FALSE(BuildHuffmanReverseCodes( 203 distance_lens_, &distance_rcodes_, &distance_max_bits_)); 204 205 initialized_ = true; 206 } 207 return true; 208 } 209 210 bool HuffmanTable::BuildDynamicHuffmanTable(BitReaderInterface* br, 211 uint8_t* buffer, 212 size_t* length, 213 Error* error) { 214 // Initilize only once and reuse. 215 if (!initialized_) { 216 // Only resizing the arrays needed. 217 code_lens_.resize(19); 218 code_hcodes_.resize(1 << 7); 219 220 lit_len_lens_.resize(286); 221 lit_len_hcodes_.resize(1 << 15); 222 223 distance_lens_.resize(30); 224 distance_hcodes_.resize(1 << 15); 225 226 // 286: Maximum number of literal/lengths symbols. 227 // 30: Maximum number of distance symbols. 228 // The reason we reserve this to the sum of both maximum sizes is that we 229 // need to calculate both huffman codes contiguously. See b/72815313. 230 tmp_lens_.resize(286 + 30); 231 initialized_ = true; 232 } 233 234 // Read the header. Reads the first portion of the Huffman data from input and 235 // writes it into the puff |buffer|. The first portion includes the size 236 // (|num_lit_len|) of the literals/lengths Huffman code length array 237 // (|dynamic_lit_len_lens_|), the size (|num_distance|) of distance Huffman 238 // code length array (|dynamic_distance_lens_|), and the size (|num_codes|) of 239 // Huffman code length array (dynamic_code_lens_) for reading 240 // |dynamic_lit_len_lens_| and |dynamic_distance_lens_|. Then it follows by 241 // reading |dynamic_code_lens_|. 242 243 TEST_AND_RETURN_FALSE_SET_ERROR(*length >= 3, Error::kInsufficientOutput); 244 size_t index = 0; 245 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(14), Error::kInsufficientInput); 246 buffer[index++] = br->ReadBits(5); // HLIST 247 auto num_lit_len = br->ReadBits(5) + 257; 248 br->DropBits(5); 249 250 buffer[index++] = br->ReadBits(5); // HDIST 251 auto num_distance = br->ReadBits(5) + 1; 252 br->DropBits(5); 253 254 buffer[index++] = br->ReadBits(4); // HCLEN 255 auto num_codes = br->ReadBits(4) + 4; 256 br->DropBits(4); 257 258 TEST_AND_RETURN_FALSE_SET_ERROR( 259 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes), 260 Error::kInvalidInput); 261 262 bool checked = false; 263 size_t idx = 0; 264 TEST_AND_RETURN_FALSE_SET_ERROR(*length - index >= (num_codes + 1) / 2, 265 Error::kInsufficientOutput); 266 // Two codes per byte 267 for (; idx < num_codes; idx++) { 268 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3), 269 Error::kInsufficientInput); 270 code_lens_[kPermutations[idx]] = br->ReadBits(3); 271 if (checked) { 272 buffer[index++] |= br->ReadBits(3); 273 } else { 274 buffer[index] = br->ReadBits(3) << 4; 275 } 276 checked = !checked; 277 br->DropBits(3); 278 } 279 // Pad the last byte if odd number of codes. 280 if (checked) { 281 index++; 282 } 283 for (; idx < 19; idx++) { 284 code_lens_[kPermutations[idx]] = 0; 285 } 286 287 TEST_AND_RETURN_FALSE_SET_ERROR( 288 BuildHuffmanCodes(code_lens_, &code_hcodes_, &code_max_bits_), 289 Error::kInvalidInput); 290 291 // Build literals/lengths and distance Huffman code length arrays. 292 auto bytes_available = (*length - index); 293 tmp_lens_.clear(); 294 TEST_AND_RETURN_FALSE(BuildHuffmanCodeLengths( 295 br, buffer + index, &bytes_available, code_max_bits_, 296 num_lit_len + num_distance, &tmp_lens_, error)); 297 index += bytes_available; 298 299 // TODO(ahassani): Optimize this so the memcpy is not needed anymore. 300 lit_len_lens_.clear(); 301 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(), 302 tmp_lens_.begin() + num_lit_len); 303 304 distance_lens_.clear(); 305 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len, 306 tmp_lens_.end()); 307 308 TEST_AND_RETURN_FALSE_SET_ERROR( 309 BuildHuffmanCodes(lit_len_lens_, &lit_len_hcodes_, &lit_len_max_bits_), 310 Error::kInvalidInput); 311 312 // Build distance Huffman codes. 313 TEST_AND_RETURN_FALSE_SET_ERROR( 314 BuildHuffmanCodes(distance_lens_, &distance_hcodes_, &distance_max_bits_), 315 Error::kInvalidInput); 316 317 *length = index; 318 return true; 319 } 320 321 bool HuffmanTable::BuildHuffmanCodeLengths(BitReaderInterface* br, 322 uint8_t* buffer, 323 size_t* length, 324 size_t max_bits, 325 size_t num_codes, 326 Buffer* lens, 327 Error* error) { 328 size_t index = 0; 329 lens->clear(); 330 for (size_t idx = 0; idx < num_codes;) { 331 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(max_bits), 332 Error::kInsufficientInput); 333 auto bits = br->ReadBits(max_bits); 334 uint16_t code; 335 size_t nbits; 336 TEST_AND_RETURN_FALSE_SET_ERROR(CodeAlphabet(bits, &code, &nbits), 337 Error::kInvalidInput); 338 TEST_AND_RETURN_FALSE_SET_ERROR(index < *length, 339 Error::kInsufficientOutput); 340 br->DropBits(nbits); 341 if (code < 16) { 342 buffer[index++] = code; 343 lens->push_back(code); 344 idx++; 345 } else { 346 TEST_AND_RETURN_FALSE_SET_ERROR(code < 19, Error::kInvalidInput); 347 size_t copy_num = 0; 348 uint8_t copy_val; 349 switch (code) { 350 case 16: 351 TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput); 352 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(2), 353 Error::kInsufficientInput); 354 copy_num = 3 + br->ReadBits(2); 355 buffer[index++] = 16 + br->ReadBits(2); // 3 - 6 times 356 copy_val = (*lens)[idx - 1]; 357 br->DropBits(2); 358 break; 359 360 case 17: 361 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(3), 362 Error::kInsufficientInput); 363 copy_num = 3 + br->ReadBits(3); 364 buffer[index++] = 20 + br->ReadBits(3); // 3 - 10 times 365 copy_val = 0; 366 br->DropBits(3); 367 break; 368 369 case 18: 370 TEST_AND_RETURN_FALSE_SET_ERROR(br->CacheBits(7), 371 Error::kInsufficientInput); 372 copy_num = 11 + br->ReadBits(7); 373 buffer[index++] = 28 + br->ReadBits(7); // 11 - 138 times 374 copy_val = 0; 375 br->DropBits(7); 376 break; 377 378 default: 379 LOG(ERROR) << "Invalid code!"; 380 *error = Error::kInvalidInput; 381 return false; 382 break; 383 } 384 idx += copy_num; 385 while (copy_num--) { 386 lens->push_back(copy_val); 387 } 388 } 389 } 390 TEST_AND_RETURN_FALSE(lens->size() == num_codes); 391 *length = index; 392 return true; 393 } 394 395 bool HuffmanTable::BuildDynamicHuffmanTable(const uint8_t* buffer, 396 size_t length, 397 BitWriterInterface* bw, 398 Error* error) { 399 if (!initialized_) { 400 // Only resizing the arrays needed. 401 code_lens_.resize(19); 402 code_rcodes_.resize(19); 403 404 lit_len_lens_.resize(286); 405 lit_len_rcodes_.resize(286); 406 407 distance_lens_.resize(30); 408 distance_rcodes_.resize(30); 409 410 tmp_lens_.resize(286 + 30); 411 412 initialized_ = true; 413 } 414 415 TEST_AND_RETURN_FALSE_SET_ERROR(length >= 3, Error::kInsufficientInput); 416 size_t index = 0; 417 // Write the header. 418 size_t num_lit_len = buffer[index] + 257; 419 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]), 420 Error::kInsufficientOutput); 421 422 size_t num_distance = buffer[index] + 1; 423 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(5, buffer[index++]), 424 Error::kInsufficientOutput); 425 426 size_t num_codes = buffer[index] + 4; 427 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(4, buffer[index++]), 428 Error::kInsufficientOutput); 429 430 TEST_AND_RETURN_FALSE_SET_ERROR( 431 CheckHuffmanArrayLengths(num_lit_len, num_distance, num_codes), 432 Error::kInvalidInput); 433 434 TEST_AND_RETURN_FALSE_SET_ERROR(length - index >= (num_codes + 1) / 2, 435 Error::kInsufficientInput); 436 bool checked = false; 437 size_t idx = 0; 438 for (; idx < num_codes; idx++) { 439 uint8_t len; 440 if (checked) { 441 len = buffer[index++] & 0x0F; 442 } else { 443 len = buffer[index] >> 4; 444 } 445 checked = !checked; 446 code_lens_[kPermutations[idx]] = len; 447 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, len), 448 Error::kInsufficientOutput); 449 } 450 if (checked) { 451 index++; 452 } 453 for (; idx < 19; idx++) { 454 code_lens_[kPermutations[idx]] = 0; 455 } 456 457 TEST_AND_RETURN_FALSE_SET_ERROR( 458 BuildHuffmanReverseCodes(code_lens_, &code_rcodes_, &code_max_bits_), 459 Error::kInvalidInput); 460 461 // Build literal/lengths and distance Huffman code length arrays. 462 auto bytes_available = length - index; 463 TEST_AND_RETURN_FALSE( 464 BuildHuffmanCodeLengths(buffer + index, &bytes_available, bw, 465 num_lit_len + num_distance, &tmp_lens_, error)); 466 index += bytes_available; 467 468 lit_len_lens_.clear(); 469 lit_len_lens_.insert(lit_len_lens_.begin(), tmp_lens_.begin(), 470 tmp_lens_.begin() + num_lit_len); 471 472 distance_lens_.clear(); 473 distance_lens_.insert(distance_lens_.begin(), tmp_lens_.begin() + num_lit_len, 474 tmp_lens_.end()); 475 476 // Build literal/lengths Huffman reverse codes. 477 TEST_AND_RETURN_FALSE_SET_ERROR( 478 BuildHuffmanReverseCodes( 479 lit_len_lens_, &lit_len_rcodes_, &lit_len_max_bits_), 480 Error::kInvalidInput); 481 482 // Build distance Huffman reverse codes. 483 TEST_AND_RETURN_FALSE_SET_ERROR( 484 BuildHuffmanReverseCodes( 485 distance_lens_, &distance_rcodes_, &distance_max_bits_), 486 Error::kInvalidInput); 487 488 TEST_AND_RETURN_FALSE_SET_ERROR(length == index, Error::kInvalidInput); 489 490 return true; 491 } 492 493 bool HuffmanTable::BuildHuffmanCodeLengths(const uint8_t* buffer, 494 size_t* length, 495 BitWriterInterface* bw, 496 size_t num_codes, 497 Buffer* lens, 498 Error* error) { 499 lens->clear(); 500 uint16_t hcode; 501 size_t nbits; 502 size_t index = 0; 503 for (size_t idx = 0; idx < num_codes;) { 504 TEST_AND_RETURN_FALSE_SET_ERROR(index < *length, Error::kInsufficientInput); 505 auto pcode = buffer[index++]; 506 TEST_AND_RETURN_FALSE_SET_ERROR(pcode <= 155, Error::kInvalidInput); 507 508 auto code = pcode < 16 ? pcode : pcode < 20 ? 16 : pcode < 28 ? 17 : 18; 509 TEST_AND_RETURN_FALSE_SET_ERROR(CodeHuffman(code, &hcode, &nbits), 510 Error::kInvalidInput); 511 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(nbits, hcode), 512 Error::kInsufficientOutput); 513 if (code < 16) { 514 lens->push_back(code); 515 idx++; 516 } else { 517 size_t copy_num = 0; 518 uint8_t copy_val; 519 switch (code) { 520 case 16: 521 // Cannot repeat a non-existent last code if idx == 0. 522 TEST_AND_RETURN_FALSE_SET_ERROR(idx != 0, Error::kInvalidInput); 523 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(2, pcode - 16), 524 Error::kInsufficientOutput); 525 copy_num = 3 + pcode - 16; 526 copy_val = (*lens)[idx - 1]; 527 break; 528 529 case 17: 530 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(3, pcode - 20), 531 Error::kInsufficientOutput); 532 copy_num = 3 + pcode - 20; 533 copy_val = 0; 534 break; 535 536 case 18: 537 TEST_AND_RETURN_FALSE_SET_ERROR(bw->WriteBits(7, pcode - 28), 538 Error::kInsufficientOutput); 539 copy_num = 11 + pcode - 28; 540 copy_val = 0; 541 break; 542 543 default: 544 break; 545 } 546 idx += copy_num; 547 while (copy_num--) { 548 lens->push_back(copy_val); 549 } 550 } 551 } 552 TEST_AND_RETURN_FALSE(lens->size() == num_codes); 553 *length = index; 554 return true; 555 } 556 557 std::string BlockTypeToString(BlockType type) { 558 switch (type) { 559 case BlockType::kUncompressed: 560 return "Uncompressed"; 561 562 case BlockType::kFixed: 563 return "Fixed"; 564 565 case BlockType::kDynamic: 566 return "Dynamic"; 567 568 default: 569 return "Unknown"; 570 } 571 } 572 573 } // namespace puffin 574