Home | History | Annotate | Download | only in src
      1 // Copyright 2017 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef SRC_HUFFMAN_TABLE_H_
      6 #define SRC_HUFFMAN_TABLE_H_
      7 
      8 #include <cstddef>
      9 #include <cstdint>
     10 #include <string>
     11 #include <vector>
     12 
     13 #include "puffin/src/bit_reader.h"
     14 #include "puffin/src/bit_writer.h"
     15 #include "puffin/src/include/puffin/common.h"
     16 #include "puffin/src/include/puffin/errors.h"
     17 #include "puffin/src/set_errors.h"
     18 
     19 namespace puffin {
     20 
     21 // Maximum Huffman code length based on RFC1951.
     22 constexpr size_t kMaxHuffmanBits = 15;
     23 
     24 // Permutations of input Huffman code lengths (used only to read
     25 // |dynamic_code_lens_|).
     26 extern const uint8_t kPermutations[];
     27 
     28 // The bases of each alphabet which is added to the integer value of extra
     29 // bits that comes after the Huffman code in the input to create the given
     30 // length value. The last element is a guard.
     31 extern const uint16_t kLengthBases[];
     32 
     33 // Number of extra bits that comes after the associating Huffman code.
     34 extern const uint8_t kLengthExtraBits[];
     35 
     36 // same as |kLengthBases| except for the the distances instead of lengths.
     37 // The last element is a guard.
     38 extern const uint16_t kDistanceBases[];
     39 
     40 // Same as |kLengthExtraBits| except for distances instead of lengths.
     41 extern const uint8_t kDistanceExtraBits[];
     42 
     43 class HuffmanTable {
     44  public:
     45   HuffmanTable();
     46   virtual ~HuffmanTable() = default;
     47 
     48   // Checks the lengths of Huffman length arrays for correctness
     49   //
     50   // |num_lit_len|  IN  The number of literal/lengths code lengths
     51   // |num_distance| IN  The number of distance code lengths
     52   // |num_codes|    IN  The number of code lengths for reading Huffman table.
     53   inline bool CheckHuffmanArrayLengths(size_t num_lit_len,
     54                                        size_t num_distance,
     55                                        size_t num_codes) {
     56     if (num_lit_len > 286 || num_distance > 30 || num_codes > 19) {
     57       LOG(ERROR) << "The lengths of the dynamic Huffman table are invalid: "
     58                  << "num_lit_len(" << num_lit_len << ") "
     59                  << "num_distance(" << num_distance << ") "
     60                  << "num_codes(" << num_codes << ")";
     61       return false;
     62     }
     63     return true;
     64   }
     65 
     66   // Returns the maximum number of bits used in the current literal/length
     67   // Huffman codes.
     68   inline size_t LitLenMaxBits() { return lit_len_max_bits_; }
     69 
     70   // Returns the maximum number of bits used in the current distance Huffman
     71   // codes.
     72   inline size_t DistanceMaxBits() { return distance_max_bits_; }
     73 
     74   // Returns the alphabet associated with the set of input bits for the code
     75   // length array.
     76   //
     77   // |bits|     IN   The input Huffman bits read from the deflate stream.
     78   // |alphabet| OUT  The alphabet associated with the given |bits|.
     79   // |nbits|    OUT  The number of bits in the Huffman code of alphabet.
     80   // Returns true if there is an alphabet associated with |bits|.
     81   inline bool CodeAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
     82     auto hc = code_hcodes_[bits];
     83     TEST_AND_RETURN_FALSE(hc & 0x8000);
     84     *alphabet = hc & 0x7FFF;
     85     *nbits = code_lens_[*alphabet];
     86     return true;
     87   }
     88 
     89   // Returns the alphabet associated with the set of input bits for the
     90   // literal/length code length array.
     91   //
     92   // |bits|     IN   The input Huffman bits read from the deflate stream.
     93   // |alphabet| OUT  The alphabet associated with the given |bits|.
     94   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
     95   // Returns true if there is an alphabet associated with |bits|.
     96   inline bool LitLenAlphabet(uint32_t bits, uint16_t* alphabet, size_t* nbits) {
     97     auto hc = lit_len_hcodes_[bits];
     98     TEST_AND_RETURN_FALSE(hc & 0x8000);
     99     *alphabet = hc & 0x7FFF;
    100     *nbits = lit_len_lens_[*alphabet];
    101     return true;
    102   }
    103 
    104   // Returns the alphabet associated with the set of input bits for the
    105   // distance code length array.
    106   //
    107   // |bits|     IN   The input Huffman bits read from the deflate stream.
    108   // |alphabet| OUT  The alphabet associated with the given |bits|.
    109   // |nbits|    OUT  The number of bits in the Huffman code of the |alphabet|.
    110   // Returns true if there is an alphabet associated with |bits|.
    111   inline bool DistanceAlphabet(uint32_t bits,
    112                                uint16_t* alphabet,
    113                                size_t* nbits) {
    114     auto hc = distance_hcodes_[bits];
    115     TEST_AND_RETURN_FALSE(hc & 0x8000);
    116     *alphabet = hc & 0x7FFF;
    117     *nbits = distance_lens_[*alphabet];
    118     return true;
    119   }
    120 
    121   // Returns the Huffman code of a give alphabet for Huffman table codes.
    122   //
    123   // |alphabet| IN   The alphabet.
    124   // |huffman|  OUT  The Huffman code for |alphabet|.
    125   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
    126   //                 |alphabet|.
    127   inline bool CodeHuffman(uint16_t alphabet, uint16_t* huffman, size_t* nbits) {
    128     TEST_AND_RETURN_FALSE(alphabet < code_lens_.size());
    129     *huffman = code_rcodes_[alphabet];
    130     *nbits = code_lens_[alphabet];
    131     return true;
    132   }
    133 
    134   // Returns the Huffman code of a give alphabet for literal/length codes.
    135   //
    136   // |alphabet| IN   The alphabet.
    137   // |huffman|  OUT  The Huffman code for |alphabet|.
    138   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
    139   //                 |alphabet|.
    140   inline bool LitLenHuffman(uint16_t alphabet,
    141                             uint16_t* huffman,
    142                             size_t* nbits) {
    143     TEST_AND_RETURN_FALSE(alphabet < lit_len_lens_.size());
    144     *huffman = lit_len_rcodes_[alphabet];
    145     *nbits = lit_len_lens_[alphabet];
    146     return true;
    147   }
    148 
    149   inline bool EndOfBlockBitLength(size_t* nbits) {
    150     TEST_AND_RETURN_FALSE(256 < lit_len_lens_.size());
    151     *nbits = lit_len_lens_[256];
    152     return true;
    153   }
    154 
    155   // Returns the Huffman code of a give alphabet for distance codes.
    156   //
    157   // |alphabet| IN   The alphabet.
    158   // |huffman|  OUT  The Huffman code for |alphabet|.
    159   // |nbits|    OUT  The maximum number of bits in the Huffman code of the
    160   //                 |alphabet|.
    161   inline bool DistanceHuffman(uint16_t alphabet,
    162                               uint16_t* huffman,
    163                               size_t* nbits) {
    164     TEST_AND_RETURN_FALSE(alphabet < distance_lens_.size());
    165     *huffman = distance_rcodes_[alphabet];
    166     *nbits = distance_lens_[alphabet];
    167     return true;
    168   }
    169 
    170   // This populates the object with fixed huffman table parameters.
    171   // TODO(ahassani): Revamp the use of this function to be initiliazed once in
    172   // the lifetime of the program and only one instance needed.
    173   bool BuildFixedHuffmanTable();
    174 
    175   // This functions first reads the Huffman code length arrays from the input
    176   // deflate stream, then builds both literal/length and distance Huffman
    177   // code arrays. It also writes the Huffman table into the puffed stream.
    178   //
    179   // |br|      IN      The |BitReaderInterface| reading the deflate stream.
    180   // |buffer|  OUT     The object to write the Huffman table.
    181   // |length|  IN/OUT  The length available in the |buffer| and in return it
    182   //                   will be the length of Huffman table data written into
    183   //                   the |buffer|.
    184   bool BuildDynamicHuffmanTable(BitReaderInterface* br,
    185                                 uint8_t* buffer,
    186                                 size_t* length,
    187                                 Error* error);
    188 
    189   // This functions first reads the Huffman code length arrays from the input
    190   // puffed |buffer|, then builds both literal/length and distance Huffman code
    191   // arrays. It also writes the coded Huffman table arrays into the deflate
    192   // stream.
    193   //
    194   // |buffer| IN      The array to read the Huffman table from.
    195   // |length| IN      The length available in the |buffer|.
    196   // |bw|     IN/OUT  The |BitWriterInterface| for writing into the deflate
    197   //                  stream.
    198   // |error|  OUT     The error code.
    199   bool BuildDynamicHuffmanTable(const uint8_t* buffer,
    200                                 size_t length,
    201                                 BitWriterInterface* bw,
    202                                 Error* error);
    203 
    204  protected:
    205   // Initializes the Huffman codes from an array of lengths.
    206   //
    207   // |lens|     IN   The input array of code lengths.
    208   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
    209   bool InitHuffmanCodes(const Buffer& lens, size_t* max_bits);
    210 
    211   // Creates the Huffman code to alphabet array.
    212   // |lens|     IN   The input array of code lengths.
    213   // |hcodes|   OUT  The Huffman to alphabet array.
    214   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
    215   bool BuildHuffmanCodes(const Buffer& lens,
    216                          std::vector<uint16_t>* hcodes,
    217                          size_t* max_bits);
    218 
    219   // Creates the alphabet to Huffman code array.
    220   // |lens|     IN   The input array of code lengths.
    221   // |rcodes|   OUT  The Huffman to Huffman array.
    222   // |max_bits| OUT  The maximum number of bits used for the Huffman codes.
    223   bool BuildHuffmanReverseCodes(const Buffer& lens,
    224                                 std::vector<uint16_t>* rcodes,
    225                                 size_t* max_bits);
    226 
    227   // Reads a specific Huffman code length array from input. At the same time
    228   // writes the array into the puffed stream. The Huffman code length array is
    229   // either the literal/lengths or distance codes.
    230   //
    231   // |br|        IN      The |BitReaderInterface| for reading the deflate
    232   //                      stream.
    233   // |buffer|    OUT     The array to write the Huffman table.
    234   // |length|    IN/OUT  The length available in the |buffer| and in return it
    235   //                     will be the length of data written into the |buffer|.
    236   // |max_bits|  IN      The maximum number of bits in the Huffman codes.
    237   // |num_codes| IN      The size of the Huffman code length array in the input.
    238   // |lens|      OUT     The resulting Huffman code length array.
    239   // |error|     OUT     The error code.
    240   bool BuildHuffmanCodeLengths(BitReaderInterface* br,
    241                                uint8_t* buffer,
    242                                size_t* length,
    243                                size_t max_bits,
    244                                size_t num_codes,
    245                                Buffer* lens,
    246                                Error* error);
    247 
    248   // Similar to |BuildHuffmanCodeLengths| but for reading from puffed buffer and
    249   // writing into deflate stream.
    250   //
    251   // |buffer|    IN      The array to read the Huffman table from.
    252   // |length|    IN      The length available in the |buffer|.
    253   // |bw|        IN/OUT  The |BitWriterInterface| for writing into the deflate
    254   //                     stream.
    255   // |num_codes| IN      Number of Huffman code lengths to read from the
    256   //                     |buffer|.
    257   // |lens|      OUT     The Huffman code lengths array.
    258   // |error|     OUT     The error code.
    259   bool BuildHuffmanCodeLengths(const uint8_t* buffer,
    260                                size_t* length,
    261                                BitWriterInterface* bw,
    262                                size_t num_codes,
    263                                Buffer* lens,
    264                                Error* error);
    265 
    266  private:
    267   // A utility struct used to create Huffman codes.
    268   struct CodeIndexPair {
    269     uint16_t code;   // The Huffman code
    270     uint16_t index;  // The alphabet
    271   };
    272   // A vector with maximum size of 286 that is only uses as temporary space for
    273   // building Huffman codes.
    274   std::vector<CodeIndexPair> codeindexpairs_;
    275 
    276   // Used in building Huffman codes for literals/lengths and distances.
    277   std::vector<uint8_t> lit_len_lens_;
    278   std::vector<uint16_t> lit_len_hcodes_;
    279   std::vector<uint16_t> lit_len_rcodes_;
    280   size_t lit_len_max_bits_;
    281   std::vector<uint8_t> distance_lens_;
    282   std::vector<uint16_t> distance_hcodes_;
    283   std::vector<uint16_t> distance_rcodes_;
    284   size_t distance_max_bits_;
    285 
    286   // The reason for keeping a temporary buffer here is to avoid reallocing each
    287   // time.
    288   std::vector<uint8_t> tmp_lens_;
    289 
    290   // Used in building Huffman codes for reading and decoding literal/length and
    291   // distance Huffman code length arrays.
    292   std::vector<uint8_t> code_lens_;
    293   std::vector<uint16_t> code_hcodes_;
    294   std::vector<uint16_t> code_rcodes_;
    295   size_t code_max_bits_;
    296 
    297   bool initialized_;
    298 
    299   DISALLOW_COPY_AND_ASSIGN(HuffmanTable);
    300 };
    301 
    302 // The type of a block in a deflate stream.
    303 enum class BlockType : uint8_t {
    304   kUncompressed = 0x00,
    305   kFixed = 0x01,
    306   kDynamic = 0x02,
    307 };
    308 
    309 std::string BlockTypeToString(BlockType type);
    310 
    311 }  // namespace puffin
    312 
    313 #endif  // SRC_HUFFMAN_TABLE_H_
    314