Home | History | Annotate | Download | only in enc
      1 /* NOLINT(build/header_guard) */
      2 /* Copyright 2016 Google Inc. All Rights Reserved.
      3 
      4    Distributed under MIT license.
      5    See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
      6 */
      7 
      8 /* template parameters: FN, BUCKET_BITS, NUM_BANKS, BANK_BITS,
      9                         NUM_LAST_DISTANCES_TO_CHECK */
     10 
/* A (forgetful) hash table over the data seen by the compressor, used to
   help create backward references to previous data.

   Hashes are stored in chains, and chains are bucketed into groups. Each
   group of chains shares a storage "bank". When more than "bank size" chain
   nodes are added, the oldest nodes are replaced; this way several chains
   may share a tail. */
     17 
     18 #define HashForgetfulChain HASHER()
     19 
     20 #define BANK_SIZE (1 << BANK_BITS)
     21 
     22 /* Number of hash buckets. */
     23 #define BUCKET_SIZE (1 << BUCKET_BITS)
     24 
     25 #define CAPPED_CHAINS 0
     26 
     27 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
     28 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }
     29 
     30 /* HashBytes is the function that chooses the bucket to place the address in.*/
     31 static BROTLI_INLINE size_t FN(HashBytes)(const uint8_t *data) {
     32   const uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
     33   /* The higher bits contain more mixture from the multiplication,
     34      so we take our results from there. */
     35   return h >> (32 - BUCKET_BITS);
     36 }
     37 
     38 typedef struct FN(Slot) {
     39   uint16_t delta;
     40   uint16_t next;
     41 } FN(Slot);
     42 
     43 typedef struct FN(Bank) {
     44   FN(Slot) slots[BANK_SIZE];
     45 } FN(Bank);
     46 
     47 typedef struct HashForgetfulChain {
     48   uint32_t addr[BUCKET_SIZE];
     49   uint16_t head[BUCKET_SIZE];
     50   /* Truncated hash used for quick rejection of "distance cache" candidates. */
     51   uint8_t tiny_hash[65536];
     52   FN(Bank) banks[NUM_BANKS];
     53   uint16_t free_slot_idx[NUM_BANKS];
     54   size_t max_hops;
     55 } HashForgetfulChain;
     56 
     57 static BROTLI_INLINE HashForgetfulChain* FN(Self)(HasherHandle handle) {
     58   return (HashForgetfulChain*)&(GetHasherCommon(handle)[1]);
     59 }
     60 
     61 static void FN(Initialize)(
     62     HasherHandle handle, const BrotliEncoderParams* params) {
     63   FN(Self)(handle)->max_hops =
     64       (params->quality > 6 ? 7u : 8u) << (params->quality - 4);
     65 }
     66 
     67 static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
     68     size_t input_size, const uint8_t* data) {
     69   HashForgetfulChain* self = FN(Self)(handle);
     70   /* Partial preparation is 100 times slower (per socket). */
     71   size_t partial_prepare_threshold = BUCKET_SIZE >> 6;
     72   if (one_shot && input_size <= partial_prepare_threshold) {
     73     size_t i;
     74     for (i = 0; i < input_size; ++i) {
     75       size_t bucket = FN(HashBytes)(&data[i]);
     76       /* See InitEmpty comment. */
     77       self->addr[bucket] = 0xCCCCCCCC;
     78       self->head[bucket] = 0xCCCC;
     79     }
     80   } else {
     81     /* Fill |addr| array with 0xCCCCCCCC value. Because of wrapping, position
     82        processed by hasher never reaches 3GB + 64M; this makes all new chains
     83        to be terminated after the first node. */
     84     memset(self->addr, 0xCC, sizeof(self->addr));
     85     memset(self->head, 0, sizeof(self->head));
     86   }
     87   memset(self->tiny_hash, 0, sizeof(self->tiny_hash));
     88   memset(self->free_slot_idx, 0, sizeof(self->free_slot_idx));
     89 }
     90 
     91 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
     92     const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
     93     size_t input_size) {
     94   BROTLI_UNUSED(params);
     95   BROTLI_UNUSED(one_shot);
     96   BROTLI_UNUSED(input_size);
     97   return sizeof(HashForgetfulChain);
     98 }
     99 
    100 /* Look at 4 bytes at &data[ix & mask]. Compute a hash from these, and prepend
    101    node to corresponding chain; also update tiny_hash for current position. */
    102 static BROTLI_INLINE void FN(Store)(HasherHandle BROTLI_RESTRICT handle,
    103     const uint8_t* BROTLI_RESTRICT data, const size_t mask, const size_t ix) {
    104   HashForgetfulChain* self = FN(Self)(handle);
    105   const size_t key = FN(HashBytes)(&data[ix & mask]);
    106   const size_t bank = key & (NUM_BANKS - 1);
    107   const size_t idx = self->free_slot_idx[bank]++ & (BANK_SIZE - 1);
    108   size_t delta = ix - self->addr[key];
    109   self->tiny_hash[(uint16_t)ix] = (uint8_t)key;
    110   if (delta > 0xFFFF) delta = CAPPED_CHAINS ? 0 : 0xFFFF;
    111   self->banks[bank].slots[idx].delta = (uint16_t)delta;
    112   self->banks[bank].slots[idx].next = self->head[key];
    113   self->addr[key] = (uint32_t)ix;
    114   self->head[key] = (uint16_t)idx;
    115 }
    116 
    117 static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
    118     const uint8_t *data, const size_t mask, const size_t ix_start,
    119     const size_t ix_end) {
    120   size_t i;
    121   for (i = ix_start; i < ix_end; ++i) {
    122     FN(Store)(handle, data, mask, i);
    123   }
    124 }
    125 
    126 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
    127     size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    128     size_t ring_buffer_mask) {
    129   if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    130     /* Prepare the hashes for three last bytes of the last write.
    131        These could not be calculated before, since they require knowledge
    132        of both the previous and the current block. */
    133     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 3);
    134     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 2);
    135     FN(Store)(handle, ringbuffer, ring_buffer_mask, position - 1);
    136   }
    137 }
    138 
    139 static BROTLI_INLINE void FN(PrepareDistanceCache)(
    140     HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
    141   BROTLI_UNUSED(handle);
    142   PrepareDistanceCache(distance_cache, NUM_LAST_DISTANCES_TO_CHECK);
    143 }
    144 
    145 /* Find a longest backward match of &data[cur_ix] up to the length of
    146    max_length and stores the position cur_ix in the hash table.
    147 
    148    REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
    149              values; if this method is invoked repeatedly with the same distance
    150              cache values, it is enough to invoke FN(PrepareDistanceCache) once.
    151 
    152    Does not look for matches longer than max_length.
    153    Does not look for matches further away than max_backward.
    154    Writes the best match into |out|.
    155    |out|->score is updated only if a better match is found. */
    156 static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
    157     const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
    158     const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    159     const int* BROTLI_RESTRICT distance_cache,
    160     const size_t cur_ix, const size_t max_length, const size_t max_backward,
    161     const size_t gap, HasherSearchResult* BROTLI_RESTRICT out) {
    162   HashForgetfulChain* self = FN(Self)(handle);
    163   const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
    164   /* Don't accept a short copy from far away. */
    165   score_t min_score = out->score;
    166   score_t best_score = out->score;
    167   size_t best_len = out->len;
    168   size_t i;
    169   const size_t key = FN(HashBytes)(&data[cur_ix_masked]);
    170   const uint8_t tiny_hash = (uint8_t)(key);
    171   out->len = 0;
    172   out->len_code_delta = 0;
    173   /* Try last distance first. */
    174   for (i = 0; i < NUM_LAST_DISTANCES_TO_CHECK; ++i) {
    175     const size_t backward = (size_t)distance_cache[i];
    176     size_t prev_ix = (cur_ix - backward);
    177     /* For distance code 0 we want to consider 2-byte matches. */
    178     if (i > 0 && self->tiny_hash[(uint16_t)prev_ix] != tiny_hash) continue;
    179     if (prev_ix >= cur_ix || backward > max_backward) {
    180       continue;
    181     }
    182     prev_ix &= ring_buffer_mask;
    183     {
    184       const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
    185                                                   &data[cur_ix_masked],
    186                                                   max_length);
    187       if (len >= 2) {
    188         score_t score = BackwardReferenceScoreUsingLastDistance(len);
    189         if (best_score < score) {
    190           if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
    191           if (best_score < score) {
    192             best_score = score;
    193             best_len = len;
    194             out->len = best_len;
    195             out->distance = backward;
    196             out->score = best_score;
    197           }
    198         }
    199       }
    200     }
    201   }
    202   {
    203     const size_t bank = key & (NUM_BANKS - 1);
    204     size_t backward = 0;
    205     size_t hops = self->max_hops;
    206     size_t delta = cur_ix - self->addr[key];
    207     size_t slot = self->head[key];
    208     while (hops--) {
    209       size_t prev_ix;
    210       size_t last = slot;
    211       backward += delta;
    212       if (backward > max_backward || (CAPPED_CHAINS && !delta)) break;
    213       prev_ix = (cur_ix - backward) & ring_buffer_mask;
    214       slot = self->banks[bank].slots[last].next;
    215       delta = self->banks[bank].slots[last].delta;
    216       if (cur_ix_masked + best_len > ring_buffer_mask ||
    217           prev_ix + best_len > ring_buffer_mask ||
    218           data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
    219         continue;
    220       }
    221       {
    222         const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
    223                                                     &data[cur_ix_masked],
    224                                                     max_length);
    225         if (len >= 4) {
    226           /* Comparing for >= 3 does not change the semantics, but just saves
    227              for a few unnecessary binary logarithms in backward reference
    228              score, since we are not interested in such short matches. */
    229           score_t score = BackwardReferenceScore(len, backward);
    230           if (best_score < score) {
    231             best_score = score;
    232             best_len = len;
    233             out->len = best_len;
    234             out->distance = backward;
    235             out->score = best_score;
    236           }
    237         }
    238       }
    239     }
    240     FN(Store)(handle, data, ring_buffer_mask, cur_ix);
    241   }
    242   if (out->score == min_score) {
    243     SearchInStaticDictionary(dictionary, dictionary_hash,
    244         handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
    245         BROTLI_FALSE);
    246   }
    247 }
    248 
    249 #undef BANK_SIZE
    250 #undef BUCKET_SIZE
    251 #undef CAPPED_CHAINS
    252 
    253 #undef HashForgetfulChain
    254