/* NOLINT(build/header_guard) */
/* Copyright 2010 Google Inc. All Rights Reserved.

   Distributed under MIT license.
   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
*/

/* template parameters: FN */

/* A (forgetful) hash table into the data seen by the compressor, to
   help create backward references to previous data.

   This is a hash map of fixed size (bucket_size_) to a ring buffer of
   fixed size (block_size_). The ring buffer contains the last block_size_
   index positions of the given hash key in the compressed data. */

#define HashLongestMatch HASHER()

static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; }
static BROTLI_INLINE size_t FN(StoreLookahead)(void) { return 4; }

/* HashBytes is the function that chooses the bucket to place the address in. */
static uint32_t FN(HashBytes)(const uint8_t* data, const int shift) {
  uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32;
  /* The higher bits contain more mixture from the multiplication,
     so we take our results from there. */
  return (uint32_t)(h >> shift);
}
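
/* Illustration (hypothetical parameters, not fixed by this file): with
   bucket_bits == 15 the caller passes shift == 32 - 15 == 17, so the bucket
   index is the top 15 bits of h. Multiplying by the odd constant kHashMul32
   spreads differences in the low input bytes into those top bits; this is
   plain multiplicative hashing. */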

typedef struct HashLongestMatch {
  /* Number of hash buckets. */
  size_t bucket_size_;
  /* Only the block_size_ newest backward references are kept,
     and the older ones are forgotten. */
  size_t block_size_;
  /* Right-shift for computing the hash bucket index from the hash value. */
  int hash_shift_;
  /* Mask for accessing entries in a block (in a ring-buffer manner). */
  uint32_t block_mask_;

  /* --- Dynamic size members --- */

  /* Number of entries in a particular bucket. */
  /* uint16_t num[bucket_size]; */

  /* Buckets, each containing block_size_ backward references. */
  /* uint32_t buckets[bucket_size * block_size]; */
} HashLongestMatch;
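
/* The hasher state lives in one contiguous allocation:

     [HasherCommon][HashLongestMatch][uint16_t num[bucket_size_]]
                                     [uint32_t buckets[bucket_size_ * block_size_]]

   FN(HashMemAllocInBytes) below sizes everything after the common header;
   the accessors that follow recover each piece by stepping past the end of
   the preceding one. */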

/* Returns the hasher state stored right after the common header. */
static BROTLI_INLINE HashLongestMatch* FN(Self)(HasherHandle handle) {
  return (HashLongestMatch*)&(GetHasherCommon(handle)[1]);
}

/* Returns the per-bucket entry counters stored right after the struct. */
static BROTLI_INLINE uint16_t* FN(Num)(HashLongestMatch* self) {
  return (uint16_t*)(&self[1]);
}

/* Returns the bucket storage that follows the counters. */
static BROTLI_INLINE uint32_t* FN(Buckets)(HashLongestMatch* self) {
  return (uint32_t*)(&FN(Num)(self)[self->bucket_size_]);
}

static void FN(Initialize)(
    HasherHandle handle, const BrotliEncoderParams* params) {
  HasherCommon* common = GetHasherCommon(handle);
  HashLongestMatch* self = FN(Self)(handle);
  BROTLI_UNUSED(params);
  self->hash_shift_ = 32 - common->params.bucket_bits;
  self->bucket_size_ = (size_t)1 << common->params.bucket_bits;
  self->block_size_ = (size_t)1 << common->params.block_bits;
  self->block_mask_ = (uint32_t)(self->block_size_ - 1);
}
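
/* Illustration (hypothetical parameters): bucket_bits == 15 and
   block_bits == 6 give hash_shift_ == 17, bucket_size_ == 32768,
   block_size_ == 64 and block_mask_ == 63. */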

static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot,
    size_t input_size, const uint8_t* data) {
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  /* Partial preparation is 100 times slower (per byte), so it only pays off
     when the input touches a small fraction of the table. */
  size_t partial_prepare_threshold = self->bucket_size_ >> 6;
  if (one_shot && input_size <= partial_prepare_threshold) {
    size_t i;
    for (i = 0; i < input_size; ++i) {
      const uint32_t key = FN(HashBytes)(&data[i], self->hash_shift_);
      num[key] = 0;
    }
  } else {
    memset(num, 0, self->bucket_size_ * sizeof(num[0]));
  }
}

static BROTLI_INLINE size_t FN(HashMemAllocInBytes)(
    const BrotliEncoderParams* params, BROTLI_BOOL one_shot,
    size_t input_size) {
  size_t bucket_size = (size_t)1 << params->hasher.bucket_bits;
  size_t block_size = (size_t)1 << params->hasher.block_bits;
  BROTLI_UNUSED(one_shot);
  BROTLI_UNUSED(input_size);
  /* 2 bytes per bucket counter (uint16_t) plus 4 bytes (uint32_t) per
     stored position. */
  return sizeof(HashLongestMatch) + bucket_size * (2 + 4 * block_size);
}
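
/* E.g. (same hypothetical parameters as above): bucket_bits == 15 and
   block_bits == 6 give 32768 * (2 + 4 * 64) = 8454144 bytes (about 8 MiB)
   plus the struct itself. */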

/* Look at 4 bytes at &data[ix & mask].
   Compute a hash from these, and store the value of ix at that position. */
static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t* data,
    const size_t mask, const size_t ix) {
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  const uint32_t key = FN(HashBytes)(&data[ix & mask], self->hash_shift_);
  const size_t minor_ix = num[key] & self->block_mask_;
  const size_t offset =
      minor_ix + (key << GetHasherCommon(handle)->params.block_bits);
  FN(Buckets)(self)[offset] = (uint32_t)ix;
  ++num[key];
}
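
/* num[key] keeps growing past block_size_; masking with block_mask_ makes a
   new store overwrite the oldest slot in the bucket's ring buffer, which is
   how old references are "forgotten". The search loop in
   FN(FindLongestMatch) below walks back at most block_size_ entries. */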

static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle,
    const uint8_t* data, const size_t mask, const size_t ix_start,
    const size_t ix_end) {
  size_t i;
  for (i = ix_start; i < ix_end; ++i) {
    FN(Store)(handle, data, mask, i);
  }
}

static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle,
    size_t num_bytes, size_t position, const uint8_t* ringbuffer,
    size_t ringbuffer_mask) {
  if (num_bytes >= FN(HashTypeLength)() - 1 && position >= 3) {
    /* Prepare the hashes for the last three bytes of the last write.
       These could not be calculated before, since they require knowledge
       of both the previous and the current block. */
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 3);
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 2);
    FN(Store)(handle, ringbuffer, ringbuffer_mask, position - 1);
  }
}

static BROTLI_INLINE void FN(PrepareDistanceCache)(
    HasherHandle handle, int* BROTLI_RESTRICT distance_cache) {
  PrepareDistanceCache(distance_cache,
      GetHasherCommon(handle)->params.num_last_distances_to_check);
}

/* Finds the longest backward match of &data[cur_ix] up to the length of
   max_length and stores the position cur_ix in the hash table.

   REQUIRES: FN(PrepareDistanceCache) must be invoked for current distance cache
             values; if this method is invoked repeatedly with the same distance
             cache values, it is enough to invoke FN(PrepareDistanceCache) once.

   Does not look for matches longer than max_length.
   Does not look for matches further away than max_backward.
   Writes the best match into |out|.
   |out|->score is updated only if a better match is found. */
static BROTLI_INLINE void FN(FindLongestMatch)(HasherHandle handle,
    const BrotliDictionary* dictionary, const uint16_t* dictionary_hash,
    const uint8_t* BROTLI_RESTRICT data, const size_t ring_buffer_mask,
    const int* BROTLI_RESTRICT distance_cache, const size_t cur_ix,
    const size_t max_length, const size_t max_backward, const size_t gap,
    HasherSearchResult* BROTLI_RESTRICT out) {
  HasherCommon* common = GetHasherCommon(handle);
  HashLongestMatch* self = FN(Self)(handle);
  uint16_t* num = FN(Num)(self);
  uint32_t* buckets = FN(Buckets)(self);
  const size_t cur_ix_masked = cur_ix & ring_buffer_mask;
  /* Don't accept a short copy from far away. */
  score_t min_score = out->score;
  score_t best_score = out->score;
  size_t best_len = out->len;
  size_t i;
  out->len = 0;
  out->len_code_delta = 0;
  /* Try last distance first. */
  for (i = 0; i < (size_t)common->params.num_last_distances_to_check; ++i) {
    const size_t backward = (size_t)distance_cache[i];
    size_t prev_ix = (size_t)(cur_ix - backward);
    if (prev_ix >= cur_ix) {
      continue;
    }
    if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
      continue;
    }
    prev_ix &= ring_buffer_mask;

    if (cur_ix_masked + best_len > ring_buffer_mask ||
        prev_ix + best_len > ring_buffer_mask ||
        data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
      continue;
    }
    {
      const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                  &data[cur_ix_masked],
                                                  max_length);
      if (len >= 3 || (len == 2 && i < 2)) {
        /* Comparing for >= 2 does not change the semantics, but just saves
           a few unnecessary binary logarithms in the backward reference
           score, since we are not interested in such short matches. */
        score_t score = BackwardReferenceScoreUsingLastDistance(len);
        if (best_score < score) {
          if (i != 0) score -= BackwardReferencePenaltyUsingLastDistance(i);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
  }
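  /* Then walk the hash bucket for the current position's key, newest entry
     first; the walk stops as soon as the distance exceeds max_backward. */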
  {
    const uint32_t key =
        FN(HashBytes)(&data[cur_ix_masked], self->hash_shift_);
    uint32_t* BROTLI_RESTRICT bucket =
        &buckets[key << common->params.block_bits];
    const size_t down =
        (num[key] > self->block_size_) ? (num[key] - self->block_size_) : 0u;
    for (i = num[key]; i > down;) {
      size_t prev_ix = bucket[--i & self->block_mask_];
      const size_t backward = cur_ix - prev_ix;
      if (BROTLI_PREDICT_FALSE(backward > max_backward)) {
        break;
      }
      prev_ix &= ring_buffer_mask;
      if (cur_ix_masked + best_len > ring_buffer_mask ||
          prev_ix + best_len > ring_buffer_mask ||
          data[cur_ix_masked + best_len] != data[prev_ix + best_len]) {
        continue;
      }
      {
        const size_t len = FindMatchLengthWithLimit(&data[prev_ix],
                                                    &data[cur_ix_masked],
                                                    max_length);
        if (len >= 4) {
          /* Comparing for >= 3 does not change the semantics, but just saves
             a few unnecessary binary logarithms in the backward reference
             score, since we are not interested in such short matches. */
          score_t score = BackwardReferenceScore(len, backward);
          if (best_score < score) {
            best_score = score;
            best_len = len;
            out->len = best_len;
            out->distance = backward;
            out->score = best_score;
          }
        }
      }
    }
    bucket[num[key] & self->block_mask_] = (uint32_t)cur_ix;
    ++num[key];
  }
  if (min_score == out->score) {
    /* The score did not improve, i.e. no match was found in the ring buffer;
       fall back to the static dictionary. */
    SearchInStaticDictionary(dictionary, dictionary_hash,
        handle, &data[cur_ix_masked], max_length, max_backward + gap, out,
        BROTLI_FALSE);
  }
}
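
/* How a backward-reference search might drive this hasher (a sketch only;
   the real encoder driver lives elsewhere and differs in details):

     FN(Prepare)(handle, one_shot, input_size, data);
     FN(PrepareDistanceCache)(handle, distance_cache);
     for (pos = start; pos + FN(HashTypeLength)() <= end; ++pos) {
       HasherSearchResult out;
       out.len = 0;
       out.score = min_acceptable_score;  (hypothetical lower bound)
       FN(FindLongestMatch)(handle, dictionary, dictionary_hash, data,
                            ring_buffer_mask, distance_cache, pos,
                            end - pos, max_backward, gap, &out);
       emit a copy command if out.score improved, otherwise a literal;
       FindLongestMatch has already stored position pos in the table.
     }
*/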

#undef HashLongestMatch