1 /* NOLINT(build/header_guard) */ 2 /* Copyright 2016 Google Inc. All Rights Reserved. 3 4 Distributed under MIT license. 5 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 6 */ 7 8 /* template parameters: FN, BUCKET_BITS, MAX_TREE_COMP_LENGTH, 9 MAX_TREE_SEARCH_DEPTH */ 10 11 /* A (forgetful) hash table where each hash bucket contains a binary tree of 12 sequences whose first 4 bytes share the same hash code. 13 Each sequence is MAX_TREE_COMP_LENGTH long and is identified by its starting 14 position in the input data. The binary tree is sorted by the lexicographic 15 order of the sequences, and it is also a max-heap with respect to the 16 starting positions. */ 17 18 #define HashToBinaryTree HASHER() 19 20 #define BUCKET_SIZE (1 << BUCKET_BITS) 21 22 static BROTLI_INLINE size_t FN(HashTypeLength)(void) { return 4; } 23 static BROTLI_INLINE size_t FN(StoreLookahead)(void) { 24 return MAX_TREE_COMP_LENGTH; 25 } 26 27 static uint32_t FN(HashBytes)(const uint8_t *data) { 28 uint32_t h = BROTLI_UNALIGNED_LOAD32(data) * kHashMul32; 29 /* The higher bits contain more mixture from the multiplication, 30 so we take our results from there. */ 31 return h >> (32 - BUCKET_BITS); 32 } 33 34 typedef struct HashToBinaryTree { 35 /* The window size minus 1 */ 36 size_t window_mask_; 37 38 /* Hash table that maps the 4-byte hashes of the sequence to the last 39 position where this hash was found, which is the root of the binary 40 tree of sequences that share this hash bucket. */ 41 uint32_t buckets_[BUCKET_SIZE]; 42 43 /* A position used to mark a non-existent sequence, i.e. a tree is empty if 44 its root is at invalid_pos_ and a node is a leaf if both its children 45 are at invalid_pos_. */ 46 uint32_t invalid_pos_; 47 48 /* --- Dynamic size members --- */ 49 50 /* The union of the binary trees of each hash bucket. The root of the tree 51 corresponding to a hash is a sequence starting at buckets_[hash] and 52 the left and right children of a sequence starting at pos are 53 forest_[2 * pos] and forest_[2 * pos + 1]. */ 54 /* uint32_t forest[2 * num_nodes] */ 55 } HashToBinaryTree; 56 57 static BROTLI_INLINE HashToBinaryTree* FN(Self)(HasherHandle handle) { 58 return (HashToBinaryTree*)&(GetHasherCommon(handle)[1]); 59 } 60 61 static BROTLI_INLINE uint32_t* FN(Forest)(HashToBinaryTree* self) { 62 return (uint32_t*)(&self[1]); 63 } 64 65 static void FN(Initialize)( 66 HasherHandle handle, const BrotliEncoderParams* params) { 67 HashToBinaryTree* self = FN(Self)(handle); 68 self->window_mask_ = (1u << params->lgwin) - 1u; 69 self->invalid_pos_ = (uint32_t)(0 - self->window_mask_); 70 } 71 72 static void FN(Prepare)(HasherHandle handle, BROTLI_BOOL one_shot, 73 size_t input_size, const uint8_t* data) { 74 HashToBinaryTree* self = FN(Self)(handle); 75 uint32_t invalid_pos = self->invalid_pos_; 76 uint32_t i; 77 BROTLI_UNUSED(data); 78 BROTLI_UNUSED(one_shot); 79 BROTLI_UNUSED(input_size); 80 for (i = 0; i < BUCKET_SIZE; i++) { 81 self->buckets_[i] = invalid_pos; 82 } 83 } 84 85 static BROTLI_INLINE size_t FN(HashMemAllocInBytes)( 86 const BrotliEncoderParams* params, BROTLI_BOOL one_shot, 87 size_t input_size) { 88 size_t num_nodes = (size_t)1 << params->lgwin; 89 if (one_shot && input_size < num_nodes) { 90 num_nodes = input_size; 91 } 92 return sizeof(HashToBinaryTree) + 2 * sizeof(uint32_t) * num_nodes; 93 } 94 95 static BROTLI_INLINE size_t FN(LeftChildIndex)(HashToBinaryTree* self, 96 const size_t pos) { 97 return 2 * (pos & self->window_mask_); 98 } 99 100 static BROTLI_INLINE size_t FN(RightChildIndex)(HashToBinaryTree* self, 101 const size_t pos) { 102 return 2 * (pos & self->window_mask_) + 1; 103 } 104 105 /* Stores the hash of the next 4 bytes and in a single tree-traversal, the 106 hash bucket's binary tree is searched for matches and is re-rooted at the 107 current position. 108 109 If less than MAX_TREE_COMP_LENGTH data is available, the hash bucket of the 110 current position is searched for matches, but the state of the hash table 111 is not changed, since we can not know the final sorting order of the 112 current (incomplete) sequence. 113 114 This function must be called with increasing cur_ix positions. */ 115 static BROTLI_INLINE BackwardMatch* FN(StoreAndFindMatches)( 116 HashToBinaryTree* self, const uint8_t* const BROTLI_RESTRICT data, 117 const size_t cur_ix, const size_t ring_buffer_mask, const size_t max_length, 118 const size_t max_backward, size_t* const BROTLI_RESTRICT best_len, 119 BackwardMatch* BROTLI_RESTRICT matches) { 120 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 121 const size_t max_comp_len = 122 BROTLI_MIN(size_t, max_length, MAX_TREE_COMP_LENGTH); 123 const BROTLI_BOOL should_reroot_tree = 124 TO_BROTLI_BOOL(max_length >= MAX_TREE_COMP_LENGTH); 125 const uint32_t key = FN(HashBytes)(&data[cur_ix_masked]); 126 uint32_t* forest = FN(Forest)(self); 127 size_t prev_ix = self->buckets_[key]; 128 /* The forest index of the rightmost node of the left subtree of the new 129 root, updated as we traverse and re-root the tree of the hash bucket. */ 130 size_t node_left = FN(LeftChildIndex)(self, cur_ix); 131 /* The forest index of the leftmost node of the right subtree of the new 132 root, updated as we traverse and re-root the tree of the hash bucket. */ 133 size_t node_right = FN(RightChildIndex)(self, cur_ix); 134 /* The match length of the rightmost node of the left subtree of the new 135 root, updated as we traverse and re-root the tree of the hash bucket. */ 136 size_t best_len_left = 0; 137 /* The match length of the leftmost node of the right subtree of the new 138 root, updated as we traverse and re-root the tree of the hash bucket. */ 139 size_t best_len_right = 0; 140 size_t depth_remaining; 141 if (should_reroot_tree) { 142 self->buckets_[key] = (uint32_t)cur_ix; 143 } 144 for (depth_remaining = MAX_TREE_SEARCH_DEPTH; ; --depth_remaining) { 145 const size_t backward = cur_ix - prev_ix; 146 const size_t prev_ix_masked = prev_ix & ring_buffer_mask; 147 if (backward == 0 || backward > max_backward || depth_remaining == 0) { 148 if (should_reroot_tree) { 149 forest[node_left] = self->invalid_pos_; 150 forest[node_right] = self->invalid_pos_; 151 } 152 break; 153 } 154 { 155 const size_t cur_len = BROTLI_MIN(size_t, best_len_left, best_len_right); 156 size_t len; 157 assert(cur_len <= MAX_TREE_COMP_LENGTH); 158 len = cur_len + 159 FindMatchLengthWithLimit(&data[cur_ix_masked + cur_len], 160 &data[prev_ix_masked + cur_len], 161 max_length - cur_len); 162 assert(0 == memcmp(&data[cur_ix_masked], &data[prev_ix_masked], len)); 163 if (matches && len > *best_len) { 164 *best_len = len; 165 InitBackwardMatch(matches++, backward, len); 166 } 167 if (len >= max_comp_len) { 168 if (should_reroot_tree) { 169 forest[node_left] = forest[FN(LeftChildIndex)(self, prev_ix)]; 170 forest[node_right] = forest[FN(RightChildIndex)(self, prev_ix)]; 171 } 172 break; 173 } 174 if (data[cur_ix_masked + len] > data[prev_ix_masked + len]) { 175 best_len_left = len; 176 if (should_reroot_tree) { 177 forest[node_left] = (uint32_t)prev_ix; 178 } 179 node_left = FN(RightChildIndex)(self, prev_ix); 180 prev_ix = forest[node_left]; 181 } else { 182 best_len_right = len; 183 if (should_reroot_tree) { 184 forest[node_right] = (uint32_t)prev_ix; 185 } 186 node_right = FN(LeftChildIndex)(self, prev_ix); 187 prev_ix = forest[node_right]; 188 } 189 } 190 } 191 return matches; 192 } 193 194 /* Finds all backward matches of &data[cur_ix & ring_buffer_mask] up to the 195 length of max_length and stores the position cur_ix in the hash table. 196 197 Sets *num_matches to the number of matches found, and stores the found 198 matches in matches[0] to matches[*num_matches - 1]. The matches will be 199 sorted by strictly increasing length and (non-strictly) increasing 200 distance. */ 201 static BROTLI_INLINE size_t FN(FindAllMatches)(HasherHandle handle, 202 const BrotliDictionary* dictionary, const uint8_t* data, 203 const size_t ring_buffer_mask, const size_t cur_ix, 204 const size_t max_length, const size_t max_backward, const size_t gap, 205 const BrotliEncoderParams* params, BackwardMatch* matches) { 206 BackwardMatch* const orig_matches = matches; 207 const size_t cur_ix_masked = cur_ix & ring_buffer_mask; 208 size_t best_len = 1; 209 const size_t short_match_max_backward = 210 params->quality != HQ_ZOPFLIFICATION_QUALITY ? 16 : 64; 211 size_t stop = cur_ix - short_match_max_backward; 212 uint32_t dict_matches[BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN + 1]; 213 size_t i; 214 if (cur_ix < short_match_max_backward) { stop = 0; } 215 for (i = cur_ix - 1; i > stop && best_len <= 2; --i) { 216 size_t prev_ix = i; 217 const size_t backward = cur_ix - prev_ix; 218 if (BROTLI_PREDICT_FALSE(backward > max_backward)) { 219 break; 220 } 221 prev_ix &= ring_buffer_mask; 222 if (data[cur_ix_masked] != data[prev_ix] || 223 data[cur_ix_masked + 1] != data[prev_ix + 1]) { 224 continue; 225 } 226 { 227 const size_t len = 228 FindMatchLengthWithLimit(&data[prev_ix], &data[cur_ix_masked], 229 max_length); 230 if (len > best_len) { 231 best_len = len; 232 InitBackwardMatch(matches++, backward, len); 233 } 234 } 235 } 236 if (best_len < max_length) { 237 matches = FN(StoreAndFindMatches)(FN(Self)(handle), data, cur_ix, 238 ring_buffer_mask, max_length, max_backward, &best_len, matches); 239 } 240 for (i = 0; i <= BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN; ++i) { 241 dict_matches[i] = kInvalidMatch; 242 } 243 { 244 size_t minlen = BROTLI_MAX(size_t, 4, best_len + 1); 245 if (BrotliFindAllStaticDictionaryMatches(dictionary, 246 &data[cur_ix_masked], minlen, max_length, &dict_matches[0])) { 247 size_t maxlen = BROTLI_MIN( 248 size_t, BROTLI_MAX_STATIC_DICTIONARY_MATCH_LEN, max_length); 249 size_t l; 250 for (l = minlen; l <= maxlen; ++l) { 251 uint32_t dict_id = dict_matches[l]; 252 if (dict_id < kInvalidMatch) { 253 size_t distance = max_backward + gap + (dict_id >> 5) + 1; 254 if (distance < BROTLI_MAX_DISTANCE) { 255 InitDictionaryBackwardMatch(matches++, distance, l, dict_id & 31); 256 } 257 } 258 } 259 } 260 } 261 return (size_t)(matches - orig_matches); 262 } 263 264 /* Stores the hash of the next 4 bytes and re-roots the binary tree at the 265 current sequence, without returning any matches. 266 REQUIRES: ix + MAX_TREE_COMP_LENGTH <= end-of-current-block */ 267 static BROTLI_INLINE void FN(Store)(HasherHandle handle, const uint8_t *data, 268 const size_t mask, const size_t ix) { 269 HashToBinaryTree* self = FN(Self)(handle); 270 /* Maximum distance is window size - 16, see section 9.1. of the spec. */ 271 const size_t max_backward = self->window_mask_ - BROTLI_WINDOW_GAP + 1; 272 FN(StoreAndFindMatches)(self, data, ix, mask, MAX_TREE_COMP_LENGTH, 273 max_backward, NULL, NULL); 274 } 275 276 static BROTLI_INLINE void FN(StoreRange)(HasherHandle handle, 277 const uint8_t *data, const size_t mask, const size_t ix_start, 278 const size_t ix_end) { 279 size_t i = ix_start; 280 size_t j = ix_start; 281 if (ix_start + 63 <= ix_end) { 282 i = ix_end - 63; 283 } 284 if (ix_start + 512 <= i) { 285 for (; j < i; j += 8) { 286 FN(Store)(handle, data, mask, j); 287 } 288 } 289 for (; i < ix_end; ++i) { 290 FN(Store)(handle, data, mask, i); 291 } 292 } 293 294 static BROTLI_INLINE void FN(StitchToPreviousBlock)(HasherHandle handle, 295 size_t num_bytes, size_t position, const uint8_t* ringbuffer, 296 size_t ringbuffer_mask) { 297 HashToBinaryTree* self = FN(Self)(handle); 298 if (num_bytes >= FN(HashTypeLength)() - 1 && 299 position >= MAX_TREE_COMP_LENGTH) { 300 /* Store the last `MAX_TREE_COMP_LENGTH - 1` positions in the hasher. 301 These could not be calculated before, since they require knowledge 302 of both the previous and the current block. */ 303 const size_t i_start = position - MAX_TREE_COMP_LENGTH + 1; 304 const size_t i_end = BROTLI_MIN(size_t, position, i_start + num_bytes); 305 size_t i; 306 for (i = i_start; i < i_end; ++i) { 307 /* Maximum distance is window size - 16, see section 9.1. of the spec. 308 Furthermore, we have to make sure that we don't look further back 309 from the start of the next block than the window size, otherwise we 310 could access already overwritten areas of the ring-buffer. */ 311 const size_t max_backward = 312 self->window_mask_ - BROTLI_MAX(size_t, 313 BROTLI_WINDOW_GAP - 1, 314 position - i); 315 /* We know that i + MAX_TREE_COMP_LENGTH <= position + num_bytes, i.e. the 316 end of the current block and that we have at least 317 MAX_TREE_COMP_LENGTH tail in the ring-buffer. */ 318 FN(StoreAndFindMatches)(self, ringbuffer, i, ringbuffer_mask, 319 MAX_TREE_COMP_LENGTH, max_backward, NULL, NULL); 320 } 321 } 322 } 323 324 #undef BUCKET_SIZE 325 326 #undef HashToBinaryTree 327