1 /* 2 * LZEncoder 3 * 4 * Authors: Lasse Collin <lasse.collin (at) tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lz; 12 13 import java.io.OutputStream; 14 import java.io.IOException; 15 16 public abstract class LZEncoder { 17 public static final int MF_HC4 = 0x04; 18 public static final int MF_BT4 = 0x14; 19 20 /** 21 * Number of bytes to keep available before the current byte 22 * when moving the LZ window. 23 */ 24 private final int keepSizeBefore; 25 26 /** 27 * Number of bytes that must be available, the current byte included, 28 * to make hasEnoughData return true. Flushing and finishing are 29 * naturally exceptions to this since there cannot be any data after 30 * the end of the uncompressed input. 31 */ 32 private final int keepSizeAfter; 33 34 final int matchLenMax; 35 final int niceLen; 36 37 final byte[] buf; 38 39 int readPos = -1; 40 private int readLimit = -1; 41 private boolean finishing = false; 42 private int writePos = 0; 43 private int pendingSize = 0; 44 45 static void normalize(int[] positions, int normalizationOffset) { 46 for (int i = 0; i < positions.length; ++i) { 47 if (positions[i] <= normalizationOffset) 48 positions[i] = 0; 49 else 50 positions[i] -= normalizationOffset; 51 } 52 } 53 54 /** 55 * Gets the size of the LZ window buffer that needs to be allocated. 56 */ 57 private static int getBufSize( 58 int dictSize, int extraSizeBefore, int extraSizeAfter, 59 int matchLenMax) { 60 int keepSizeBefore = extraSizeBefore + dictSize; 61 int keepSizeAfter = extraSizeAfter + matchLenMax; 62 int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20); 63 return keepSizeBefore + keepSizeAfter + reserveSize; 64 } 65 66 /** 67 * Gets approximate memory usage of the LZEncoder base structure and 68 * the match finder as kibibytes. 69 */ 70 public static int getMemoryUsage( 71 int dictSize, int extraSizeBefore, int extraSizeAfter, 72 int matchLenMax, int mf) { 73 // Buffer size + a little extra 74 int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter, 75 matchLenMax) / 1024 + 10; 76 77 switch (mf) { 78 case MF_HC4: 79 m += HC4.getMemoryUsage(dictSize); 80 break; 81 82 case MF_BT4: 83 m += BT4.getMemoryUsage(dictSize); 84 break; 85 86 default: 87 throw new IllegalArgumentException(); 88 } 89 90 return m; 91 } 92 93 /** 94 * Creates a new LZEncoder. 95 * <p> 96 * @param dictSize dictionary size 97 * 98 * @param extraSizeBefore 99 * number of bytes to keep available in the 100 * history in addition to dictSize 101 * 102 * @param extraSizeAfter 103 * number of bytes that must be available 104 * after current position + matchLenMax 105 * 106 * @param niceLen if a match of at least <code>niceLen</code> 107 * bytes is found, be happy with it and don't 108 * stop looking for longer matches 109 * 110 * @param matchLenMax don't test for matches longer than 111 * <code>matchLenMax</code> bytes 112 * 113 * @param mf match finder ID 114 * 115 * @param depthLimit match finder search depth limit 116 */ 117 public static LZEncoder getInstance( 118 int dictSize, int extraSizeBefore, int extraSizeAfter, 119 int niceLen, int matchLenMax, int mf, int depthLimit) { 120 switch (mf) { 121 case MF_HC4: 122 return new HC4(dictSize, extraSizeBefore, extraSizeAfter, 123 niceLen, matchLenMax, depthLimit); 124 125 case MF_BT4: 126 return new BT4(dictSize, extraSizeBefore, extraSizeAfter, 127 niceLen, matchLenMax, depthLimit); 128 } 129 130 throw new IllegalArgumentException(); 131 } 132 133 /** 134 * Creates a new LZEncoder. See <code>getInstance</code>. 135 */ 136 LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, 137 int niceLen, int matchLenMax) { 138 buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter, 139 matchLenMax)]; 140 141 keepSizeBefore = extraSizeBefore + dictSize; 142 keepSizeAfter = extraSizeAfter + matchLenMax; 143 144 this.matchLenMax = matchLenMax; 145 this.niceLen = niceLen; 146 } 147 148 /** 149 * Sets a preset dictionary. If a preset dictionary is wanted, this 150 * function must be called immediately after creating the LZEncoder 151 * before any data has been encoded. 152 */ 153 public void setPresetDict(int dictSize, byte[] presetDict) { 154 assert !isStarted(); 155 assert writePos == 0; 156 157 if (presetDict != null) { 158 // If the preset dictionary buffer is bigger than the dictionary 159 // size, copy only the tail of the preset dictionary. 160 int copySize = Math.min(presetDict.length, dictSize); 161 int offset = presetDict.length - copySize; 162 System.arraycopy(presetDict, offset, buf, 0, copySize); 163 writePos += copySize; 164 skip(copySize); 165 } 166 } 167 168 /** 169 * Moves data from the end of the buffer to the beginning, discarding 170 * old data and making space for new input. 171 */ 172 private void moveWindow() { 173 // Align the move to a multiple of 16 bytes. LZMA2 needs this 174 // because it uses the lowest bits from readPos to get the 175 // alignment of the uncompressed data. 176 int moveOffset = (readPos + 1 - keepSizeBefore) & ~15; 177 int moveSize = writePos - moveOffset; 178 System.arraycopy(buf, moveOffset, buf, 0, moveSize); 179 180 readPos -= moveOffset; 181 readLimit -= moveOffset; 182 writePos -= moveOffset; 183 } 184 185 /** 186 * Copies new data into the LZEncoder's buffer. 187 */ 188 public int fillWindow(byte[] in, int off, int len) { 189 assert !finishing; 190 191 // Move the sliding window if needed. 192 if (readPos >= buf.length - keepSizeAfter) 193 moveWindow(); 194 195 // Try to fill the dictionary buffer. If it becomes full, 196 // some of the input bytes may be left unused. 197 if (len > buf.length - writePos) 198 len = buf.length - writePos; 199 200 System.arraycopy(in, off, buf, writePos, len); 201 writePos += len; 202 203 // Set the new readLimit but only if there's enough data to allow 204 // encoding of at least one more byte. 205 if (writePos >= keepSizeAfter) 206 readLimit = writePos - keepSizeAfter; 207 208 processPendingBytes(); 209 210 // Tell the caller how much input we actually copied into 211 // the dictionary. 212 return len; 213 } 214 215 /** 216 * Process pending bytes remaining from preset dictionary initialization 217 * or encoder flush operation. 218 */ 219 private void processPendingBytes() { 220 // After flushing or setting a preset dictionary there will be 221 // pending data that hasn't been ran through the match finder yet. 222 // Run it through the match finder now if there is enough new data 223 // available (readPos < readLimit) that the encoder may encode at 224 // least one more input byte. This way we don't waste any time 225 // looping in the match finder (and marking the same bytes as 226 // pending again) if the application provides very little new data 227 // per write call. 228 if (pendingSize > 0 && readPos < readLimit) { 229 readPos -= pendingSize; 230 int oldPendingSize = pendingSize; 231 pendingSize = 0; 232 skip(oldPendingSize); 233 assert pendingSize < oldPendingSize; 234 } 235 } 236 237 /** 238 * Returns true if at least one byte has already been run through 239 * the match finder. 240 */ 241 public boolean isStarted() { 242 return readPos != -1; 243 } 244 245 /** 246 * Marks that all the input needs to be made available in 247 * the encoded output. 248 */ 249 public void setFlushing() { 250 readLimit = writePos - 1; 251 processPendingBytes(); 252 } 253 254 /** 255 * Marks that there is no more input remaining. The read position 256 * can be advanced until the end of the data. 257 */ 258 public void setFinishing() { 259 readLimit = writePos - 1; 260 finishing = true; 261 processPendingBytes(); 262 } 263 264 /** 265 * Tests if there is enough input available to let the caller encode 266 * at least one more byte. 267 */ 268 public boolean hasEnoughData(int alreadyReadLen) { 269 return readPos - alreadyReadLen < readLimit; 270 } 271 272 public void copyUncompressed(OutputStream out, int backward, int len) 273 throws IOException { 274 out.write(buf, readPos + 1 - backward, len); 275 } 276 277 /** 278 * Get the number of bytes available, including the current byte. 279 * <p> 280 * Note that the result is undefined if <code>getMatches</code> or 281 * <code>skip</code> hasn't been called yet and no preset dictionary 282 * is being used. 283 */ 284 public int getAvail() { 285 assert isStarted(); 286 return writePos - readPos; 287 } 288 289 /** 290 * Gets the lowest four bits of the absolute offset of the current byte. 291 * Bits other than the lowest four are undefined. 292 */ 293 public int getPos() { 294 return readPos; 295 } 296 297 /** 298 * Gets the byte from the given backward offset. 299 * <p> 300 * The current byte is at <code>0</code>, the previous byte 301 * at <code>1</code> etc. To get a byte at zero-based distance, 302 * use <code>getByte(dist + 1)<code>. 303 * <p> 304 * This function is equivalent to <code>getByte(0, backward)</code>. 305 */ 306 public int getByte(int backward) { 307 return buf[readPos - backward] & 0xFF; 308 } 309 310 /** 311 * Gets the byte from the given forward minus backward offset. 312 * The forward offset is added to the current position. This lets 313 * one read bytes ahead of the current byte. 314 */ 315 public int getByte(int forward, int backward) { 316 return buf[readPos + forward - backward] & 0xFF; 317 } 318 319 /** 320 * Get the length of a match at the given distance. 321 * 322 * @param dist zero-based distance of the match to test 323 * @param lenLimit don't test for a match longer than this 324 * 325 * @return length of the match; it is in the range [0, lenLimit] 326 */ 327 public int getMatchLen(int dist, int lenLimit) { 328 int backPos = readPos - dist - 1; 329 int len = 0; 330 331 while (len < lenLimit && buf[readPos + len] == buf[backPos + len]) 332 ++len; 333 334 return len; 335 } 336 337 /** 338 * Get the length of a match at the given distance and forward offset. 339 * 340 * @param forward forward offset 341 * @param dist zero-based distance of the match to test 342 * @param lenLimit don't test for a match longer than this 343 * 344 * @return length of the match; it is in the range [0, lenLimit] 345 */ 346 public int getMatchLen(int forward, int dist, int lenLimit) { 347 int curPos = readPos + forward; 348 int backPos = curPos - dist - 1; 349 int len = 0; 350 351 while (len < lenLimit && buf[curPos + len] == buf[backPos + len]) 352 ++len; 353 354 return len; 355 } 356 357 /** 358 * Verifies that the matches returned by the match finder are valid. 359 * This is meant to be used in an assert statement. This is totally 360 * useless for actual encoding since match finder's results should 361 * naturally always be valid if it isn't broken. 362 * 363 * @param matches return value from <code>getMatches</code> 364 * 365 * @return true if matches are valid, false if match finder is broken 366 */ 367 public boolean verifyMatches(Matches matches) { 368 int lenLimit = Math.min(getAvail(), matchLenMax); 369 370 for (int i = 0; i < matches.count; ++i) 371 if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i]) 372 return false; 373 374 return true; 375 } 376 377 /** 378 * Moves to the next byte, checks if there is enough input available, 379 * and returns the amount of input available. 380 * 381 * @param requiredForFlushing 382 * minimum number of available bytes when 383 * flushing; encoding may be continued with 384 * new input after flushing 385 * @param requiredForFinishing 386 * minimum number of available bytes when 387 * finishing; encoding must not be continued 388 * after finishing or the match finder state 389 * may be corrupt 390 * 391 * @return the number of bytes available or zero if there 392 * is not enough input available 393 */ 394 int movePos(int requiredForFlushing, int requiredForFinishing) { 395 assert requiredForFlushing >= requiredForFinishing; 396 397 ++readPos; 398 int avail = writePos - readPos; 399 400 if (avail < requiredForFlushing) { 401 if (avail < requiredForFinishing || !finishing) { 402 ++pendingSize; 403 avail = 0; 404 } 405 } 406 407 return avail; 408 } 409 410 /** 411 * Runs match finder for the next byte and returns the matches found. 412 */ 413 public abstract Matches getMatches(); 414 415 /** 416 * Skips the given number of bytes in the match finder. 417 */ 418 public abstract void skip(int len); 419 } 420