Home | History | Annotate | Download | only in lz
      1 /*
      2  * LZEncoder
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz.lz;
     12 
     13 import java.io.OutputStream;
     14 import java.io.IOException;
     15 
     16 public abstract class LZEncoder {
     17     public static final int MF_HC4 = 0x04;
     18     public static final int MF_BT4 = 0x14;
     19 
     20     /**
     21      * Number of bytes to keep available before the current byte
     22      * when moving the LZ window.
     23      */
     24     private final int keepSizeBefore;
     25 
     26     /**
     27      * Number of bytes that must be available, the current byte included,
     28      * to make hasEnoughData return true. Flushing and finishing are
     29      * naturally exceptions to this since there cannot be any data after
     30      * the end of the uncompressed input.
     31      */
     32     private final int keepSizeAfter;
     33 
     34     final int matchLenMax;
     35     final int niceLen;
     36 
     37     final byte[] buf;
     38 
     39     int readPos = -1;
     40     private int readLimit = -1;
     41     private boolean finishing = false;
     42     private int writePos = 0;
     43     private int pendingSize = 0;
     44 
     45     static void normalize(int[] positions, int normalizationOffset) {
     46         for (int i = 0; i < positions.length; ++i) {
     47             if (positions[i] <= normalizationOffset)
     48                 positions[i] = 0;
     49             else
     50                 positions[i] -= normalizationOffset;
     51         }
     52     }
     53 
     54     /**
     55      * Gets the size of the LZ window buffer that needs to be allocated.
     56      */
     57     private static int getBufSize(
     58             int dictSize, int extraSizeBefore, int extraSizeAfter,
     59             int matchLenMax) {
     60         int keepSizeBefore = extraSizeBefore + dictSize;
     61         int keepSizeAfter = extraSizeAfter + matchLenMax;
     62         int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
     63         return keepSizeBefore + keepSizeAfter + reserveSize;
     64     }
     65 
     66     /**
     67      * Gets approximate memory usage of the LZEncoder base structure and
     68      * the match finder as kibibytes.
     69      */
     70     public static int getMemoryUsage(
     71             int dictSize, int extraSizeBefore, int extraSizeAfter,
     72             int matchLenMax, int mf) {
     73         // Buffer size + a little extra
     74         int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
     75                            matchLenMax) / 1024 + 10;
     76 
     77         switch (mf) {
     78             case MF_HC4:
     79                 m += HC4.getMemoryUsage(dictSize);
     80                 break;
     81 
     82             case MF_BT4:
     83                 m += BT4.getMemoryUsage(dictSize);
     84                 break;
     85 
     86             default:
     87                 throw new IllegalArgumentException();
     88         }
     89 
     90         return m;
     91     }
     92 
     93     /**
     94      * Creates a new LZEncoder.
     95      * <p>
     96      * @param       dictSize    dictionary size
     97      *
     98      * @param       extraSizeBefore
     99      *                          number of bytes to keep available in the
    100      *                          history in addition to dictSize
    101      *
    102      * @param       extraSizeAfter
    103      *                          number of bytes that must be available
    104      *                          after current position + matchLenMax
    105      *
    106      * @param       niceLen     if a match of at least <code>niceLen</code>
    107      *                          bytes is found, be happy with it and don't
    108      *                          stop looking for longer matches
    109      *
    110      * @param       matchLenMax don't test for matches longer than
    111      *                          <code>matchLenMax</code> bytes
    112      *
    113      * @param       mf          match finder ID
    114      *
    115      * @param       depthLimit  match finder search depth limit
    116      */
    117     public static LZEncoder getInstance(
    118             int dictSize, int extraSizeBefore, int extraSizeAfter,
    119             int niceLen, int matchLenMax, int mf, int depthLimit) {
    120         switch (mf) {
    121             case MF_HC4:
    122                 return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
    123                                niceLen, matchLenMax, depthLimit);
    124 
    125             case MF_BT4:
    126                 return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
    127                                niceLen, matchLenMax, depthLimit);
    128         }
    129 
    130         throw new IllegalArgumentException();
    131     }
    132 
    133     /**
    134      * Creates a new LZEncoder. See <code>getInstance</code>.
    135      */
    136     LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
    137               int niceLen, int matchLenMax) {
    138         buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
    139                                   matchLenMax)];
    140 
    141         keepSizeBefore = extraSizeBefore + dictSize;
    142         keepSizeAfter = extraSizeAfter + matchLenMax;
    143 
    144         this.matchLenMax = matchLenMax;
    145         this.niceLen = niceLen;
    146     }
    147 
    148     /**
    149      * Sets a preset dictionary. If a preset dictionary is wanted, this
    150      * function must be called immediately after creating the LZEncoder
    151      * before any data has been encoded.
    152      */
    153     public void setPresetDict(int dictSize, byte[] presetDict) {
    154         assert !isStarted();
    155         assert writePos == 0;
    156 
    157         if (presetDict != null) {
    158             // If the preset dictionary buffer is bigger than the dictionary
    159             // size, copy only the tail of the preset dictionary.
    160             int copySize = Math.min(presetDict.length, dictSize);
    161             int offset = presetDict.length - copySize;
    162             System.arraycopy(presetDict, offset, buf, 0, copySize);
    163             writePos += copySize;
    164             skip(copySize);
    165         }
    166     }
    167 
    168     /**
    169      * Moves data from the end of the buffer to the beginning, discarding
    170      * old data and making space for new input.
    171      */
    172     private void moveWindow() {
    173         // Align the move to a multiple of 16 bytes. LZMA2 needs this
    174         // because it uses the lowest bits from readPos to get the
    175         // alignment of the uncompressed data.
    176         int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
    177         int moveSize = writePos - moveOffset;
    178         System.arraycopy(buf, moveOffset, buf, 0, moveSize);
    179 
    180         readPos -= moveOffset;
    181         readLimit -= moveOffset;
    182         writePos -= moveOffset;
    183     }
    184 
    185     /**
    186      * Copies new data into the LZEncoder's buffer.
    187      */
    188     public int fillWindow(byte[] in, int off, int len) {
    189         assert !finishing;
    190 
    191         // Move the sliding window if needed.
    192         if (readPos >= buf.length - keepSizeAfter)
    193             moveWindow();
    194 
    195         // Try to fill the dictionary buffer. If it becomes full,
    196         // some of the input bytes may be left unused.
    197         if (len > buf.length - writePos)
    198             len = buf.length - writePos;
    199 
    200         System.arraycopy(in, off, buf, writePos, len);
    201         writePos += len;
    202 
    203         // Set the new readLimit but only if there's enough data to allow
    204         // encoding of at least one more byte.
    205         if (writePos >= keepSizeAfter)
    206             readLimit = writePos - keepSizeAfter;
    207 
    208         processPendingBytes();
    209 
    210         // Tell the caller how much input we actually copied into
    211         // the dictionary.
    212         return len;
    213     }
    214 
    215     /**
    216      * Process pending bytes remaining from preset dictionary initialization
    217      * or encoder flush operation.
    218      */
    219     private void processPendingBytes() {
    220         // After flushing or setting a preset dictionary there will be
    221         // pending data that hasn't been ran through the match finder yet.
    222         // Run it through the match finder now if there is enough new data
    223         // available (readPos < readLimit) that the encoder may encode at
    224         // least one more input byte. This way we don't waste any time
    225         // looping in the match finder (and marking the same bytes as
    226         // pending again) if the application provides very little new data
    227         // per write call.
    228         if (pendingSize > 0 && readPos < readLimit) {
    229             readPos -= pendingSize;
    230             int oldPendingSize = pendingSize;
    231             pendingSize = 0;
    232             skip(oldPendingSize);
    233             assert pendingSize < oldPendingSize;
    234         }
    235     }
    236 
    237     /**
    238      * Returns true if at least one byte has already been run through
    239      * the match finder.
    240      */
    241     public boolean isStarted() {
    242         return readPos != -1;
    243     }
    244 
    245     /**
    246      * Marks that all the input needs to be made available in
    247      * the encoded output.
    248      */
    249     public void setFlushing() {
    250         readLimit = writePos - 1;
    251         processPendingBytes();
    252     }
    253 
    254     /**
    255      * Marks that there is no more input remaining. The read position
    256      * can be advanced until the end of the data.
    257      */
    258     public void setFinishing() {
    259         readLimit = writePos - 1;
    260         finishing = true;
    261         processPendingBytes();
    262     }
    263 
    264     /**
    265      * Tests if there is enough input available to let the caller encode
    266      * at least one more byte.
    267      */
    268     public boolean hasEnoughData(int alreadyReadLen) {
    269         return readPos - alreadyReadLen < readLimit;
    270     }
    271 
    272     public void copyUncompressed(OutputStream out, int backward, int len)
    273             throws IOException {
    274         out.write(buf, readPos + 1 - backward, len);
    275     }
    276 
    277     /**
    278      * Get the number of bytes available, including the current byte.
    279      * <p>
    280      * Note that the result is undefined if <code>getMatches</code> or
    281      * <code>skip</code> hasn't been called yet and no preset dictionary
    282      * is being used.
    283      */
    284     public int getAvail() {
    285         assert isStarted();
    286         return writePos - readPos;
    287     }
    288 
    289     /**
    290      * Gets the lowest four bits of the absolute offset of the current byte.
    291      * Bits other than the lowest four are undefined.
    292      */
    293     public int getPos() {
    294         return readPos;
    295     }
    296 
    297     /**
    298      * Gets the byte from the given backward offset.
    299      * <p>
    300      * The current byte is at <code>0</code>, the previous byte
    301      * at <code>1</code> etc. To get a byte at zero-based distance,
    302      * use <code>getByte(dist + 1)<code>.
    303      * <p>
    304      * This function is equivalent to <code>getByte(0, backward)</code>.
    305      */
    306     public int getByte(int backward) {
    307         return buf[readPos - backward] & 0xFF;
    308     }
    309 
    310     /**
    311      * Gets the byte from the given forward minus backward offset.
    312      * The forward offset is added to the current position. This lets
    313      * one read bytes ahead of the current byte.
    314      */
    315     public int getByte(int forward, int backward) {
    316         return buf[readPos + forward - backward] & 0xFF;
    317     }
    318 
    319     /**
    320      * Get the length of a match at the given distance.
    321      *
    322      * @param       dist        zero-based distance of the match to test
    323      * @param       lenLimit    don't test for a match longer than this
    324      *
    325      * @return      length of the match; it is in the range [0, lenLimit]
    326      */
    327     public int getMatchLen(int dist, int lenLimit) {
    328         int backPos = readPos - dist - 1;
    329         int len = 0;
    330 
    331         while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
    332             ++len;
    333 
    334         return len;
    335     }
    336 
    337     /**
    338      * Get the length of a match at the given distance and forward offset.
    339      *
    340      * @param       forward     forward offset
    341      * @param       dist        zero-based distance of the match to test
    342      * @param       lenLimit    don't test for a match longer than this
    343      *
    344      * @return      length of the match; it is in the range [0, lenLimit]
    345      */
    346     public int getMatchLen(int forward, int dist, int lenLimit) {
    347         int curPos = readPos + forward;
    348         int backPos = curPos - dist - 1;
    349         int len = 0;
    350 
    351         while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
    352             ++len;
    353 
    354         return len;
    355     }
    356 
    357     /**
    358      * Verifies that the matches returned by the match finder are valid.
    359      * This is meant to be used in an assert statement. This is totally
    360      * useless for actual encoding since match finder's results should
    361      * naturally always be valid if it isn't broken.
    362      *
    363      * @param       matches     return value from <code>getMatches</code>
    364      *
    365      * @return      true if matches are valid, false if match finder is broken
    366      */
    367     public boolean verifyMatches(Matches matches) {
    368         int lenLimit = Math.min(getAvail(), matchLenMax);
    369 
    370         for (int i = 0; i < matches.count; ++i)
    371             if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
    372                 return false;
    373 
    374         return true;
    375     }
    376 
    377     /**
    378      * Moves to the next byte, checks if there is enough input available,
    379      * and returns the amount of input available.
    380      *
    381      * @param       requiredForFlushing
    382      *                          minimum number of available bytes when
    383      *                          flushing; encoding may be continued with
    384      *                          new input after flushing
    385      * @param       requiredForFinishing
    386      *                          minimum number of available bytes when
    387      *                          finishing; encoding must not be continued
    388      *                          after finishing or the match finder state
    389      *                          may be corrupt
    390      *
    391      * @return      the number of bytes available or zero if there
    392      *              is not enough input available
    393      */
    394     int movePos(int requiredForFlushing, int requiredForFinishing) {
    395         assert requiredForFlushing >= requiredForFinishing;
    396 
    397         ++readPos;
    398         int avail = writePos - readPos;
    399 
    400         if (avail < requiredForFlushing) {
    401             if (avail < requiredForFinishing || !finishing) {
    402                 ++pendingSize;
    403                 avail = 0;
    404             }
    405         }
    406 
    407         return avail;
    408     }
    409 
    410     /**
    411      * Runs match finder for the next byte and returns the matches found.
    412      */
    413     public abstract Matches getMatches();
    414 
    415     /**
    416      * Skips the given number of bytes in the match finder.
    417      */
    418     public abstract void skip(int len);
    419 }
    420