Home | History | Annotate | Download | only in lzma
      1 /*
      2  * LZMAEncoderNormal
      3  *
      4  * Authors: Lasse Collin <lasse.collin (at) tukaani.org>
      5  *          Igor Pavlov <http://7-zip.org/>
      6  *
      7  * This file has been put into the public domain.
      8  * You can do whatever you want with this file.
      9  */
     10 
     11 package org.tukaani.xz.lzma;
     12 
     13 import org.tukaani.xz.lz.LZEncoder;
     14 import org.tukaani.xz.lz.Matches;
     15 import org.tukaani.xz.rangecoder.RangeEncoder;
     16 
     17 final class LZMAEncoderNormal extends LZMAEncoder {
     18     private static final int OPTS = 4096;
     19 
     20     private static final int EXTRA_SIZE_BEFORE = OPTS;
     21     private static final int EXTRA_SIZE_AFTER = OPTS;
     22 
     23     private final Optimum[] opts = new Optimum[OPTS];
     24     private int optCur = 0;
     25     private int optEnd = 0;
     26 
     27     private Matches matches;
     28 
     29     // These are fields solely to avoid allocating the objects again and
     30     // again on each function call.
     31     private final int[] repLens = new int[REPS];
     32     private final State nextState = new State();
     33 
     34     static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
     35         return LZEncoder.getMemoryUsage(dictSize,
     36                    Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
     37                    EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
     38                + OPTS * 64 / 1024;
     39     }
     40 
     41     LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
     42                              int dictSize, int extraSizeBefore,
     43                              int niceLen, int mf, int depthLimit) {
     44         super(rc, LZEncoder.getInstance(dictSize,
     45                                         Math.max(extraSizeBefore,
     46                                                  EXTRA_SIZE_BEFORE),
     47                                         EXTRA_SIZE_AFTER,
     48                                         niceLen, MATCH_LEN_MAX,
     49                                         mf, depthLimit),
     50               lc, lp, pb, dictSize, niceLen);
     51 
     52         for (int i = 0; i < OPTS; ++i)
     53             opts[i] = new Optimum();
     54     }
     55 
     56     public void reset() {
     57         optCur = 0;
     58         optEnd = 0;
     59         super.reset();
     60     }
     61 
     62     /**
     63      * Converts the opts array from backward indexes to forward indexes.
     64      * Then it will be simple to get the next symbol from the array
     65      * in later calls to <code>getNextSymbol()</code>.
     66      */
     67     private int convertOpts() {
     68         optEnd = optCur;
     69 
     70         int optPrev = opts[optCur].optPrev;
     71 
     72         do {
     73             Optimum opt = opts[optCur];
     74 
     75             if (opt.prev1IsLiteral) {
     76                 opts[optPrev].optPrev = optCur;
     77                 opts[optPrev].backPrev = -1;
     78                 optCur = optPrev--;
     79 
     80                 if (opt.hasPrev2) {
     81                     opts[optPrev].optPrev = optPrev + 1;
     82                     opts[optPrev].backPrev = opt.backPrev2;
     83                     optCur = optPrev;
     84                     optPrev = opt.optPrev2;
     85                 }
     86             }
     87 
     88             int temp = opts[optPrev].optPrev;
     89             opts[optPrev].optPrev = optCur;
     90             optCur = optPrev;
     91             optPrev = temp;
     92         } while (optCur > 0);
     93 
     94         optCur = opts[0].optPrev;
     95         back = opts[optCur].backPrev;
     96         return optCur;
     97     }
     98 
     99     int getNextSymbol() {
    100         // If there are pending symbols from an earlier call to this
    101         // function, return those symbols first.
    102         if (optCur < optEnd) {
    103             int len = opts[optCur].optPrev - optCur;
    104             optCur = opts[optCur].optPrev;
    105             back = opts[optCur].backPrev;
    106             return len;
    107         }
    108 
    109         assert optCur == optEnd;
    110         optCur = 0;
    111         optEnd = 0;
    112         back = -1;
    113 
    114         if (readAhead == -1)
    115             matches = getMatches();
    116 
    117         // Get the number of bytes available in the dictionary, but
    118         // not more than the maximum match length. If there aren't
    119         // enough bytes remaining to encode a match at all, return
    120         // immediately to encode this byte as a literal.
    121         int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
    122         if (avail < MATCH_LEN_MIN)
    123             return 1;
    124 
    125         // Get the lengths of repeated matches.
    126         int repBest = 0;
    127         for (int rep = 0; rep < REPS; ++rep) {
    128             repLens[rep] = lz.getMatchLen(reps[rep], avail);
    129 
    130             if (repLens[rep] < MATCH_LEN_MIN) {
    131                 repLens[rep] = 0;
    132                 continue;
    133             }
    134 
    135             if (repLens[rep] > repLens[repBest])
    136                 repBest = rep;
    137         }
    138 
    139         // Return if the best repeated match is at least niceLen bytes long.
    140         if (repLens[repBest] >= niceLen) {
    141             back = repBest;
    142             skip(repLens[repBest] - 1);
    143             return repLens[repBest];
    144         }
    145 
    146         // Initialize mainLen and mainDist to the longest match found
    147         // by the match finder.
    148         int mainLen = 0;
    149         int mainDist = 0;
    150         if (matches.count > 0) {
    151             mainLen = matches.len[matches.count - 1];
    152             mainDist = matches.dist[matches.count - 1];
    153 
    154             // Return if it is at least niceLen bytes long.
    155             if (mainLen >= niceLen) {
    156                 back = mainDist + REPS;
    157                 skip(mainLen - 1);
    158                 return mainLen;
    159             }
    160         }
    161 
    162         int curByte = lz.getByte(0);
    163         int matchByte = lz.getByte(reps[0] + 1);
    164 
    165         // If the match finder found no matches and this byte cannot be
    166         // encoded as a repeated match (short or long), we must be return
    167         // to have the byte encoded as a literal.
    168         if (mainLen < MATCH_LEN_MIN && curByte != matchByte
    169                 && repLens[repBest] < MATCH_LEN_MIN)
    170             return 1;
    171 
    172 
    173         int pos = lz.getPos();
    174         int posState = pos & posMask;
    175 
    176         // Calculate the price of encoding the current byte as a literal.
    177         {
    178             int prevByte = lz.getByte(1);
    179             int literalPrice = literalEncoder.getPrice(curByte, matchByte,
    180                                                        prevByte, pos, state);
    181             opts[1].set1(literalPrice, 0, -1);
    182         }
    183 
    184         int anyMatchPrice = getAnyMatchPrice(state, posState);
    185         int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
    186 
    187         // If it is possible to encode this byte as a short rep, see if
    188         // it is cheaper than encoding it as a literal.
    189         if (matchByte == curByte) {
    190             int shortRepPrice = getShortRepPrice(anyRepPrice,
    191                                                  state, posState);
    192             if (shortRepPrice < opts[1].price)
    193                 opts[1].set1(shortRepPrice, 0, 0);
    194         }
    195 
    196         // Return if there is neither normal nor long repeated match. Use
    197         // a short match instead of a literal if is is possible and cheaper.
    198         optEnd = Math.max(mainLen, repLens[repBest]);
    199         if (optEnd < MATCH_LEN_MIN) {
    200             assert optEnd == 0 : optEnd;
    201             back = opts[1].backPrev;
    202             return 1;
    203         }
    204 
    205 
    206         // Update the lookup tables for distances and lengths before using
    207         // those price calculation functions. (The price function above
    208         // don't need these tables.)
    209         updatePrices();
    210 
    211         // Initialize the state and reps of this position in opts[].
    212         // updateOptStateAndReps() will need these to get the new
    213         // state and reps for the next byte.
    214         opts[0].state.set(state);
    215         System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
    216 
    217         // Initialize the prices for latter opts that will be used below.
    218         for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
    219             opts[i].reset();
    220 
    221         // Calculate the prices of repeated matches of all lengths.
    222         for (int rep = 0; rep < REPS; ++rep) {
    223             int repLen = repLens[rep];
    224             if (repLen < MATCH_LEN_MIN)
    225                 continue;
    226 
    227             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
    228                                                state, posState);
    229             do {
    230                 int price = longRepPrice + repLenEncoder.getPrice(repLen,
    231                                                                   posState);
    232                 if (price < opts[repLen].price)
    233                     opts[repLen].set1(price, 0, rep);
    234             } while (--repLen >= MATCH_LEN_MIN);
    235         }
    236 
    237         // Calculate the prices of normal matches that are longer than rep0.
    238         {
    239             int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
    240             if (len <= mainLen) {
    241                 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
    242                                                            state);
    243 
    244                 // Set i to the index of the shortest match that is
    245                 // at least len bytes long.
    246                 int i = 0;
    247                 while (len > matches.len[i])
    248                     ++i;
    249 
    250                 while (true) {
    251                     int dist = matches.dist[i];
    252                     int price = getMatchAndLenPrice(normalMatchPrice,
    253                                                     dist, len, posState);
    254                     if (price < opts[len].price)
    255                         opts[len].set1(price, 0, dist + REPS);
    256 
    257                     if (len == matches.len[i])
    258                         if (++i == matches.count)
    259                             break;
    260 
    261                     ++len;
    262                 }
    263             }
    264         }
    265 
    266 
    267         avail = Math.min(lz.getAvail(), OPTS - 1);
    268 
    269         // Get matches for later bytes and optimize the use of LZMA symbols
    270         // by calculating the prices and picking the cheapest symbol
    271         // combinations.
    272         while (++optCur < optEnd) {
    273             matches = getMatches();
    274             if (matches.count > 0
    275                     && matches.len[matches.count - 1] >= niceLen)
    276                 break;
    277 
    278             --avail;
    279             ++pos;
    280             posState = pos & posMask;
    281 
    282             updateOptStateAndReps();
    283             anyMatchPrice = opts[optCur].price
    284                             + getAnyMatchPrice(opts[optCur].state, posState);
    285             anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
    286 
    287             calc1BytePrices(pos, posState, avail, anyRepPrice);
    288 
    289             if (avail >= MATCH_LEN_MIN) {
    290                 int startLen = calcLongRepPrices(pos, posState,
    291                                                  avail, anyRepPrice);
    292                 if (matches.count > 0)
    293                     calcNormalMatchPrices(pos, posState, avail,
    294                                           anyMatchPrice, startLen);
    295             }
    296         }
    297 
    298         return convertOpts();
    299     }
    300 
    301     /**
    302      * Updates the state and reps for the current byte in the opts array.
    303      */
    304     private void updateOptStateAndReps() {
    305         int optPrev = opts[optCur].optPrev;
    306         assert optPrev < optCur;
    307 
    308         if (opts[optCur].prev1IsLiteral) {
    309             --optPrev;
    310 
    311             if (opts[optCur].hasPrev2) {
    312                 opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
    313                 if (opts[optCur].backPrev2 < REPS)
    314                     opts[optCur].state.updateLongRep();
    315                 else
    316                     opts[optCur].state.updateMatch();
    317             } else {
    318                 opts[optCur].state.set(opts[optPrev].state);
    319             }
    320 
    321             opts[optCur].state.updateLiteral();
    322         } else {
    323             opts[optCur].state.set(opts[optPrev].state);
    324         }
    325 
    326         if (optPrev == optCur - 1) {
    327             // Must be either a short rep or a literal.
    328             assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
    329 
    330             if (opts[optCur].backPrev == 0)
    331                 opts[optCur].state.updateShortRep();
    332             else
    333                 opts[optCur].state.updateLiteral();
    334 
    335             System.arraycopy(opts[optPrev].reps, 0,
    336                              opts[optCur].reps, 0, REPS);
    337         } else {
    338             int back;
    339             if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
    340                 optPrev = opts[optCur].optPrev2;
    341                 back = opts[optCur].backPrev2;
    342                 opts[optCur].state.updateLongRep();
    343             } else {
    344                 back = opts[optCur].backPrev;
    345                 if (back < REPS)
    346                     opts[optCur].state.updateLongRep();
    347                 else
    348                     opts[optCur].state.updateMatch();
    349             }
    350 
    351             if (back < REPS) {
    352                 opts[optCur].reps[0] = opts[optPrev].reps[back];
    353 
    354                 int rep;
    355                 for (rep = 1; rep <= back; ++rep)
    356                     opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
    357 
    358                 for (; rep < REPS; ++rep)
    359                     opts[optCur].reps[rep] = opts[optPrev].reps[rep];
    360             } else {
    361                 opts[optCur].reps[0] = back - REPS;
    362                 System.arraycopy(opts[optPrev].reps, 0,
    363                                  opts[optCur].reps, 1, REPS - 1);
    364             }
    365         }
    366     }
    367 
    368     /**
    369      * Calculates prices of a literal, a short rep, and literal + rep0.
    370      */
    371     private void calc1BytePrices(int pos, int posState,
    372                                  int avail, int anyRepPrice) {
    373         // This will be set to true if using a literal or a short rep.
    374         boolean nextIsByte = false;
    375 
    376         int curByte = lz.getByte(0);
    377         int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
    378 
    379         // Try a literal.
    380         int literalPrice = opts[optCur].price
    381                 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
    382                                           pos, opts[optCur].state);
    383         if (literalPrice < opts[optCur + 1].price) {
    384             opts[optCur + 1].set1(literalPrice, optCur, -1);
    385             nextIsByte = true;
    386         }
    387 
    388         // Try a short rep.
    389         if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
    390                                       || opts[optCur + 1].backPrev != 0)) {
    391             int shortRepPrice = getShortRepPrice(anyRepPrice,
    392                                                  opts[optCur].state,
    393                                                  posState);
    394             if (shortRepPrice <= opts[optCur + 1].price) {
    395                 opts[optCur + 1].set1(shortRepPrice, optCur, 0);
    396                 nextIsByte = true;
    397             }
    398         }
    399 
    400         // If neither a literal nor a short rep was the cheapest choice,
    401         // try literal + long rep0.
    402         if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
    403             int lenLimit = Math.min(niceLen, avail - 1);
    404             int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
    405 
    406             if (len >= MATCH_LEN_MIN) {
    407                 nextState.set(opts[optCur].state);
    408                 nextState.updateLiteral();
    409                 int nextPosState = (pos + 1) & posMask;
    410                 int price = literalPrice
    411                             + getLongRepAndLenPrice(0, len,
    412                                                     nextState, nextPosState);
    413 
    414                 int i = optCur + 1 + len;
    415                 while (optEnd < i)
    416                     opts[++optEnd].reset();
    417 
    418                 if (price < opts[i].price)
    419                     opts[i].set2(price, optCur, 0);
    420             }
    421         }
    422     }
    423 
    424     /**
    425      * Calculates prices of long rep and long rep + literal + rep0.
    426      */
    427     private int calcLongRepPrices(int pos, int posState,
    428                                   int avail, int anyRepPrice) {
    429         int startLen = MATCH_LEN_MIN;
    430         int lenLimit = Math.min(avail, niceLen);
    431 
    432         for (int rep = 0; rep < REPS; ++rep) {
    433             int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
    434             if (len < MATCH_LEN_MIN)
    435                 continue;
    436 
    437             while (optEnd < optCur + len)
    438                 opts[++optEnd].reset();
    439 
    440             int longRepPrice = getLongRepPrice(anyRepPrice, rep,
    441                                                opts[optCur].state, posState);
    442 
    443             for (int i = len; i >= MATCH_LEN_MIN; --i) {
    444                 int price = longRepPrice
    445                             + repLenEncoder.getPrice(i, posState);
    446                 if (price < opts[optCur + i].price)
    447                     opts[optCur + i].set1(price, optCur, rep);
    448             }
    449 
    450             if (rep == 0)
    451                 startLen = len + 1;
    452 
    453             int len2Limit = Math.min(niceLen, avail - len - 1);
    454             int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
    455                                       len2Limit);
    456 
    457             if (len2 >= MATCH_LEN_MIN) {
    458                 // Rep
    459                 int price = longRepPrice
    460                             + repLenEncoder.getPrice(len, posState);
    461                 nextState.set(opts[optCur].state);
    462                 nextState.updateLongRep();
    463 
    464                 // Literal
    465                 int curByte = lz.getByte(len, 0);
    466                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
    467                 int prevByte = lz.getByte(len, 1);
    468                 price += literalEncoder.getPrice(curByte, matchByte, prevByte,
    469                                                  pos + len, nextState);
    470                 nextState.updateLiteral();
    471 
    472                 // Rep0
    473                 int nextPosState = (pos + len + 1) & posMask;
    474                 price += getLongRepAndLenPrice(0, len2,
    475                                                nextState, nextPosState);
    476 
    477                 int i = optCur + len + 1 + len2;
    478                 while (optEnd < i)
    479                     opts[++optEnd].reset();
    480 
    481                 if (price < opts[i].price)
    482                     opts[i].set3(price, optCur, rep, len, 0);
    483             }
    484         }
    485 
    486         return startLen;
    487     }
    488 
    489     /**
    490      * Calculates prices of a normal match and normal match + literal + rep0.
    491      */
    492     private void calcNormalMatchPrices(int pos, int posState, int avail,
    493                                        int anyMatchPrice, int startLen) {
    494         // If the longest match is so long that it would not fit into
    495         // the opts array, shorten the matches.
    496         if (matches.len[matches.count - 1] > avail) {
    497             matches.count = 0;
    498             while (matches.len[matches.count] < avail)
    499                 ++matches.count;
    500 
    501             matches.len[matches.count++] = avail;
    502         }
    503 
    504         if (matches.len[matches.count - 1] < startLen)
    505             return;
    506 
    507         while (optEnd < optCur + matches.len[matches.count - 1])
    508             opts[++optEnd].reset();
    509 
    510         int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
    511                                                    opts[optCur].state);
    512 
    513         int match = 0;
    514         while (startLen > matches.len[match])
    515             ++match;
    516 
    517         for (int len = startLen; ; ++len) {
    518             int dist = matches.dist[match];
    519 
    520             // Calculate the price of a match of len bytes from the nearest
    521             // possible distance.
    522             int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
    523                                                        dist, len, posState);
    524             if (matchAndLenPrice < opts[optCur + len].price)
    525                 opts[optCur + len].set1(matchAndLenPrice,
    526                                         optCur, dist + REPS);
    527 
    528             if (len != matches.len[match])
    529                 continue;
    530 
    531             // Try match + literal + rep0. First get the length of the rep0.
    532             int len2Limit = Math.min(niceLen, avail - len - 1);
    533             int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
    534 
    535             if (len2 >= MATCH_LEN_MIN) {
    536                 nextState.set(opts[optCur].state);
    537                 nextState.updateMatch();
    538 
    539                 // Literal
    540                 int curByte = lz.getByte(len, 0);
    541                 int matchByte = lz.getByte(0); // lz.getByte(len, len)
    542                 int prevByte = lz.getByte(len, 1);
    543                 int price = matchAndLenPrice
    544                         + literalEncoder.getPrice(curByte, matchByte,
    545                                                   prevByte, pos + len,
    546                                                   nextState);
    547                 nextState.updateLiteral();
    548 
    549                 // Rep0
    550                 int nextPosState = (pos + len + 1) & posMask;
    551                 price += getLongRepAndLenPrice(0, len2,
    552                                                nextState, nextPosState);
    553 
    554                 int i = optCur + len + 1 + len2;
    555                 while (optEnd < i)
    556                     opts[++optEnd].reset();
    557 
    558                 if (price < opts[i].price)
    559                     opts[i].set3(price, optCur, dist + REPS, len, 0);
    560             }
    561 
    562             if (++match == matches.count)
    563                 break;
    564         }
    565     }
    566 }
    567