1 /* 2 * LZMAEncoderNormal 3 * 4 * Authors: Lasse Collin <lasse.collin (at) tukaani.org> 5 * Igor Pavlov <http://7-zip.org/> 6 * 7 * This file has been put into the public domain. 8 * You can do whatever you want with this file. 9 */ 10 11 package org.tukaani.xz.lzma; 12 13 import org.tukaani.xz.lz.LZEncoder; 14 import org.tukaani.xz.lz.Matches; 15 import org.tukaani.xz.rangecoder.RangeEncoder; 16 17 final class LZMAEncoderNormal extends LZMAEncoder { 18 private static final int OPTS = 4096; 19 20 private static final int EXTRA_SIZE_BEFORE = OPTS; 21 private static final int EXTRA_SIZE_AFTER = OPTS; 22 23 private final Optimum[] opts = new Optimum[OPTS]; 24 private int optCur = 0; 25 private int optEnd = 0; 26 27 private Matches matches; 28 29 // These are fields solely to avoid allocating the objects again and 30 // again on each function call. 31 private final int[] repLens = new int[REPS]; 32 private final State nextState = new State(); 33 34 static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { 35 return LZEncoder.getMemoryUsage(dictSize, 36 Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), 37 EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) 38 + OPTS * 64 / 1024; 39 } 40 41 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, 42 int dictSize, int extraSizeBefore, 43 int niceLen, int mf, int depthLimit) { 44 super(rc, LZEncoder.getInstance(dictSize, 45 Math.max(extraSizeBefore, 46 EXTRA_SIZE_BEFORE), 47 EXTRA_SIZE_AFTER, 48 niceLen, MATCH_LEN_MAX, 49 mf, depthLimit), 50 lc, lp, pb, dictSize, niceLen); 51 52 for (int i = 0; i < OPTS; ++i) 53 opts[i] = new Optimum(); 54 } 55 56 public void reset() { 57 optCur = 0; 58 optEnd = 0; 59 super.reset(); 60 } 61 62 /** 63 * Converts the opts array from backward indexes to forward indexes. 64 * Then it will be simple to get the next symbol from the array 65 * in later calls to <code>getNextSymbol()</code>. 66 */ 67 private int convertOpts() { 68 optEnd = optCur; 69 70 int optPrev = opts[optCur].optPrev; 71 72 do { 73 Optimum opt = opts[optCur]; 74 75 if (opt.prev1IsLiteral) { 76 opts[optPrev].optPrev = optCur; 77 opts[optPrev].backPrev = -1; 78 optCur = optPrev--; 79 80 if (opt.hasPrev2) { 81 opts[optPrev].optPrev = optPrev + 1; 82 opts[optPrev].backPrev = opt.backPrev2; 83 optCur = optPrev; 84 optPrev = opt.optPrev2; 85 } 86 } 87 88 int temp = opts[optPrev].optPrev; 89 opts[optPrev].optPrev = optCur; 90 optCur = optPrev; 91 optPrev = temp; 92 } while (optCur > 0); 93 94 optCur = opts[0].optPrev; 95 back = opts[optCur].backPrev; 96 return optCur; 97 } 98 99 int getNextSymbol() { 100 // If there are pending symbols from an earlier call to this 101 // function, return those symbols first. 102 if (optCur < optEnd) { 103 int len = opts[optCur].optPrev - optCur; 104 optCur = opts[optCur].optPrev; 105 back = opts[optCur].backPrev; 106 return len; 107 } 108 109 assert optCur == optEnd; 110 optCur = 0; 111 optEnd = 0; 112 back = -1; 113 114 if (readAhead == -1) 115 matches = getMatches(); 116 117 // Get the number of bytes available in the dictionary, but 118 // not more than the maximum match length. If there aren't 119 // enough bytes remaining to encode a match at all, return 120 // immediately to encode this byte as a literal. 121 int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); 122 if (avail < MATCH_LEN_MIN) 123 return 1; 124 125 // Get the lengths of repeated matches. 126 int repBest = 0; 127 for (int rep = 0; rep < REPS; ++rep) { 128 repLens[rep] = lz.getMatchLen(reps[rep], avail); 129 130 if (repLens[rep] < MATCH_LEN_MIN) { 131 repLens[rep] = 0; 132 continue; 133 } 134 135 if (repLens[rep] > repLens[repBest]) 136 repBest = rep; 137 } 138 139 // Return if the best repeated match is at least niceLen bytes long. 140 if (repLens[repBest] >= niceLen) { 141 back = repBest; 142 skip(repLens[repBest] - 1); 143 return repLens[repBest]; 144 } 145 146 // Initialize mainLen and mainDist to the longest match found 147 // by the match finder. 148 int mainLen = 0; 149 int mainDist = 0; 150 if (matches.count > 0) { 151 mainLen = matches.len[matches.count - 1]; 152 mainDist = matches.dist[matches.count - 1]; 153 154 // Return if it is at least niceLen bytes long. 155 if (mainLen >= niceLen) { 156 back = mainDist + REPS; 157 skip(mainLen - 1); 158 return mainLen; 159 } 160 } 161 162 int curByte = lz.getByte(0); 163 int matchByte = lz.getByte(reps[0] + 1); 164 165 // If the match finder found no matches and this byte cannot be 166 // encoded as a repeated match (short or long), we must be return 167 // to have the byte encoded as a literal. 168 if (mainLen < MATCH_LEN_MIN && curByte != matchByte 169 && repLens[repBest] < MATCH_LEN_MIN) 170 return 1; 171 172 173 int pos = lz.getPos(); 174 int posState = pos & posMask; 175 176 // Calculate the price of encoding the current byte as a literal. 177 { 178 int prevByte = lz.getByte(1); 179 int literalPrice = literalEncoder.getPrice(curByte, matchByte, 180 prevByte, pos, state); 181 opts[1].set1(literalPrice, 0, -1); 182 } 183 184 int anyMatchPrice = getAnyMatchPrice(state, posState); 185 int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); 186 187 // If it is possible to encode this byte as a short rep, see if 188 // it is cheaper than encoding it as a literal. 189 if (matchByte == curByte) { 190 int shortRepPrice = getShortRepPrice(anyRepPrice, 191 state, posState); 192 if (shortRepPrice < opts[1].price) 193 opts[1].set1(shortRepPrice, 0, 0); 194 } 195 196 // Return if there is neither normal nor long repeated match. Use 197 // a short match instead of a literal if is is possible and cheaper. 198 optEnd = Math.max(mainLen, repLens[repBest]); 199 if (optEnd < MATCH_LEN_MIN) { 200 assert optEnd == 0 : optEnd; 201 back = opts[1].backPrev; 202 return 1; 203 } 204 205 206 // Update the lookup tables for distances and lengths before using 207 // those price calculation functions. (The price function above 208 // don't need these tables.) 209 updatePrices(); 210 211 // Initialize the state and reps of this position in opts[]. 212 // updateOptStateAndReps() will need these to get the new 213 // state and reps for the next byte. 214 opts[0].state.set(state); 215 System.arraycopy(reps, 0, opts[0].reps, 0, REPS); 216 217 // Initialize the prices for latter opts that will be used below. 218 for (int i = optEnd; i >= MATCH_LEN_MIN; --i) 219 opts[i].reset(); 220 221 // Calculate the prices of repeated matches of all lengths. 222 for (int rep = 0; rep < REPS; ++rep) { 223 int repLen = repLens[rep]; 224 if (repLen < MATCH_LEN_MIN) 225 continue; 226 227 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 228 state, posState); 229 do { 230 int price = longRepPrice + repLenEncoder.getPrice(repLen, 231 posState); 232 if (price < opts[repLen].price) 233 opts[repLen].set1(price, 0, rep); 234 } while (--repLen >= MATCH_LEN_MIN); 235 } 236 237 // Calculate the prices of normal matches that are longer than rep0. 238 { 239 int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); 240 if (len <= mainLen) { 241 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 242 state); 243 244 // Set i to the index of the shortest match that is 245 // at least len bytes long. 246 int i = 0; 247 while (len > matches.len[i]) 248 ++i; 249 250 while (true) { 251 int dist = matches.dist[i]; 252 int price = getMatchAndLenPrice(normalMatchPrice, 253 dist, len, posState); 254 if (price < opts[len].price) 255 opts[len].set1(price, 0, dist + REPS); 256 257 if (len == matches.len[i]) 258 if (++i == matches.count) 259 break; 260 261 ++len; 262 } 263 } 264 } 265 266 267 avail = Math.min(lz.getAvail(), OPTS - 1); 268 269 // Get matches for later bytes and optimize the use of LZMA symbols 270 // by calculating the prices and picking the cheapest symbol 271 // combinations. 272 while (++optCur < optEnd) { 273 matches = getMatches(); 274 if (matches.count > 0 275 && matches.len[matches.count - 1] >= niceLen) 276 break; 277 278 --avail; 279 ++pos; 280 posState = pos & posMask; 281 282 updateOptStateAndReps(); 283 anyMatchPrice = opts[optCur].price 284 + getAnyMatchPrice(opts[optCur].state, posState); 285 anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); 286 287 calc1BytePrices(pos, posState, avail, anyRepPrice); 288 289 if (avail >= MATCH_LEN_MIN) { 290 int startLen = calcLongRepPrices(pos, posState, 291 avail, anyRepPrice); 292 if (matches.count > 0) 293 calcNormalMatchPrices(pos, posState, avail, 294 anyMatchPrice, startLen); 295 } 296 } 297 298 return convertOpts(); 299 } 300 301 /** 302 * Updates the state and reps for the current byte in the opts array. 303 */ 304 private void updateOptStateAndReps() { 305 int optPrev = opts[optCur].optPrev; 306 assert optPrev < optCur; 307 308 if (opts[optCur].prev1IsLiteral) { 309 --optPrev; 310 311 if (opts[optCur].hasPrev2) { 312 opts[optCur].state.set(opts[opts[optCur].optPrev2].state); 313 if (opts[optCur].backPrev2 < REPS) 314 opts[optCur].state.updateLongRep(); 315 else 316 opts[optCur].state.updateMatch(); 317 } else { 318 opts[optCur].state.set(opts[optPrev].state); 319 } 320 321 opts[optCur].state.updateLiteral(); 322 } else { 323 opts[optCur].state.set(opts[optPrev].state); 324 } 325 326 if (optPrev == optCur - 1) { 327 // Must be either a short rep or a literal. 328 assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1; 329 330 if (opts[optCur].backPrev == 0) 331 opts[optCur].state.updateShortRep(); 332 else 333 opts[optCur].state.updateLiteral(); 334 335 System.arraycopy(opts[optPrev].reps, 0, 336 opts[optCur].reps, 0, REPS); 337 } else { 338 int back; 339 if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) { 340 optPrev = opts[optCur].optPrev2; 341 back = opts[optCur].backPrev2; 342 opts[optCur].state.updateLongRep(); 343 } else { 344 back = opts[optCur].backPrev; 345 if (back < REPS) 346 opts[optCur].state.updateLongRep(); 347 else 348 opts[optCur].state.updateMatch(); 349 } 350 351 if (back < REPS) { 352 opts[optCur].reps[0] = opts[optPrev].reps[back]; 353 354 int rep; 355 for (rep = 1; rep <= back; ++rep) 356 opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1]; 357 358 for (; rep < REPS; ++rep) 359 opts[optCur].reps[rep] = opts[optPrev].reps[rep]; 360 } else { 361 opts[optCur].reps[0] = back - REPS; 362 System.arraycopy(opts[optPrev].reps, 0, 363 opts[optCur].reps, 1, REPS - 1); 364 } 365 } 366 } 367 368 /** 369 * Calculates prices of a literal, a short rep, and literal + rep0. 370 */ 371 private void calc1BytePrices(int pos, int posState, 372 int avail, int anyRepPrice) { 373 // This will be set to true if using a literal or a short rep. 374 boolean nextIsByte = false; 375 376 int curByte = lz.getByte(0); 377 int matchByte = lz.getByte(opts[optCur].reps[0] + 1); 378 379 // Try a literal. 380 int literalPrice = opts[optCur].price 381 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1), 382 pos, opts[optCur].state); 383 if (literalPrice < opts[optCur + 1].price) { 384 opts[optCur + 1].set1(literalPrice, optCur, -1); 385 nextIsByte = true; 386 } 387 388 // Try a short rep. 389 if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur 390 || opts[optCur + 1].backPrev != 0)) { 391 int shortRepPrice = getShortRepPrice(anyRepPrice, 392 opts[optCur].state, 393 posState); 394 if (shortRepPrice <= opts[optCur + 1].price) { 395 opts[optCur + 1].set1(shortRepPrice, optCur, 0); 396 nextIsByte = true; 397 } 398 } 399 400 // If neither a literal nor a short rep was the cheapest choice, 401 // try literal + long rep0. 402 if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) { 403 int lenLimit = Math.min(niceLen, avail - 1); 404 int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit); 405 406 if (len >= MATCH_LEN_MIN) { 407 nextState.set(opts[optCur].state); 408 nextState.updateLiteral(); 409 int nextPosState = (pos + 1) & posMask; 410 int price = literalPrice 411 + getLongRepAndLenPrice(0, len, 412 nextState, nextPosState); 413 414 int i = optCur + 1 + len; 415 while (optEnd < i) 416 opts[++optEnd].reset(); 417 418 if (price < opts[i].price) 419 opts[i].set2(price, optCur, 0); 420 } 421 } 422 } 423 424 /** 425 * Calculates prices of long rep and long rep + literal + rep0. 426 */ 427 private int calcLongRepPrices(int pos, int posState, 428 int avail, int anyRepPrice) { 429 int startLen = MATCH_LEN_MIN; 430 int lenLimit = Math.min(avail, niceLen); 431 432 for (int rep = 0; rep < REPS; ++rep) { 433 int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit); 434 if (len < MATCH_LEN_MIN) 435 continue; 436 437 while (optEnd < optCur + len) 438 opts[++optEnd].reset(); 439 440 int longRepPrice = getLongRepPrice(anyRepPrice, rep, 441 opts[optCur].state, posState); 442 443 for (int i = len; i >= MATCH_LEN_MIN; --i) { 444 int price = longRepPrice 445 + repLenEncoder.getPrice(i, posState); 446 if (price < opts[optCur + i].price) 447 opts[optCur + i].set1(price, optCur, rep); 448 } 449 450 if (rep == 0) 451 startLen = len + 1; 452 453 int len2Limit = Math.min(niceLen, avail - len - 1); 454 int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep], 455 len2Limit); 456 457 if (len2 >= MATCH_LEN_MIN) { 458 // Rep 459 int price = longRepPrice 460 + repLenEncoder.getPrice(len, posState); 461 nextState.set(opts[optCur].state); 462 nextState.updateLongRep(); 463 464 // Literal 465 int curByte = lz.getByte(len, 0); 466 int matchByte = lz.getByte(0); // lz.getByte(len, len) 467 int prevByte = lz.getByte(len, 1); 468 price += literalEncoder.getPrice(curByte, matchByte, prevByte, 469 pos + len, nextState); 470 nextState.updateLiteral(); 471 472 // Rep0 473 int nextPosState = (pos + len + 1) & posMask; 474 price += getLongRepAndLenPrice(0, len2, 475 nextState, nextPosState); 476 477 int i = optCur + len + 1 + len2; 478 while (optEnd < i) 479 opts[++optEnd].reset(); 480 481 if (price < opts[i].price) 482 opts[i].set3(price, optCur, rep, len, 0); 483 } 484 } 485 486 return startLen; 487 } 488 489 /** 490 * Calculates prices of a normal match and normal match + literal + rep0. 491 */ 492 private void calcNormalMatchPrices(int pos, int posState, int avail, 493 int anyMatchPrice, int startLen) { 494 // If the longest match is so long that it would not fit into 495 // the opts array, shorten the matches. 496 if (matches.len[matches.count - 1] > avail) { 497 matches.count = 0; 498 while (matches.len[matches.count] < avail) 499 ++matches.count; 500 501 matches.len[matches.count++] = avail; 502 } 503 504 if (matches.len[matches.count - 1] < startLen) 505 return; 506 507 while (optEnd < optCur + matches.len[matches.count - 1]) 508 opts[++optEnd].reset(); 509 510 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, 511 opts[optCur].state); 512 513 int match = 0; 514 while (startLen > matches.len[match]) 515 ++match; 516 517 for (int len = startLen; ; ++len) { 518 int dist = matches.dist[match]; 519 520 // Calculate the price of a match of len bytes from the nearest 521 // possible distance. 522 int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice, 523 dist, len, posState); 524 if (matchAndLenPrice < opts[optCur + len].price) 525 opts[optCur + len].set1(matchAndLenPrice, 526 optCur, dist + REPS); 527 528 if (len != matches.len[match]) 529 continue; 530 531 // Try match + literal + rep0. First get the length of the rep0. 532 int len2Limit = Math.min(niceLen, avail - len - 1); 533 int len2 = lz.getMatchLen(len + 1, dist, len2Limit); 534 535 if (len2 >= MATCH_LEN_MIN) { 536 nextState.set(opts[optCur].state); 537 nextState.updateMatch(); 538 539 // Literal 540 int curByte = lz.getByte(len, 0); 541 int matchByte = lz.getByte(0); // lz.getByte(len, len) 542 int prevByte = lz.getByte(len, 1); 543 int price = matchAndLenPrice 544 + literalEncoder.getPrice(curByte, matchByte, 545 prevByte, pos + len, 546 nextState); 547 nextState.updateLiteral(); 548 549 // Rep0 550 int nextPosState = (pos + len + 1) & posMask; 551 price += getLongRepAndLenPrice(0, len2, 552 nextState, nextPosState); 553 554 int i = optCur + len + 1 + len2; 555 while (optEnd < i) 556 opts[++optEnd].reset(); 557 558 if (price < opts[i].price) 559 opts[i].set3(price, optCur, dist + REPS, len, 0); 560 } 561 562 if (++match == matches.count) 563 break; 564 } 565 } 566 } 567