1 /* LzmaSpec.c -- LZMA Reference Decoder 2 2013-07-28 : Igor Pavlov : Public domain */ 3 4 // This code implements LZMA file decoding according to LZMA specification. 5 // This code is not optimized for speed. 6 7 #include <stdio.h> 8 9 #ifdef _MSC_VER 10 #pragma warning(disable : 4710) // function not inlined 11 #pragma warning(disable : 4996) // This function or variable may be unsafe 12 #endif 13 14 typedef unsigned char Byte; 15 typedef unsigned short UInt16; 16 17 #ifdef _LZMA_UINT32_IS_ULONG 18 typedef unsigned long UInt32; 19 #else 20 typedef unsigned int UInt32; 21 #endif 22 23 #if defined(_MSC_VER) || defined(__BORLANDC__) 24 typedef unsigned __int64 UInt64; 25 #else 26 typedef unsigned long long int UInt64; 27 #endif 28 29 30 struct CInputStream 31 { 32 FILE *File; 33 UInt64 Processed; 34 35 void Init() { Processed = 0; } 36 37 Byte ReadByte() 38 { 39 int c = getc(File); 40 if (c < 0) 41 throw "Unexpected end of file"; 42 Processed++; 43 return (Byte)c; 44 } 45 }; 46 47 48 struct COutStream 49 { 50 FILE *File; 51 UInt64 Processed; 52 53 void Init() { Processed = 0; } 54 55 void WriteByte(Byte b) 56 { 57 if (putc(b, File) == EOF) 58 throw "File writing error"; 59 Processed++; 60 } 61 }; 62 63 64 class COutWindow 65 { 66 Byte *Buf; 67 UInt32 Pos; 68 UInt32 Size; 69 bool IsFull; 70 71 public: 72 unsigned TotalPos; 73 COutStream OutStream; 74 75 COutWindow(): Buf(NULL) {} 76 ~COutWindow() { delete []Buf; } 77 78 void Create(UInt32 dictSize) 79 { 80 Buf = new Byte[dictSize]; 81 Pos = 0; 82 Size = dictSize; 83 IsFull = false; 84 TotalPos = 0; 85 } 86 87 void PutByte(Byte b) 88 { 89 TotalPos++; 90 Buf[Pos++] = b; 91 if (Pos == Size) 92 { 93 Pos = 0; 94 IsFull = true; 95 } 96 OutStream.WriteByte(b); 97 } 98 99 Byte GetByte(UInt32 dist) const 100 { 101 return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos]; 102 } 103 104 void CopyMatch(UInt32 dist, unsigned len) 105 { 106 for (; len > 0; len--) 107 PutByte(GetByte(dist)); 108 } 109 110 bool CheckDistance(UInt32 dist) const 111 { 112 return dist <= Pos || IsFull; 113 } 114 115 bool IsEmpty() const 116 { 117 return Pos == 0 && !IsFull; 118 } 119 }; 120 121 122 #define kNumBitModelTotalBits 11 123 #define kNumMoveBits 5 124 125 typedef UInt16 CProb; 126 127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2) 128 129 #define INIT_PROBS(p) \ 130 { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; } 131 132 class CRangeDecoder 133 { 134 UInt32 Range; 135 UInt32 Code; 136 137 void Normalize(); 138 139 public: 140 141 CInputStream *InStream; 142 bool Corrupted; 143 144 void Init(); 145 bool IsFinishedOK() const { return Code == 0; } 146 147 UInt32 DecodeDirectBits(unsigned numBits); 148 unsigned DecodeBit(CProb *prob); 149 }; 150 151 void CRangeDecoder::Init() 152 { 153 Corrupted = false; 154 155 if (InStream->ReadByte() != 0) 156 Corrupted = true; 157 158 Range = 0xFFFFFFFF; 159 Code = 0; 160 for (int i = 0; i < 4; i++) 161 Code = (Code << 8) | InStream->ReadByte(); 162 163 if (Code == Range) 164 Corrupted = true; 165 } 166 167 #define kTopValue ((UInt32)1 << 24) 168 169 void CRangeDecoder::Normalize() 170 { 171 if (Range < kTopValue) 172 { 173 Range <<= 8; 174 Code = (Code << 8) | InStream->ReadByte(); 175 } 176 } 177 178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits) 179 { 180 UInt32 res = 0; 181 do 182 { 183 Range >>= 1; 184 Code -= Range; 185 UInt32 t = 0 - ((UInt32)Code >> 31); 186 Code += Range & t; 187 188 if (Code == Range) 189 Corrupted = true; 190 191 Normalize(); 192 res <<= 1; 193 res += t + 1; 194 } 195 while (--numBits); 196 return res; 197 } 198 199 unsigned CRangeDecoder::DecodeBit(CProb *prob) 200 { 201 unsigned v = *prob; 202 UInt32 bound = (Range >> kNumBitModelTotalBits) * v; 203 unsigned symbol; 204 if (Code < bound) 205 { 206 v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits; 207 Range = bound; 208 symbol = 0; 209 } 210 else 211 { 212 v -= v >> kNumMoveBits; 213 Code -= bound; 214 Range -= bound; 215 symbol = 1; 216 } 217 *prob = (CProb)v; 218 Normalize(); 219 return symbol; 220 } 221 222 223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc) 224 { 225 unsigned m = 1; 226 unsigned symbol = 0; 227 for (unsigned i = 0; i < numBits; i++) 228 { 229 unsigned bit = rc->DecodeBit(&probs[m]); 230 m <<= 1; 231 m += bit; 232 symbol |= (bit << i); 233 } 234 return symbol; 235 } 236 237 template <unsigned NumBits> 238 class CBitTreeDecoder 239 { 240 CProb Probs[(unsigned)1 << NumBits]; 241 242 public: 243 244 void Init() 245 { 246 INIT_PROBS(Probs); 247 } 248 249 unsigned Decode(CRangeDecoder *rc) 250 { 251 unsigned m = 1; 252 for (unsigned i = 0; i < NumBits; i++) 253 m = (m << 1) + rc->DecodeBit(&Probs[m]); 254 return m - ((unsigned)1 << NumBits); 255 } 256 257 unsigned ReverseDecode(CRangeDecoder *rc) 258 { 259 return BitTreeReverseDecode(Probs, NumBits, rc); 260 } 261 }; 262 263 #define kNumPosBitsMax 4 264 265 #define kNumStates 12 266 #define kNumLenToPosStates 4 267 #define kNumAlignBits 4 268 #define kStartPosModelIndex 4 269 #define kEndPosModelIndex 14 270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1)) 271 #define kMatchMinLen 2 272 273 class CLenDecoder 274 { 275 CProb Choice; 276 CProb Choice2; 277 CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax]; 278 CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax]; 279 CBitTreeDecoder<8> HighCoder; 280 281 public: 282 283 void Init() 284 { 285 Choice = PROB_INIT_VAL; 286 Choice2 = PROB_INIT_VAL; 287 HighCoder.Init(); 288 for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++) 289 { 290 LowCoder[i].Init(); 291 MidCoder[i].Init(); 292 } 293 } 294 295 unsigned Decode(CRangeDecoder *rc, unsigned posState) 296 { 297 if (rc->DecodeBit(&Choice) == 0) 298 return LowCoder[posState].Decode(rc); 299 if (rc->DecodeBit(&Choice2) == 0) 300 return 8 + MidCoder[posState].Decode(rc); 301 return 16 + HighCoder.Decode(rc); 302 } 303 }; 304 305 unsigned UpdateState_Literal(unsigned state) 306 { 307 if (state < 4) return 0; 308 else if (state < 10) return state - 3; 309 else return state - 6; 310 } 311 unsigned UpdateState_Match (unsigned state) { return state < 7 ? 7 : 10; } 312 unsigned UpdateState_Rep (unsigned state) { return state < 7 ? 8 : 11; } 313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; } 314 315 #define LZMA_DIC_MIN (1 << 12) 316 317 class CLzmaDecoder 318 { 319 public: 320 CRangeDecoder RangeDec; 321 COutWindow OutWindow; 322 323 bool markerIsMandatory; 324 unsigned lc, pb, lp; 325 UInt32 dictSize; 326 UInt32 dictSizeInProperties; 327 328 void DecodeProperties(const Byte *properties) 329 { 330 unsigned d = properties[0]; 331 if (d >= (9 * 5 * 5)) 332 throw "Incorrect LZMA properties"; 333 lc = d % 9; 334 d /= 9; 335 pb = d / 5; 336 lp = d % 5; 337 dictSizeInProperties = 0; 338 for (int i = 0; i < 4; i++) 339 dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i); 340 dictSize = dictSizeInProperties; 341 if (dictSize < LZMA_DIC_MIN) 342 dictSize = LZMA_DIC_MIN; 343 } 344 345 CLzmaDecoder(): LitProbs(NULL) {} 346 ~CLzmaDecoder() { delete []LitProbs; } 347 348 void Create() 349 { 350 OutWindow.Create(dictSize); 351 CreateLiterals(); 352 } 353 354 int Decode(bool unpackSizeDefined, UInt64 unpackSize); 355 356 private: 357 358 CProb *LitProbs; 359 360 void CreateLiterals() 361 { 362 LitProbs = new CProb[(UInt32)0x300 << (lc + lp)]; 363 } 364 365 void InitLiterals() 366 { 367 UInt32 num = (UInt32)0x300 << (lc + lp); 368 for (UInt32 i = 0; i < num; i++) 369 LitProbs[i] = PROB_INIT_VAL; 370 } 371 372 void DecodeLiteral(unsigned state, UInt32 rep0) 373 { 374 unsigned prevByte = 0; 375 if (!OutWindow.IsEmpty()) 376 prevByte = OutWindow.GetByte(1); 377 378 unsigned symbol = 1; 379 unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc)); 380 CProb *probs = &LitProbs[(UInt32)0x300 * litState]; 381 382 if (state >= 7) 383 { 384 unsigned matchByte = OutWindow.GetByte(rep0 + 1); 385 do 386 { 387 unsigned matchBit = (matchByte >> 7) & 1; 388 matchByte <<= 1; 389 unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]); 390 symbol = (symbol << 1) | bit; 391 if (matchBit != bit) 392 break; 393 } 394 while (symbol < 0x100); 395 } 396 while (symbol < 0x100) 397 symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]); 398 OutWindow.PutByte((Byte)(symbol - 0x100)); 399 } 400 401 CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates]; 402 CBitTreeDecoder<kNumAlignBits> AlignDecoder; 403 CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex]; 404 405 void InitDist() 406 { 407 for (unsigned i = 0; i < kNumLenToPosStates; i++) 408 PosSlotDecoder[i].Init(); 409 AlignDecoder.Init(); 410 INIT_PROBS(PosDecoders); 411 } 412 413 unsigned DecodeDistance(unsigned len) 414 { 415 unsigned lenState = len; 416 if (lenState > kNumLenToPosStates - 1) 417 lenState = kNumLenToPosStates - 1; 418 419 unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec); 420 if (posSlot < 4) 421 return posSlot; 422 423 unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1); 424 UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits); 425 if (posSlot < kEndPosModelIndex) 426 dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec); 427 else 428 { 429 dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits; 430 dist += AlignDecoder.ReverseDecode(&RangeDec); 431 } 432 return dist; 433 } 434 435 CProb IsMatch[kNumStates << kNumPosBitsMax]; 436 CProb IsRep[kNumStates]; 437 CProb IsRepG0[kNumStates]; 438 CProb IsRepG1[kNumStates]; 439 CProb IsRepG2[kNumStates]; 440 CProb IsRep0Long[kNumStates << kNumPosBitsMax]; 441 442 CLenDecoder LenDecoder; 443 CLenDecoder RepLenDecoder; 444 445 void Init() 446 { 447 InitLiterals(); 448 InitDist(); 449 450 INIT_PROBS(IsMatch); 451 INIT_PROBS(IsRep); 452 INIT_PROBS(IsRepG0); 453 INIT_PROBS(IsRepG1); 454 INIT_PROBS(IsRepG2); 455 INIT_PROBS(IsRep0Long); 456 457 LenDecoder.Init(); 458 RepLenDecoder.Init(); 459 } 460 }; 461 462 463 #define LZMA_RES_ERROR 0 464 #define LZMA_RES_FINISHED_WITH_MARKER 1 465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2 466 467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize) 468 { 469 Init(); 470 RangeDec.Init(); 471 472 UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0; 473 unsigned state = 0; 474 475 for (;;) 476 { 477 if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory) 478 if (RangeDec.IsFinishedOK()) 479 return LZMA_RES_FINISHED_WITHOUT_MARKER; 480 481 unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1); 482 483 if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0) 484 { 485 if (unpackSizeDefined && unpackSize == 0) 486 return LZMA_RES_ERROR; 487 DecodeLiteral(state, rep0); 488 state = UpdateState_Literal(state); 489 unpackSize--; 490 continue; 491 } 492 493 unsigned len; 494 495 if (RangeDec.DecodeBit(&IsRep[state]) != 0) 496 { 497 if (unpackSizeDefined && unpackSize == 0) 498 return LZMA_RES_ERROR; 499 if (OutWindow.IsEmpty()) 500 return LZMA_RES_ERROR; 501 if (RangeDec.DecodeBit(&IsRepG0[state]) == 0) 502 { 503 if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0) 504 { 505 state = UpdateState_ShortRep(state); 506 OutWindow.PutByte(OutWindow.GetByte(rep0 + 1)); 507 unpackSize--; 508 continue; 509 } 510 } 511 else 512 { 513 UInt32 dist; 514 if (RangeDec.DecodeBit(&IsRepG1[state]) == 0) 515 dist = rep1; 516 else 517 { 518 if (RangeDec.DecodeBit(&IsRepG2[state]) == 0) 519 dist = rep2; 520 else 521 { 522 dist = rep3; 523 rep3 = rep2; 524 } 525 rep2 = rep1; 526 } 527 rep1 = rep0; 528 rep0 = dist; 529 } 530 len = RepLenDecoder.Decode(&RangeDec, posState); 531 state = UpdateState_Rep(state); 532 } 533 else 534 { 535 rep3 = rep2; 536 rep2 = rep1; 537 rep1 = rep0; 538 len = LenDecoder.Decode(&RangeDec, posState); 539 state = UpdateState_Match(state); 540 rep0 = DecodeDistance(len); 541 if (rep0 == 0xFFFFFFFF) 542 return RangeDec.IsFinishedOK() ? 543 LZMA_RES_FINISHED_WITH_MARKER : 544 LZMA_RES_ERROR; 545 546 if (unpackSizeDefined && unpackSize == 0) 547 return LZMA_RES_ERROR; 548 if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0)) 549 return LZMA_RES_ERROR; 550 } 551 len += kMatchMinLen; 552 bool isError = false; 553 if (unpackSizeDefined && unpackSize < len) 554 { 555 len = (unsigned)unpackSize; 556 isError = true; 557 } 558 OutWindow.CopyMatch(rep0 + 1, len); 559 unpackSize -= len; 560 if (isError) 561 return LZMA_RES_ERROR; 562 } 563 } 564 565 static void Print(const char *s) 566 { 567 fputs(s, stdout); 568 } 569 570 static void PrintError(const char *s) 571 { 572 fputs(s, stderr); 573 } 574 575 576 #define CONVERT_INT_TO_STR(charType, tempSize) \ 577 578 void ConvertUInt64ToString(UInt64 val, char *s) 579 { 580 char temp[32]; 581 unsigned i = 0; 582 while (val >= 10) 583 { 584 temp[i++] = (char)('0' + (unsigned)(val % 10)); 585 val /= 10; 586 } 587 *s++ = (char)('0' + (unsigned)val); 588 while (i != 0) 589 { 590 i--; 591 *s++ = temp[i]; 592 } 593 *s = 0; 594 } 595 596 void PrintUInt64(const char *title, UInt64 v) 597 { 598 Print(title); 599 Print(" : "); 600 char s[32]; 601 ConvertUInt64ToString(v, s); 602 Print(s); 603 Print(" bytes \n"); 604 } 605 606 int main2(int numArgs, const char *args[]) 607 { 608 Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n"); 609 if (numArgs == 1) 610 Print("\nUse: lzmaSpec a.lzma outFile"); 611 612 if (numArgs != 3) 613 throw "you must specify two parameters"; 614 615 CInputStream inStream; 616 inStream.File = fopen(args[1], "rb"); 617 inStream.Init(); 618 if (inStream.File == 0) 619 throw "Can't open input file"; 620 621 CLzmaDecoder lzmaDecoder; 622 lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+"); 623 lzmaDecoder.OutWindow.OutStream.Init(); 624 if (inStream.File == 0) 625 throw "Can't open output file"; 626 627 Byte header[13]; 628 int i; 629 for (i = 0; i < 13; i++) 630 header[i] = inStream.ReadByte(); 631 632 lzmaDecoder.DecodeProperties(header); 633 634 printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb); 635 printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties); 636 printf("\nDictionary Size for decoding = %u", lzmaDecoder.dictSize); 637 638 UInt64 unpackSize = 0; 639 bool unpackSizeDefined = false; 640 for (i = 0; i < 8; i++) 641 { 642 Byte b = header[5 + i]; 643 if (b != 0xFF) 644 unpackSizeDefined = true; 645 unpackSize |= (UInt64)b << (8 * i); 646 } 647 648 lzmaDecoder.markerIsMandatory = !unpackSizeDefined; 649 650 Print("\n"); 651 if (unpackSizeDefined) 652 PrintUInt64("Uncompressed Size", unpackSize); 653 else 654 Print("End marker is expected\n"); 655 lzmaDecoder.RangeDec.InStream = &inStream; 656 657 Print("\n"); 658 659 lzmaDecoder.Create(); 660 // we support the streams that have uncompressed size and marker. 661 int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize); 662 663 PrintUInt64("Read ", inStream.Processed); 664 PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed); 665 666 if (res == LZMA_RES_ERROR) 667 throw "LZMA decoding error"; 668 else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER) 669 Print("Finished without end marker"); 670 else if (res == LZMA_RES_FINISHED_WITH_MARKER) 671 { 672 if (unpackSizeDefined) 673 { 674 if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize) 675 throw "Finished with end marker before than specified size"; 676 Print("Warning: "); 677 } 678 Print("Finished with end marker"); 679 } 680 else 681 throw "Internal Error"; 682 683 Print("\n"); 684 685 if (lzmaDecoder.RangeDec.Corrupted) 686 { 687 Print("\nWarning: LZMA stream is corrupted\n"); 688 } 689 690 return 0; 691 } 692 693 694 int 695 #ifdef _MSC_VER 696 __cdecl 697 #endif 698 main(int numArgs, const char *args[]) 699 { 700 try { return main2(numArgs, args); } 701 catch (const char *s) 702 { 703 PrintError("\nError:\n"); 704 PrintError(s); 705 PrintError("\n"); 706 return 1; 707 } 708 catch(...) 709 { 710 PrintError("\nError\n"); 711 return 1; 712 } 713 } 714