Home | History | Annotate | Download | only in LzmaSpec
      1 /* LzmaSpec.c -- LZMA Reference Decoder
      2 2013-07-28 : Igor Pavlov : Public domain */
      3 
      4 // This code implements LZMA file decoding according to LZMA specification.
      5 // This code is not optimized for speed.
      6 
      7 #include <stdio.h>
      8 
      9 #ifdef _MSC_VER
     10   #pragma warning(disable : 4710) // function not inlined
     11   #pragma warning(disable : 4996) // This function or variable may be unsafe
     12 #endif
     13 
     14 typedef unsigned char Byte;
     15 typedef unsigned short UInt16;
     16 
     17 #ifdef _LZMA_UINT32_IS_ULONG
     18   typedef unsigned long UInt32;
     19 #else
     20   typedef unsigned int UInt32;
     21 #endif
     22 
     23 #if defined(_MSC_VER) || defined(__BORLANDC__)
     24   typedef unsigned __int64 UInt64;
     25 #else
     26   typedef unsigned long long int UInt64;
     27 #endif
     28 
     29 
     30 struct CInputStream
     31 {
     32   FILE *File;
     33   UInt64 Processed;
     34 
     35   void Init() { Processed = 0; }
     36 
     37   Byte ReadByte()
     38   {
     39     int c = getc(File);
     40     if (c < 0)
     41       throw "Unexpected end of file";
     42     Processed++;
     43     return (Byte)c;
     44   }
     45 };
     46 
     47 
     48 struct COutStream
     49 {
     50   FILE *File;
     51   UInt64 Processed;
     52 
     53   void Init() { Processed = 0; }
     54 
     55   void WriteByte(Byte b)
     56   {
     57     if (putc(b, File) == EOF)
     58       throw "File writing error";
     59     Processed++;
     60   }
     61 };
     62 
     63 
     64 class COutWindow
     65 {
     66   Byte *Buf;
     67   UInt32 Pos;
     68   UInt32 Size;
     69   bool IsFull;
     70 
     71 public:
     72   unsigned TotalPos;
     73   COutStream OutStream;
     74 
     75   COutWindow(): Buf(NULL) {}
     76   ~COutWindow() { delete []Buf; }
     77 
     78   void Create(UInt32 dictSize)
     79   {
     80     Buf = new Byte[dictSize];
     81     Pos = 0;
     82     Size = dictSize;
     83     IsFull = false;
     84     TotalPos = 0;
     85   }
     86 
     87   void PutByte(Byte b)
     88   {
     89     TotalPos++;
     90     Buf[Pos++] = b;
     91     if (Pos == Size)
     92     {
     93       Pos = 0;
     94       IsFull = true;
     95     }
     96     OutStream.WriteByte(b);
     97   }
     98 
     99   Byte GetByte(UInt32 dist) const
    100   {
    101     return Buf[dist <= Pos ? Pos - dist : Size - dist + Pos];
    102   }
    103 
    104   void CopyMatch(UInt32 dist, unsigned len)
    105   {
    106     for (; len > 0; len--)
    107       PutByte(GetByte(dist));
    108   }
    109 
    110   bool CheckDistance(UInt32 dist) const
    111   {
    112     return dist <= Pos || IsFull;
    113   }
    114 
    115   bool IsEmpty() const
    116   {
    117     return Pos == 0 && !IsFull;
    118   }
    119 };
    120 
    121 
    122 #define kNumBitModelTotalBits 11
    123 #define kNumMoveBits 5
    124 
    125 typedef UInt16 CProb;
    126 
    127 #define PROB_INIT_VAL ((1 << kNumBitModelTotalBits) / 2)
    128 
    129 #define INIT_PROBS(p) \
    130  { for (unsigned i = 0; i < sizeof(p) / sizeof(p[0]); i++) p[i] = PROB_INIT_VAL; }
    131 
    132 class CRangeDecoder
    133 {
    134   UInt32 Range;
    135   UInt32 Code;
    136 
    137   void Normalize();
    138 
    139 public:
    140 
    141   CInputStream *InStream;
    142   bool Corrupted;
    143 
    144   void Init();
    145   bool IsFinishedOK() const { return Code == 0; }
    146 
    147   UInt32 DecodeDirectBits(unsigned numBits);
    148   unsigned DecodeBit(CProb *prob);
    149 };
    150 
    151 void CRangeDecoder::Init()
    152 {
    153   Corrupted = false;
    154 
    155   if (InStream->ReadByte() != 0)
    156     Corrupted = true;
    157 
    158   Range = 0xFFFFFFFF;
    159   Code = 0;
    160   for (int i = 0; i < 4; i++)
    161     Code = (Code << 8) | InStream->ReadByte();
    162 
    163   if (Code == Range)
    164     Corrupted = true;
    165 }
    166 
    167 #define kTopValue ((UInt32)1 << 24)
    168 
    169 void CRangeDecoder::Normalize()
    170 {
    171   if (Range < kTopValue)
    172   {
    173     Range <<= 8;
    174     Code = (Code << 8) | InStream->ReadByte();
    175   }
    176 }
    177 
    178 UInt32 CRangeDecoder::DecodeDirectBits(unsigned numBits)
    179 {
    180   UInt32 res = 0;
    181   do
    182   {
    183     Range >>= 1;
    184     Code -= Range;
    185     UInt32 t = 0 - ((UInt32)Code >> 31);
    186     Code += Range & t;
    187 
    188     if (Code == Range)
    189       Corrupted = true;
    190 
    191     Normalize();
    192     res <<= 1;
    193     res += t + 1;
    194   }
    195   while (--numBits);
    196   return res;
    197 }
    198 
    199 unsigned CRangeDecoder::DecodeBit(CProb *prob)
    200 {
    201   unsigned v = *prob;
    202   UInt32 bound = (Range >> kNumBitModelTotalBits) * v;
    203   unsigned symbol;
    204   if (Code < bound)
    205   {
    206     v += ((1 << kNumBitModelTotalBits) - v) >> kNumMoveBits;
    207     Range = bound;
    208     symbol = 0;
    209   }
    210   else
    211   {
    212     v -= v >> kNumMoveBits;
    213     Code -= bound;
    214     Range -= bound;
    215     symbol = 1;
    216   }
    217   *prob = (CProb)v;
    218   Normalize();
    219   return symbol;
    220 }
    221 
    222 
    223 unsigned BitTreeReverseDecode(CProb *probs, unsigned numBits, CRangeDecoder *rc)
    224 {
    225   unsigned m = 1;
    226   unsigned symbol = 0;
    227   for (unsigned i = 0; i < numBits; i++)
    228   {
    229     unsigned bit = rc->DecodeBit(&probs[m]);
    230     m <<= 1;
    231     m += bit;
    232     symbol |= (bit << i);
    233   }
    234   return symbol;
    235 }
    236 
    237 template <unsigned NumBits>
    238 class CBitTreeDecoder
    239 {
    240   CProb Probs[(unsigned)1 << NumBits];
    241 
    242 public:
    243 
    244   void Init()
    245   {
    246     INIT_PROBS(Probs);
    247   }
    248 
    249   unsigned Decode(CRangeDecoder *rc)
    250   {
    251     unsigned m = 1;
    252     for (unsigned i = 0; i < NumBits; i++)
    253       m = (m << 1) + rc->DecodeBit(&Probs[m]);
    254     return m - ((unsigned)1 << NumBits);
    255   }
    256 
    257   unsigned ReverseDecode(CRangeDecoder *rc)
    258   {
    259     return BitTreeReverseDecode(Probs, NumBits, rc);
    260   }
    261 };
    262 
    263 #define kNumPosBitsMax 4
    264 
    265 #define kNumStates 12
    266 #define kNumLenToPosStates 4
    267 #define kNumAlignBits 4
    268 #define kStartPosModelIndex 4
    269 #define kEndPosModelIndex 14
    270 #define kNumFullDistances (1 << (kEndPosModelIndex >> 1))
    271 #define kMatchMinLen 2
    272 
    273 class CLenDecoder
    274 {
    275   CProb Choice;
    276   CProb Choice2;
    277   CBitTreeDecoder<3> LowCoder[1 << kNumPosBitsMax];
    278   CBitTreeDecoder<3> MidCoder[1 << kNumPosBitsMax];
    279   CBitTreeDecoder<8> HighCoder;
    280 
    281 public:
    282 
    283   void Init()
    284   {
    285     Choice = PROB_INIT_VAL;
    286     Choice2 = PROB_INIT_VAL;
    287     HighCoder.Init();
    288     for (unsigned i = 0; i < (1 << kNumPosBitsMax); i++)
    289     {
    290       LowCoder[i].Init();
    291       MidCoder[i].Init();
    292     }
    293   }
    294 
    295   unsigned Decode(CRangeDecoder *rc, unsigned posState)
    296   {
    297     if (rc->DecodeBit(&Choice) == 0)
    298       return LowCoder[posState].Decode(rc);
    299     if (rc->DecodeBit(&Choice2) == 0)
    300       return 8 + MidCoder[posState].Decode(rc);
    301     return 16 + HighCoder.Decode(rc);
    302   }
    303 };
    304 
    305 unsigned UpdateState_Literal(unsigned state)
    306 {
    307   if (state < 4) return 0;
    308   else if (state < 10) return state - 3;
    309   else return state - 6;
    310 }
    311 unsigned UpdateState_Match   (unsigned state) { return state < 7 ? 7 : 10; }
    312 unsigned UpdateState_Rep     (unsigned state) { return state < 7 ? 8 : 11; }
    313 unsigned UpdateState_ShortRep(unsigned state) { return state < 7 ? 9 : 11; }
    314 
    315 #define LZMA_DIC_MIN (1 << 12)
    316 
    317 class CLzmaDecoder
    318 {
    319 public:
    320   CRangeDecoder RangeDec;
    321   COutWindow OutWindow;
    322 
    323   bool markerIsMandatory;
    324   unsigned lc, pb, lp;
    325   UInt32 dictSize;
    326   UInt32 dictSizeInProperties;
    327 
    328   void DecodeProperties(const Byte *properties)
    329   {
    330     unsigned d = properties[0];
    331     if (d >= (9 * 5 * 5))
    332       throw "Incorrect LZMA properties";
    333     lc = d % 9;
    334     d /= 9;
    335     pb = d / 5;
    336     lp = d % 5;
    337     dictSizeInProperties = 0;
    338     for (int i = 0; i < 4; i++)
    339       dictSizeInProperties |= (UInt32)properties[i + 1] << (8 * i);
    340     dictSize = dictSizeInProperties;
    341     if (dictSize < LZMA_DIC_MIN)
    342       dictSize = LZMA_DIC_MIN;
    343   }
    344 
    345   CLzmaDecoder(): LitProbs(NULL) {}
    346   ~CLzmaDecoder() { delete []LitProbs; }
    347 
    348   void Create()
    349   {
    350     OutWindow.Create(dictSize);
    351     CreateLiterals();
    352   }
    353 
    354   int Decode(bool unpackSizeDefined, UInt64 unpackSize);
    355 
    356 private:
    357 
    358   CProb *LitProbs;
    359 
    360   void CreateLiterals()
    361   {
    362     LitProbs = new CProb[(UInt32)0x300 << (lc + lp)];
    363   }
    364 
    365   void InitLiterals()
    366   {
    367     UInt32 num = (UInt32)0x300 << (lc + lp);
    368     for (UInt32 i = 0; i < num; i++)
    369       LitProbs[i] = PROB_INIT_VAL;
    370   }
    371 
    372   void DecodeLiteral(unsigned state, UInt32 rep0)
    373   {
    374     unsigned prevByte = 0;
    375     if (!OutWindow.IsEmpty())
    376       prevByte = OutWindow.GetByte(1);
    377 
    378     unsigned symbol = 1;
    379     unsigned litState = ((OutWindow.TotalPos & ((1 << lp) - 1)) << lc) + (prevByte >> (8 - lc));
    380     CProb *probs = &LitProbs[(UInt32)0x300 * litState];
    381 
    382     if (state >= 7)
    383     {
    384       unsigned matchByte = OutWindow.GetByte(rep0 + 1);
    385       do
    386       {
    387         unsigned matchBit = (matchByte >> 7) & 1;
    388         matchByte <<= 1;
    389         unsigned bit = RangeDec.DecodeBit(&probs[((1 + matchBit) << 8) + symbol]);
    390         symbol = (symbol << 1) | bit;
    391         if (matchBit != bit)
    392           break;
    393       }
    394       while (symbol < 0x100);
    395     }
    396     while (symbol < 0x100)
    397       symbol = (symbol << 1) | RangeDec.DecodeBit(&probs[symbol]);
    398     OutWindow.PutByte((Byte)(symbol - 0x100));
    399   }
    400 
    401   CBitTreeDecoder<6> PosSlotDecoder[kNumLenToPosStates];
    402   CBitTreeDecoder<kNumAlignBits> AlignDecoder;
    403   CProb PosDecoders[1 + kNumFullDistances - kEndPosModelIndex];
    404 
    405   void InitDist()
    406   {
    407     for (unsigned i = 0; i < kNumLenToPosStates; i++)
    408       PosSlotDecoder[i].Init();
    409     AlignDecoder.Init();
    410     INIT_PROBS(PosDecoders);
    411   }
    412 
    413   unsigned DecodeDistance(unsigned len)
    414   {
    415     unsigned lenState = len;
    416     if (lenState > kNumLenToPosStates - 1)
    417       lenState = kNumLenToPosStates - 1;
    418 
    419     unsigned posSlot = PosSlotDecoder[lenState].Decode(&RangeDec);
    420     if (posSlot < 4)
    421       return posSlot;
    422 
    423     unsigned numDirectBits = (unsigned)((posSlot >> 1) - 1);
    424     UInt32 dist = ((2 | (posSlot & 1)) << numDirectBits);
    425     if (posSlot < kEndPosModelIndex)
    426       dist += BitTreeReverseDecode(PosDecoders + dist - posSlot, numDirectBits, &RangeDec);
    427     else
    428     {
    429       dist += RangeDec.DecodeDirectBits(numDirectBits - kNumAlignBits) << kNumAlignBits;
    430       dist += AlignDecoder.ReverseDecode(&RangeDec);
    431     }
    432     return dist;
    433   }
    434 
    435   CProb IsMatch[kNumStates << kNumPosBitsMax];
    436   CProb IsRep[kNumStates];
    437   CProb IsRepG0[kNumStates];
    438   CProb IsRepG1[kNumStates];
    439   CProb IsRepG2[kNumStates];
    440   CProb IsRep0Long[kNumStates << kNumPosBitsMax];
    441 
    442   CLenDecoder LenDecoder;
    443   CLenDecoder RepLenDecoder;
    444 
    445   void Init()
    446   {
    447     InitLiterals();
    448     InitDist();
    449 
    450     INIT_PROBS(IsMatch);
    451     INIT_PROBS(IsRep);
    452     INIT_PROBS(IsRepG0);
    453     INIT_PROBS(IsRepG1);
    454     INIT_PROBS(IsRepG2);
    455     INIT_PROBS(IsRep0Long);
    456 
    457     LenDecoder.Init();
    458     RepLenDecoder.Init();
    459   }
    460 };
    461 
    462 
    463 #define LZMA_RES_ERROR                   0
    464 #define LZMA_RES_FINISHED_WITH_MARKER    1
    465 #define LZMA_RES_FINISHED_WITHOUT_MARKER 2
    466 
    467 int CLzmaDecoder::Decode(bool unpackSizeDefined, UInt64 unpackSize)
    468 {
    469   Init();
    470   RangeDec.Init();
    471 
    472   UInt32 rep0 = 0, rep1 = 0, rep2 = 0, rep3 = 0;
    473   unsigned state = 0;
    474 
    475   for (;;)
    476   {
    477     if (unpackSizeDefined && unpackSize == 0 && !markerIsMandatory)
    478       if (RangeDec.IsFinishedOK())
    479         return LZMA_RES_FINISHED_WITHOUT_MARKER;
    480 
    481     unsigned posState = OutWindow.TotalPos & ((1 << pb) - 1);
    482 
    483     if (RangeDec.DecodeBit(&IsMatch[(state << kNumPosBitsMax) + posState]) == 0)
    484     {
    485       if (unpackSizeDefined && unpackSize == 0)
    486         return LZMA_RES_ERROR;
    487       DecodeLiteral(state, rep0);
    488       state = UpdateState_Literal(state);
    489       unpackSize--;
    490       continue;
    491     }
    492 
    493     unsigned len;
    494 
    495     if (RangeDec.DecodeBit(&IsRep[state]) != 0)
    496     {
    497       if (unpackSizeDefined && unpackSize == 0)
    498         return LZMA_RES_ERROR;
    499       if (OutWindow.IsEmpty())
    500         return LZMA_RES_ERROR;
    501       if (RangeDec.DecodeBit(&IsRepG0[state]) == 0)
    502       {
    503         if (RangeDec.DecodeBit(&IsRep0Long[(state << kNumPosBitsMax) + posState]) == 0)
    504         {
    505           state = UpdateState_ShortRep(state);
    506           OutWindow.PutByte(OutWindow.GetByte(rep0 + 1));
    507           unpackSize--;
    508           continue;
    509         }
    510       }
    511       else
    512       {
    513         UInt32 dist;
    514         if (RangeDec.DecodeBit(&IsRepG1[state]) == 0)
    515           dist = rep1;
    516         else
    517         {
    518           if (RangeDec.DecodeBit(&IsRepG2[state]) == 0)
    519             dist = rep2;
    520           else
    521           {
    522             dist = rep3;
    523             rep3 = rep2;
    524           }
    525           rep2 = rep1;
    526         }
    527         rep1 = rep0;
    528         rep0 = dist;
    529       }
    530       len = RepLenDecoder.Decode(&RangeDec, posState);
    531       state = UpdateState_Rep(state);
    532     }
    533     else
    534     {
    535       rep3 = rep2;
    536       rep2 = rep1;
    537       rep1 = rep0;
    538       len = LenDecoder.Decode(&RangeDec, posState);
    539       state = UpdateState_Match(state);
    540       rep0 = DecodeDistance(len);
    541       if (rep0 == 0xFFFFFFFF)
    542         return RangeDec.IsFinishedOK() ?
    543             LZMA_RES_FINISHED_WITH_MARKER :
    544             LZMA_RES_ERROR;
    545 
    546       if (unpackSizeDefined && unpackSize == 0)
    547         return LZMA_RES_ERROR;
    548       if (rep0 >= dictSize || !OutWindow.CheckDistance(rep0))
    549         return LZMA_RES_ERROR;
    550     }
    551     len += kMatchMinLen;
    552     bool isError = false;
    553     if (unpackSizeDefined && unpackSize < len)
    554     {
    555       len = (unsigned)unpackSize;
    556       isError = true;
    557     }
    558     OutWindow.CopyMatch(rep0 + 1, len);
    559     unpackSize -= len;
    560     if (isError)
    561       return LZMA_RES_ERROR;
    562   }
    563 }
    564 
    565 static void Print(const char *s)
    566 {
    567   fputs(s, stdout);
    568 }
    569 
    570 static void PrintError(const char *s)
    571 {
    572   fputs(s, stderr);
    573 }
    574 
    575 
    576 #define CONVERT_INT_TO_STR(charType, tempSize) \
    577 
    578 void ConvertUInt64ToString(UInt64 val, char *s)
    579 {
    580   char temp[32];
    581   unsigned i = 0;
    582   while (val >= 10)
    583   {
    584     temp[i++] = (char)('0' + (unsigned)(val % 10));
    585     val /= 10;
    586   }
    587   *s++ = (char)('0' + (unsigned)val);
    588   while (i != 0)
    589   {
    590     i--;
    591     *s++ = temp[i];
    592   }
    593   *s = 0;
    594 }
    595 
    596 void PrintUInt64(const char *title, UInt64 v)
    597 {
    598   Print(title);
    599   Print(" : ");
    600   char s[32];
    601   ConvertUInt64ToString(v, s);
    602   Print(s);
    603   Print(" bytes \n");
    604 }
    605 
    606 int main2(int numArgs, const char *args[])
    607 {
    608   Print("\nLZMA Reference Decoder 9.31 : Igor Pavlov : Public domain : 2013-02-06\n");
    609   if (numArgs == 1)
    610     Print("\nUse: lzmaSpec a.lzma outFile");
    611 
    612   if (numArgs != 3)
    613     throw "you must specify two parameters";
    614 
    615   CInputStream inStream;
    616   inStream.File = fopen(args[1], "rb");
    617   inStream.Init();
    618   if (inStream.File == 0)
    619     throw "Can't open input file";
    620 
    621   CLzmaDecoder lzmaDecoder;
    622   lzmaDecoder.OutWindow.OutStream.File = fopen(args[2], "wb+");
    623   lzmaDecoder.OutWindow.OutStream.Init();
    624   if (inStream.File == 0)
    625     throw "Can't open output file";
    626 
    627   Byte header[13];
    628   int i;
    629   for (i = 0; i < 13; i++)
    630     header[i] = inStream.ReadByte();
    631 
    632   lzmaDecoder.DecodeProperties(header);
    633 
    634   printf("\nlc=%d, lp=%d, pb=%d", lzmaDecoder.lc, lzmaDecoder.lp, lzmaDecoder.pb);
    635   printf("\nDictionary Size in properties = %u", lzmaDecoder.dictSizeInProperties);
    636   printf("\nDictionary Size for decoding  = %u", lzmaDecoder.dictSize);
    637 
    638   UInt64 unpackSize = 0;
    639   bool unpackSizeDefined = false;
    640   for (i = 0; i < 8; i++)
    641   {
    642     Byte b = header[5 + i];
    643     if (b != 0xFF)
    644       unpackSizeDefined = true;
    645     unpackSize |= (UInt64)b << (8 * i);
    646   }
    647 
    648   lzmaDecoder.markerIsMandatory = !unpackSizeDefined;
    649 
    650   Print("\n");
    651   if (unpackSizeDefined)
    652     PrintUInt64("Uncompressed Size", unpackSize);
    653   else
    654     Print("End marker is expected\n");
    655   lzmaDecoder.RangeDec.InStream = &inStream;
    656 
    657   Print("\n");
    658 
    659   lzmaDecoder.Create();
    660   // we support the streams that have uncompressed size and marker.
    661   int res = lzmaDecoder.Decode(unpackSizeDefined, unpackSize);
    662 
    663   PrintUInt64("Read    ", inStream.Processed);
    664   PrintUInt64("Written ", lzmaDecoder.OutWindow.OutStream.Processed);
    665 
    666   if (res == LZMA_RES_ERROR)
    667     throw "LZMA decoding error";
    668   else if (res == LZMA_RES_FINISHED_WITHOUT_MARKER)
    669     Print("Finished without end marker");
    670   else if (res == LZMA_RES_FINISHED_WITH_MARKER)
    671   {
    672     if (unpackSizeDefined)
    673     {
    674       if (lzmaDecoder.OutWindow.OutStream.Processed != unpackSize)
    675         throw "Finished with end marker before than specified size";
    676       Print("Warning: ");
    677     }
    678     Print("Finished with end marker");
    679   }
    680   else
    681     throw "Internal Error";
    682 
    683   Print("\n");
    684 
    685   if (lzmaDecoder.RangeDec.Corrupted)
    686   {
    687     Print("\nWarning: LZMA stream is corrupted\n");
    688   }
    689 
    690   return 0;
    691 }
    692 
    693 
    694 int
    695   #ifdef _MSC_VER
    696     __cdecl
    697   #endif
    698 main(int numArgs, const char *args[])
    699 {
    700   try { return main2(numArgs, args); }
    701   catch (const char *s)
    702   {
    703     PrintError("\nError:\n");
    704     PrintError(s);
    705     PrintError("\n");
    706     return 1;
    707   }
    708   catch(...)
    709   {
    710     PrintError("\nError\n");
    711     return 1;
    712   }
    713 }
    714