Home | History | Annotate | Download | only in Common
      1 // Bench.cpp
      2 
      3 #include "StdAfx.h"
      4 
      5 #include <stdio.h>
      6 
      7 #ifndef _WIN32
      8 #define USE_POSIX_TIME
      9 #define USE_POSIX_TIME2
     10 #endif
     11 
     12 #ifdef USE_POSIX_TIME
     13 #include <time.h>
     14 #ifdef USE_POSIX_TIME2
     15 #include <sys/time.h>
     16 #endif
     17 #endif
     18 
     19 #ifdef _WIN32
     20 #define USE_ALLOCA
     21 #endif
     22 
     23 #ifdef USE_ALLOCA
     24 #ifdef _WIN32
     25 #include <malloc.h>
     26 #else
     27 #include <stdlib.h>
     28 #endif
     29 #endif
     30 
     31 #include "../../../../C/7zCrc.h"
     32 #include "../../../../C/Alloc.h"
     33 #include "../../../../C/CpuArch.h"
     34 
     35 #include "../../../Windows/System.h"
     36 
     37 #ifndef _7ZIP_ST
     38 #include "../../../Windows/Synchronization.h"
     39 #include "../../../Windows/Thread.h"
     40 #endif
     41 
     42 #if defined(_WIN32) || defined(UNIX_USE_WIN_FILE)
     43 #define USE_WIN_FILE
     44 #endif
     45 
     46 #ifdef USE_WIN_FILE
     47 #include "../../../Windows/FileIO.h"
     48 #endif
     49 
     50 
     51 #include "../../../Common/IntToString.h"
     52 #include "../../../Common/StringConvert.h"
     53 #include "../../../Common/StringToInt.h"
     54 
     55 #include "../../Common/MethodProps.h"
     56 #include "../../Common/StreamUtils.h"
     57 
     58 #include "Bench.h"
     59 
     60 using namespace NWindows;
     61 
// Numeric identifier for LZMA; not referenced in the visible part of this
// file — presumably the codec method ID, confirm against its users.
static const UInt32 k_LZMA = 0x030101;

// Default complexity budget that SetComplexCommands() falls back to:
// 2^31 when UNDER_CE is defined, 2^34 otherwise.
static const UInt64 kComplexInCommands = (UInt64)1 <<
  #ifdef UNDER_CE
    31;
  #else
    34;
  #endif

// Not referenced in the visible part of this file; presumably the default
// "seconds" argument for SetComplexCommands() — TODO confirm.
static const UInt32 kComplexInSeconds = 4;
     72 
     73 static void SetComplexCommands(UInt32 complexInSeconds,
     74     bool isSpecifiedFreq, UInt64 cpuFreq, UInt64 &complexInCommands)
     75 {
     76   complexInCommands = kComplexInCommands;
     77   const UInt64 kMinFreq = (UInt64)1000000 * 4;
     78   const UInt64 kMaxFreq = (UInt64)1000000 * 20000;
     79   if (cpuFreq < kMinFreq && !isSpecifiedFreq)
     80     cpuFreq = kMinFreq;
     81   if (cpuFreq < kMaxFreq || isSpecifiedFreq)
     82   {
     83     if (complexInSeconds != 0)
     84       complexInCommands = complexInSeconds * cpuFreq;
     85     else
     86       complexInCommands = cpuFreq >> 2;
     87   }
     88 }
     89 
// The first four constants are not referenced in the visible part of this
// file; their meaning should be confirmed at their points of use.
static const unsigned kNumHashDictBits = 17;
static const UInt32 kFilterUnpackSize = (48 << 10);

static const unsigned kOldLzmaDictBits = 30;

static const UInt32 kAdditionalSize = (1 << 16);
// Fixed slack (1 KiB) that CEncoderInfo::Init() adds to the size of the
// compressed-output buffer.
static const UInt32 kCompressedAdditionalSize = (1 << 10);
// Size of the buffer CEncoderInfo::Init() allocates for the coder
// properties stream.
static const UInt32 kMaxLzmaPropSize = 5;
     98 
     99 class CBaseRandomGenerator
    100 {
    101   UInt32 A1;
    102   UInt32 A2;
    103 public:
    104   CBaseRandomGenerator() { Init(); }
    105   void Init() { A1 = 362436069; A2 = 521288629;}
    106   UInt32 GetRnd()
    107   {
    108     return
    109       ((A1 = 36969 * (A1 & 0xffff) + (A1 >> 16)) << 16) +
    110       ((A2 = 18000 * (A2 & 0xffff) + (A2 >> 16)) );
    111   }
    112 };
    113 
    114 
// Alignment (in bytes) requested by CBenchBuffer::Alloc(); it is passed to
// AllocAlignedMask() as the mask (kBufferAlignment - 1).
static const unsigned kBufferAlignment = 1 << 4;

// Owns one heap buffer obtained from MidAlloc() and released with MidFree().
//
// Buffer     - start of the usable data (NULL when nothing is allocated).
// BufferSize - usable size in bytes (0 when nothing is allocated).
//
// NOTE(review): the struct has a freeing destructor but no user-defined copy
// constructor / assignment, so copying an instance would lead to a double
// MidFree(). No copies are visible in this part of the file.
struct CBenchBuffer
{
  size_t BufferSize;

  #ifdef _WIN32

  Byte *Buffer;

  CBenchBuffer(): BufferSize(0), Buffer(NULL) {}
  ~CBenchBuffer() { ::MidFree(Buffer); }

  // Win32 variant: the alignment mask is ignored and the pointer returned by
  // MidAlloc() is used directly (presumably MidAlloc() already returns
  // sufficiently aligned memory on Windows — TODO confirm).
  // Any previous buffer is freed first. On failure Buffer is NULL and
  // BufferSize is 0.
  void AllocAlignedMask(size_t size, size_t)
  {
    ::MidFree(Buffer);
    BufferSize = 0;
    Buffer = (Byte *)::MidAlloc(size);
    if (Buffer)
      BufferSize = size;
  }

  #else

  Byte *Buffer;
  Byte *_bufBase;   // raw pointer returned by MidAlloc(); this is what gets freed

  CBenchBuffer(): BufferSize(0), Buffer(NULL), _bufBase(NULL){}
  ~CBenchBuffer() { ::MidFree(_bufBase); }

  // Non-Win32 variant: over-allocates by alignMask bytes and rounds the
  // start address up to the next multiple of (alignMask + 1).
  // Any previous buffer is freed first. On failure Buffer is NULL and
  // BufferSize is 0.
  void AllocAlignedMask(size_t size, size_t alignMask)
  {
    ::MidFree(_bufBase);
    Buffer = NULL;
    BufferSize = 0;
    _bufBase = (Byte *)::MidAlloc(size + alignMask);

    if (_bufBase)
    {
      // Buffer = (Byte *)(((uintptr_t)_bufBase + alignMask) & ~(uintptr_t)alignMask);
         Buffer = (Byte *)(((ptrdiff_t)_bufBase + alignMask) & ~(ptrdiff_t)alignMask);
      BufferSize = size;
    }
  }

  #endif

  // Ensures a buffer of exactly `size` bytes exists. An existing buffer of
  // the same size is reused as is (its contents are not touched); otherwise
  // the buffer is reallocated.
  // Returns false only when a non-empty allocation failed.
  bool Alloc(size_t size)
  {
    if (Buffer && BufferSize == size)
      return true;
    AllocAlignedMask(size, kBufferAlignment - 1);
    return (Buffer != NULL || size == 0);
  }
};
    170 
    171 
// CBenchBuffer that can fill itself with deterministic pseudo-random test
// data, driven by a caller-supplied CBaseRandomGenerator.
class CBenchRandomGenerator: public CBenchBuffer
{
  // Returns the low numBits bits of res and removes them from res
  // (res is shifted right by numBits).
  static UInt32 GetVal(UInt32 &res, unsigned numBits)
  {
    UInt32 val = res & (((UInt32)1 << numBits) - 1);
    res >>= numBits;
    return val;
  }

  // Consumes a 2-bit selector s from r, then (1 + s) more bits, and returns
  // those bits as a value in the range 0..15 (small values are more likely).
  static UInt32 GetLen(UInt32 &r)
  {
    UInt32 len = GetVal(r, 2);
    return GetVal(r, 1 + len);
  }

public:

  // Fills the whole buffer with the low byte of successive random numbers.
  // The generator is copied to a local for the loop and its final state is
  // written back to *_RG_.
  void GenerateSimpleRandom(CBaseRandomGenerator *_RG_)
  {
    CBaseRandomGenerator rg = *_RG_;
    const size_t bufSize = BufferSize;
    Byte *buf = Buffer;
    for (size_t i = 0; i < bufSize; i++)
      buf[i] = (Byte)rg.GetRnd();
    *_RG_ = rg;
  }

  // Fills the whole buffer with a mix of random literal bytes and repeated
  // substrings (copies of earlier buffer content), so that the data contains
  // LZ-style matches.
  //
  //   dictBits - upper bound on the number of bits used for match distances.
  //   _RG_     - random generator; its final state is written back.
  //
  // NOTE(review): `pos` is a UInt32 while the buffer size is a size_t, and
  // the remaining-size computation casts bufSize to UInt32; buffers of 4 GiB
  // or more are therefore not handled — confirm callers never request that.
  void GenerateLz(unsigned dictBits, CBaseRandomGenerator *_RG_)
  {
    CBaseRandomGenerator rg = *_RG_;
    UInt32 pos = 0;
    UInt32 rep0 = 1;          // current match distance (>= 1)
    const size_t bufSize = BufferSize;
    Byte *buf = Buffer;
    unsigned posBits = 1;     // grows so that (1 << posBits) >= pos

    while (pos < bufSize)
    {
      UInt32 r = rg.GetRnd();
      // One random bit selects literal vs. match; the first 1024 bytes are
      // always literals so that matches have something to refer to.
      if (GetVal(r, 1) == 0 || pos < 1024)
        buf[pos++] = (Byte)(r & 0xFF);
      else
      {
        UInt32 len;
        len = 1 + GetLen(r);

        // In 7 of 8 cases a new distance is chosen (and the match may get
        // longer); otherwise the previous distance rep0 is reused.
        if (GetVal(r, 3) != 0)
        {
          len += GetLen(r);

          while (((UInt32)1 << posBits) < pos)
            posBits++;

          // Distances use at most min(dictBits, posBits) bits.
          unsigned numBitsMax = dictBits;
          if (numBitsMax > posBits)
            numBitsMax = posBits;

          const unsigned kAddBits = 6;
          unsigned numLogBits = 5;
          if (numBitsMax <= (1 << 4) - 1 + kAddBits)
            numLogBits = 4;

          // Rejection sampling: pick a bit width ppp (>= kAddBits), then a
          // ppp-bit distance, and retry until the width is allowed and the
          // distance lies inside the data generated so far.
          // NOTE(review): with dictBits < kAddBits no width is ever accepted
          // and this loop cannot terminate; presumably callers always pass a
          // larger value — confirm.
          for (;;)
          {
            UInt32 ppp = GetVal(r, numLogBits) + kAddBits;
            r = rg.GetRnd();
            if (ppp > numBitsMax)
              continue;
            rep0 = GetVal(r, ppp);
            if (rep0 < pos)
              break;
            r = rg.GetRnd();
          }
          rep0++;
        }

        // Never write past the end of the buffer.
        {
          UInt32 rem = (UInt32)bufSize - pos;
          if (len > rem)
            len = rem;
        }
        Byte *dest = buf + pos;
        const Byte *src = dest - rep0;
        pos += len;
        // Byte-by-byte forward copy: source and destination may overlap
        // (len > rep0), in which case the pattern is repeated.
        for (UInt32 i = 0; i < len; i++)
          *dest++ = *src++;
      }
    }

    *_RG_ = rg;
  }
};
    264 
    265 
    266 class CBenchmarkInStream:
    267   public ISequentialInStream,
    268   public CMyUnknownImp
    269 {
    270   const Byte *Data;
    271   size_t Pos;
    272   size_t Size;
    273 public:
    274   MY_UNKNOWN_IMP
    275   void Init(const Byte *data, size_t size)
    276   {
    277     Data = data;
    278     Size = size;
    279     Pos = 0;
    280   }
    281   STDMETHOD(Read)(void *data, UInt32 size, UInt32 *processedSize);
    282 };
    283 
    284 STDMETHODIMP CBenchmarkInStream::Read(void *data, UInt32 size, UInt32 *processedSize)
    285 {
    286   size_t remain = Size - Pos;
    287   UInt32 kMaxBlockSize = (1 << 20);
    288   if (size > kMaxBlockSize)
    289     size = kMaxBlockSize;
    290   if (size > remain)
    291     size = (UInt32)remain;
    292   for (UInt32 i = 0; i < size; i++)
    293     ((Byte *)data)[i] = Data[Pos + i];
    294   Pos += size;
    295   if (processedSize)
    296     *processedSize = size;
    297   return S_OK;
    298 }
    299 
    300 class CBenchmarkOutStream:
    301   public ISequentialOutStream,
    302   public CBenchBuffer,
    303   public CMyUnknownImp
    304 {
    305   // bool _overflow;
    306 public:
    307   size_t Pos;
    308   bool RealCopy;
    309   bool CalcCrc;
    310   UInt32 Crc;
    311 
    312   // CBenchmarkOutStream(): _overflow(false) {}
    313   void Init(bool realCopy, bool calcCrc)
    314   {
    315     Crc = CRC_INIT_VAL;
    316     RealCopy = realCopy;
    317     CalcCrc = calcCrc;
    318     // _overflow = false;
    319     Pos = 0;
    320   }
    321 
    322   // void Print() { printf("\n%8d %8d\n", (unsigned)BufferSize, (unsigned)Pos); }
    323 
    324   MY_UNKNOWN_IMP
    325   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
    326 };
    327 
    328 STDMETHODIMP CBenchmarkOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
    329 {
    330   size_t curSize = BufferSize - Pos;
    331   if (curSize > size)
    332     curSize = size;
    333   if (curSize != 0)
    334   {
    335     if (RealCopy)
    336       memcpy(Buffer + Pos, data, curSize);
    337     if (CalcCrc)
    338       Crc = CrcUpdate(Crc, data, curSize);
    339     Pos += curSize;
    340   }
    341   if (processedSize)
    342     *processedSize = (UInt32)curSize;
    343   if (curSize != size)
    344   {
    345     // _overflow = true;
    346     return E_FAIL;
    347   }
    348   return S_OK;
    349 }
    350 
    351 class CCrcOutStream:
    352   public ISequentialOutStream,
    353   public CMyUnknownImp
    354 {
    355 public:
    356   bool CalcCrc;
    357   UInt32 Crc;
    358   MY_UNKNOWN_IMP
    359 
    360   CCrcOutStream(): CalcCrc(true) {};
    361   void Init() { Crc = CRC_INIT_VAL; }
    362   STDMETHOD(Write)(const void *data, UInt32 size, UInt32 *processedSize);
    363 };
    364 
    365 STDMETHODIMP CCrcOutStream::Write(const void *data, UInt32 size, UInt32 *processedSize)
    366 {
    367   if (CalcCrc)
    368     Crc = CrcUpdate(Crc, data, size);
    369   if (processedSize)
    370     *processedSize = size;
    371   return S_OK;
    372 }
    373 
    374 static UInt64 GetTimeCount()
    375 {
    376   #ifdef USE_POSIX_TIME
    377   #ifdef USE_POSIX_TIME2
    378   timeval v;
    379   if (gettimeofday(&v, 0) == 0)
    380     return (UInt64)(v.tv_sec) * 1000000 + v.tv_usec;
    381   return (UInt64)time(NULL) * 1000000;
    382   #else
    383   return time(NULL);
    384   #endif
    385   #else
    386   /*
    387   LARGE_INTEGER value;
    388   if (::QueryPerformanceCounter(&value))
    389     return value.QuadPart;
    390   */
    391   return GetTickCount();
    392   #endif
    393 }
    394 
    395 static UInt64 GetFreq()
    396 {
    397   #ifdef USE_POSIX_TIME
    398   #ifdef USE_POSIX_TIME2
    399   return 1000000;
    400   #else
    401   return 1;
    402   #endif
    403   #else
    404   /*
    405   LARGE_INTEGER value;
    406   if (::QueryPerformanceFrequency(&value))
    407     return value.QuadPart;
    408   */
    409   return 1000;
    410   #endif
    411 }
    412 
    413 #ifdef USE_POSIX_TIME
    414 
    415 struct CUserTime
    416 {
    417   UInt64 Sum;
    418   clock_t Prev;
    419 
    420   void Init()
    421   {
    422     Prev = clock();
    423     Sum = 0;
    424   }
    425 
    426   UInt64 GetUserTime()
    427   {
    428     clock_t v = clock();
    429     Sum += v - Prev;
    430     Prev = v;
    431     return Sum;
    432   }
    433 };
    434 
    435 #else
    436 
    437 static inline UInt64 GetTime64(const FILETIME &t) { return ((UInt64)t.dwHighDateTime << 32) | t.dwLowDateTime; }
    438 UInt64 GetWinUserTime()
    439 {
    440   FILETIME creationTime, exitTime, kernelTime, userTime;
    441   if (
    442   #ifdef UNDER_CE
    443     ::GetThreadTimes(::GetCurrentThread()
    444   #else
    445     ::GetProcessTimes(::GetCurrentProcess()
    446   #endif
    447     , &creationTime, &exitTime, &kernelTime, &userTime) != 0)
    448     return GetTime64(userTime) + GetTime64(kernelTime);
    449   return (UInt64)GetTickCount() * 10000;
    450 }
    451 
    452 struct CUserTime
    453 {
    454   UInt64 StartTime;
    455 
    456   void Init() { StartTime = GetWinUserTime(); }
    457   UInt64 GetUserTime() { return GetWinUserTime() - StartTime; }
    458 };
    459 
    460 #endif
    461 
    462 static UInt64 GetUserFreq()
    463 {
    464   #ifdef USE_POSIX_TIME
    465   return CLOCKS_PER_SEC;
    466   #else
    467   return 10000000;
    468   #endif
    469 }
    470 
    471 class CBenchProgressStatus
    472 {
    473   #ifndef _7ZIP_ST
    474   NSynchronization::CCriticalSection CS;
    475   #endif
    476 public:
    477   HRESULT Res;
    478   bool EncodeMode;
    479   void SetResult(HRESULT res)
    480   {
    481     #ifndef _7ZIP_ST
    482     NSynchronization::CCriticalSectionLock lock(CS);
    483     #endif
    484     Res = res;
    485   }
    486   HRESULT GetResult()
    487   {
    488     #ifndef _7ZIP_ST
    489     NSynchronization::CCriticalSectionLock lock(CS);
    490     #endif
    491     return Res;
    492   }
    493 };
    494 
// Helper that timestamps a benchmark run.
struct CBenchInfoCalc
{
  CBenchInfo BenchInfo;   // holds the start time, the tick frequencies and the sizes accumulated so far
  CUserTime UserTime;     // CPU-time meter started by SetStartTime()

  // Records the tick frequencies and the start time, and starts the
  // CPU-time meter.
  void SetStartTime();
  // Copies BenchInfo to dest and replaces its time fields with the elapsed
  // wall-clock and CPU time since SetStartTime().
  void SetFinishTime(CBenchInfo &dest);
};
    503 
    504 void CBenchInfoCalc::SetStartTime()
    505 {
    506   BenchInfo.GlobalFreq = GetFreq();
    507   BenchInfo.UserFreq = GetUserFreq();
    508   BenchInfo.GlobalTime = ::GetTimeCount();
    509   BenchInfo.UserTime = 0;
    510   UserTime.Init();
    511 }
    512 
    513 void CBenchInfoCalc::SetFinishTime(CBenchInfo &dest)
    514 {
    515   dest = BenchInfo;
    516   dest.GlobalTime = ::GetTimeCount() - BenchInfo.GlobalTime;
    517   dest.UserTime = UserTime.GetUserTime();
    518 }
    519 
    520 class CBenchProgressInfo:
    521   public ICompressProgressInfo,
    522   public CMyUnknownImp,
    523   public CBenchInfoCalc
    524 {
    525 public:
    526   CBenchProgressStatus *Status;
    527   HRESULT Res;
    528   IBenchCallback *Callback;
    529 
    530   CBenchProgressInfo(): Callback(0) {}
    531   MY_UNKNOWN_IMP
    532   STDMETHOD(SetRatioInfo)(const UInt64 *inSize, const UInt64 *outSize);
    533 };
    534 
    535 STDMETHODIMP CBenchProgressInfo::SetRatioInfo(const UInt64 *inSize, const UInt64 *outSize)
    536 {
    537   HRESULT res = Status->GetResult();
    538   if (res != S_OK)
    539     return res;
    540   if (!Callback)
    541     return res;
    542   CBenchInfo info;
    543   SetFinishTime(info);
    544   if (Status->EncodeMode)
    545   {
    546     info.UnpackSize = BenchInfo.UnpackSize + *inSize;
    547     info.PackSize = BenchInfo.PackSize + *outSize;
    548     res = Callback->SetEncodeResult(info, false);
    549   }
    550   else
    551   {
    552     info.PackSize = BenchInfo.PackSize + *inSize;
    553     info.UnpackSize = BenchInfo.UnpackSize + *outSize;
    554     res = Callback->SetDecodeResult(info, false);
    555   }
    556   if (res != S_OK)
    557     Status->SetResult(res);
    558   return res;
    559 }
    560 
    561 static const unsigned kSubBits = 8;
    562 
    563 static UInt32 GetLogSize(UInt32 size)
    564 {
    565   for (unsigned i = kSubBits; i < 32; i++)
    566     for (UInt32 j = 0; j < (1 << kSubBits); j++)
    567       if (size <= (((UInt32)1) << i) + (j << (i - kSubBits)))
    568         return (i << kSubBits) + j;
    569   return (32 << kSubBits);
    570 }
    571 
    572 static void NormalizeVals(UInt64 &v1, UInt64 &v2)
    573 {
    574   while (v1 > 1000000)
    575   {
    576     v1 >>= 1;
    577     v2 >>= 1;
    578   }
    579 }
    580 
    581 UInt64 CBenchInfo::GetUsage() const
    582 {
    583   UInt64 userTime = UserTime;
    584   UInt64 userFreq = UserFreq;
    585   UInt64 globalTime = GlobalTime;
    586   UInt64 globalFreq = GlobalFreq;
    587   NormalizeVals(userTime, userFreq);
    588   NormalizeVals(globalFreq, globalTime);
    589   if (userFreq == 0)
    590     userFreq = 1;
    591   if (globalTime == 0)
    592     globalTime = 1;
    593   return userTime * globalFreq * 1000000 / userFreq / globalTime;
    594 }
    595 
    596 UInt64 CBenchInfo::GetRatingPerUsage(UInt64 rating) const
    597 {
    598   UInt64 userTime = UserTime;
    599   UInt64 userFreq = UserFreq;
    600   UInt64 globalTime = GlobalTime;
    601   UInt64 globalFreq = GlobalFreq;
    602   NormalizeVals(userFreq, userTime);
    603   NormalizeVals(globalTime, globalFreq);
    604   if (globalFreq == 0)
    605     globalFreq = 1;
    606   if (userTime == 0)
    607     userTime = 1;
    608   return userFreq * globalTime / globalFreq * rating / userTime;
    609 }
    610 
    611 static UInt64 MyMultDiv64(UInt64 value, UInt64 elapsedTime, UInt64 freq)
    612 {
    613   UInt64 elTime = elapsedTime;
    614   NormalizeVals(freq, elTime);
    615   if (elTime == 0)
    616     elTime = 1;
    617   return value * freq / elTime;
    618 }
    619 
    620 UInt64 CBenchInfo::GetSpeed(UInt64 numCommands) const
    621 {
    622   return MyMultDiv64(numCommands, GlobalTime, GlobalFreq);
    623 }
    624 
    625 struct CBenchProps
    626 {
    627   bool LzmaRatingMode;
    628 
    629   UInt32 EncComplex;
    630   UInt32 DecComplexCompr;
    631   UInt32 DecComplexUnc;
    632 
    633   CBenchProps(): LzmaRatingMode(false) {}
    634   void SetLzmaCompexity();
    635 
    636   UInt64 GeComprCommands(UInt64 unpackSize)
    637   {
    638     return unpackSize * EncComplex;
    639   }
    640 
    641   UInt64 GeDecomprCommands(UInt64 packSize, UInt64 unpackSize)
    642   {
    643     return (packSize * DecComplexCompr + unpackSize * DecComplexUnc);
    644   }
    645 
    646   UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size);
    647   UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations);
    648 };
    649 
    650 void CBenchProps::SetLzmaCompexity()
    651 {
    652   EncComplex = 1200;
    653   DecComplexUnc = 4;
    654   DecComplexCompr = 190;
    655   LzmaRatingMode = true;
    656 }
    657 
    658 UInt64 CBenchProps::GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
    659 {
    660   if (dictSize < (1 << kBenchMinDicLogSize))
    661     dictSize = (1 << kBenchMinDicLogSize);
    662   UInt64 encComplex = EncComplex;
    663   if (LzmaRatingMode)
    664   {
    665     UInt64 t = GetLogSize(dictSize) - (kBenchMinDicLogSize << kSubBits);
    666     encComplex = 870 + ((t * t * 5) >> (2 * kSubBits));
    667   }
    668   UInt64 numCommands = (UInt64)size * encComplex;
    669   return MyMultDiv64(numCommands, elapsedTime, freq);
    670 }
    671 
    672 UInt64 CBenchProps::GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
    673 {
    674   UInt64 numCommands = (inSize * DecComplexCompr + outSize * DecComplexUnc) * numIterations;
    675   return MyMultDiv64(numCommands, elapsedTime, freq);
    676 }
    677 
    678 UInt64 GetCompressRating(UInt32 dictSize, UInt64 elapsedTime, UInt64 freq, UInt64 size)
    679 {
    680   CBenchProps props;
    681   props.SetLzmaCompexity();
    682   return props.GetCompressRating(dictSize, elapsedTime, freq, size);
    683 }
    684 
    685 UInt64 GetDecompressRating(UInt64 elapsedTime, UInt64 freq, UInt64 outSize, UInt64 inSize, UInt64 numIterations)
    686 {
    687   CBenchProps props;
    688   props.SetLzmaCompexity();
    689   return props.GetDecompressRating(elapsedTime, freq, outSize, inSize, numIterations);
    690 }
    691 
    692 struct CEncoderInfo;
    693 
    694 struct CEncoderInfo
    695 {
    696   #ifndef _7ZIP_ST
    697   NWindows::CThread thread[2];
    698   UInt32 NumDecoderSubThreads;
    699   #endif
    700   CMyComPtr<ICompressCoder> _encoder;
    701   CMyComPtr<ICompressFilter> _encoderFilter;
    702   CBenchProgressInfo *progressInfoSpec[2];
    703   CMyComPtr<ICompressProgressInfo> progressInfo[2];
    704   UInt64 NumIterations;
    705 
    706   #ifdef USE_ALLOCA
    707   size_t AllocaSize;
    708   #endif
    709 
    710   Byte _key[32];
    711   Byte _iv[16];
    712   Byte _psw[16];
    713   bool CheckCrc_Enc;
    714   bool CheckCrc_Dec;
    715 
    716   struct CDecoderInfo
    717   {
    718     CEncoderInfo *Encoder;
    719     UInt32 DecoderIndex;
    720     bool CallbackMode;
    721 
    722     #ifdef USE_ALLOCA
    723     size_t AllocaSize;
    724     #endif
    725   };
    726   CDecoderInfo decodersInfo[2];
    727 
    728   CMyComPtr<ICompressCoder> _decoders[2];
    729   CMyComPtr<ICompressFilter> _decoderFilter;
    730 
    731   HRESULT Results[2];
    732   CBenchmarkOutStream *outStreamSpec;
    733   CMyComPtr<ISequentialOutStream> outStream;
    734   IBenchCallback *callback;
    735   IBenchPrintCallback *printCallback;
    736   UInt32 crc;
    737   size_t kBufferSize;
    738   size_t compressedSize;
    739   const Byte *uncompressedDataPtr;
    740 
    741   const Byte *fileData;
    742   CBenchRandomGenerator rg;
    743 
    744   CBenchBuffer rgCopy; // it must be 16-byte aligned !!!
    745   CBenchmarkOutStream *propStreamSpec;
    746   CMyComPtr<ISequentialOutStream> propStream;
    747 
    748   // for decode
    749   COneMethodInfo _method;
    750   size_t _uncompressedDataSize;
    751 
    752   HRESULT Init(
    753       const COneMethodInfo &method,
    754       unsigned generateDictBits,
    755       CBaseRandomGenerator *rg);
    756   HRESULT Encode();
    757   HRESULT Decode(UInt32 decoderIndex);
    758 
    759   CEncoderInfo():
    760     fileData(NULL),
    761     CheckCrc_Enc(true),
    762     CheckCrc_Dec(true),
    763     outStreamSpec(0), callback(0), printCallback(0), propStreamSpec(0) {}
    764 
    765   #ifndef _7ZIP_ST
    766 
    767   static THREAD_FUNC_DECL EncodeThreadFunction(void *param)
    768   {
    769     HRESULT res;
    770     CEncoderInfo *encoder = (CEncoderInfo *)param;
    771     try
    772     {
    773       #ifdef USE_ALLOCA
    774       alloca(encoder->AllocaSize);
    775       #endif
    776 
    777       res = encoder->Encode();
    778       encoder->Results[0] = res;
    779     }
    780     catch(...)
    781     {
    782       res = E_FAIL;
    783     }
    784     if (res != S_OK)
    785       encoder->progressInfoSpec[0]->Status->SetResult(res);
    786     return 0;
    787   }
    788 
    789   static THREAD_FUNC_DECL DecodeThreadFunction(void *param)
    790   {
    791     CDecoderInfo *decoder = (CDecoderInfo *)param;
    792 
    793     #ifdef USE_ALLOCA
    794     alloca(decoder->AllocaSize);
    795     #endif
    796 
    797     CEncoderInfo *encoder = decoder->Encoder;
    798     encoder->Results[decoder->DecoderIndex] = encoder->Decode(decoder->DecoderIndex);
    799     return 0;
    800   }
    801 
    802   HRESULT CreateEncoderThread()
    803   {
    804     return thread[0].Create(EncodeThreadFunction, this);
    805   }
    806 
    807   HRESULT CreateDecoderThread(unsigned index, bool callbackMode
    808       #ifdef USE_ALLOCA
    809       , size_t allocaSize
    810       #endif
    811       )
    812   {
    813     CDecoderInfo &decoder = decodersInfo[index];
    814     decoder.DecoderIndex = index;
    815     decoder.Encoder = this;
    816 
    817     #ifdef USE_ALLOCA
    818     decoder.AllocaSize = allocaSize;
    819     #endif
    820 
    821     decoder.CallbackMode = callbackMode;
    822     return thread[index].Create(DecodeThreadFunction, &decoder);
    823   }
    824 
    825   #endif
    826 };
    827 
    828 
    829 HRESULT CEncoderInfo::Init(
    830     const COneMethodInfo &method,
    831     unsigned generateDictBits,
    832     CBaseRandomGenerator *rgLoc)
    833 {
    834   // we need extra space, if input data is already compressed
    835   const size_t kCompressedBufferSize =
    836       kCompressedAdditionalSize +
    837       kBufferSize + kBufferSize / 16;
    838       // kBufferSize / 2;
    839 
    840   if (kCompressedBufferSize < kBufferSize)
    841     return E_FAIL;
    842 
    843   uncompressedDataPtr = fileData;
    844 
    845   if (!fileData)
    846   {
    847     if (!rg.Alloc(kBufferSize))
    848       return E_OUTOFMEMORY;
    849 
    850     // DWORD ttt = GetTickCount();
    851     if (generateDictBits == 0)
    852       rg.GenerateSimpleRandom(rgLoc);
    853     else
    854       rg.GenerateLz(generateDictBits, rgLoc);
    855     // printf("\n%d\n            ", GetTickCount() - ttt);
    856 
    857     crc = CrcCalc(rg.Buffer, rg.BufferSize);
    858     uncompressedDataPtr = rg.Buffer;
    859   }
    860 
    861   if (_encoderFilter)
    862   {
    863     if (!rgCopy.Alloc(kBufferSize))
    864       return E_OUTOFMEMORY;
    865   }
    866 
    867 
    868   outStreamSpec = new CBenchmarkOutStream;
    869   outStream = outStreamSpec;
    870   if (!outStreamSpec->Alloc(kCompressedBufferSize))
    871     return E_OUTOFMEMORY;
    872 
    873   propStreamSpec = 0;
    874   if (!propStream)
    875   {
    876     propStreamSpec = new CBenchmarkOutStream;
    877     propStream = propStreamSpec;
    878   }
    879   if (!propStreamSpec->Alloc(kMaxLzmaPropSize))
    880     return E_OUTOFMEMORY;
    881   propStreamSpec->Init(true, false);
    882 
    883 
    884   CMyComPtr<IUnknown> coder;
    885   if (_encoderFilter)
    886     coder = _encoderFilter;
    887   else
    888     coder = _encoder;
    889   {
    890     CMyComPtr<ICompressSetCoderProperties> scp;
    891     coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
    892     if (scp)
    893     {
    894       UInt64 reduceSize = kBufferSize;
    895       RINOK(method.SetCoderProps(scp, &reduceSize));
    896     }
    897     else
    898     {
    899       if (method.AreThereNonOptionalProps())
    900         return E_INVALIDARG;
    901     }
    902 
    903     CMyComPtr<ICompressWriteCoderProperties> writeCoderProps;
    904     coder.QueryInterface(IID_ICompressWriteCoderProperties, &writeCoderProps);
    905     if (writeCoderProps)
    906     {
    907       RINOK(writeCoderProps->WriteCoderProperties(propStream));
    908     }
    909 
    910     {
    911       CMyComPtr<ICryptoSetPassword> sp;
    912       coder.QueryInterface(IID_ICryptoSetPassword, &sp);
    913       if (sp)
    914       {
    915         RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
    916 
    917         // we must call encoding one time to calculate password key for key cache.
    918         // it must be after WriteCoderProperties!
    919         Byte temp[16];
    920         memset(temp, 0, sizeof(temp));
    921 
    922         if (_encoderFilter)
    923         {
    924           _encoderFilter->Init();
    925           _encoderFilter->Filter(temp, sizeof(temp));
    926         }
    927         else
    928         {
    929           CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
    930           CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
    931           inStreamSpec->Init(temp, sizeof(temp));
    932 
    933           CCrcOutStream *crcStreamSpec = new CCrcOutStream;
    934           CMyComPtr<ISequentialOutStream> crcStream = crcStreamSpec;
    935           crcStreamSpec->Init();
    936 
    937           RINOK(_encoder->Code(inStream, crcStream, 0, 0, NULL));
    938         }
    939       }
    940     }
    941   }
    942 
    943   return S_OK;
    944 }
    945 
    946 
    947 static void My_FilterBench(ICompressFilter *filter, Byte *data, size_t size)
    948 {
    949   while (size != 0)
    950   {
    951     UInt32 cur = (UInt32)1 << 31;
    952     if (cur > size)
    953       cur = (UInt32)size;
    954     UInt32 processed = filter->Filter(data, cur);
    955     data += processed;
    956     // if (processed > size) (in AES filter), we must fill last block with zeros.
    957     // but it is not important for benchmark. So we just copy that data without filtering.
    958     if (processed > size || processed == 0)
    959       break;
    960     size -= processed;
    961   }
    962 }
    963 
    964 
// Encoding phase of the benchmark: encodes the prepared input NumIterations
// times with the encoder (or encoder filter) and accumulates the processed
// sizes in progressInfoSpec[0]->BenchInfo.
//
// Returns the first error reported by the coder, the output stream or the
// print callback, E_FAIL when the CRC of an iteration's output differs from
// the first iteration's, otherwise S_OK. On success the encoder objects are
// released.
HRESULT CEncoderInfo::Encode()
{
  CBenchInfo &bi = progressInfoSpec[0]->BenchInfo;
  bi.UnpackSize = 0;
  bi.PackSize = 0;
  CMyComPtr<ICryptoProperties> cp;
  CMyComPtr<IUnknown> coder;
  if (_encoderFilter)
    coder = _encoderFilter;
  else
    coder = _encoder;
  coder.QueryInterface(IID_ICryptoProperties, &cp);
  CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
  CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
  UInt64 prev = 0;   // bi.UnpackSize at the time of the last CheckBreak() call

  UInt32 crcPrev = 0;   // CRC digest taken after the first iteration

  // Crypto coders get the key and IV once, before the loop.
  if (cp)
  {
    RINOK(cp->SetKey(_key, sizeof(_key)));
    RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
  }

  for (UInt64 i = 0; i < NumIterations; i++)
  {
    // Poll for a user break only after more than 1 MiB of new input.
    if (printCallback && bi.UnpackSize - prev > (1 << 20))
    {
      RINOK(printCallback->CheckBreak());
      prev = bi.UnpackSize;
    }

    // The output is really stored only on the last iteration. The CRC is
    // calculated on the last iteration, on every 128th one, or on every
    // iteration when CheckCrc_Enc is set — but never for a single-iteration
    // run, where there is nothing to compare against.
    bool isLast = (i == NumIterations - 1);
    bool calcCrc = ((isLast || (i & 0x7F) == 0 || CheckCrc_Enc) && NumIterations != 1);
    outStreamSpec->Init(isLast, calcCrc);

    if (_encoderFilter)
    {
      // Filters work in place: refresh the private copy, filter it and
      // write the result to the output stream.
      memcpy(rgCopy.Buffer, uncompressedDataPtr, kBufferSize);
      _encoderFilter->Init();
      My_FilterBench(_encoderFilter, rgCopy.Buffer, kBufferSize);
      RINOK(WriteStream(outStream, rgCopy.Buffer, kBufferSize));
    }
    else
    {
      inStreamSpec->Init(uncompressedDataPtr, kBufferSize);
      RINOK(_encoder->Code(inStream, outStream, NULL, NULL, progressInfo[0]));
    }

    // outStreamSpec->Print();

    // Every checked iteration must produce the same output as the first.
    UInt32 crcNew = CRC_GET_DIGEST(outStreamSpec->Crc);
    if (i == 0)
      crcPrev = crcNew;
    else if (calcCrc && crcPrev != crcNew)
      return E_FAIL;

    compressedSize = outStreamSpec->Pos;
    bi.UnpackSize += kBufferSize;
    bi.PackSize += compressedSize;
  }

  _encoder.Release();
  _encoderFilter.Release();
  return S_OK;
}
   1031 
   1032 
   // Benchmark decode pass for one decoder slot.
   //
   // Feeds the packed data held in outStreamSpec->Buffer (compressedSize bytes)
   // through _decoderFilter, or through _decoders[decoderIndex] when no filter is
   // set, NumIterations times. Processed sizes are accumulated in
   // progressInfoSpec[decoderIndex]->BenchInfo.
   //
   // Returns:
   //   E_FAIL  - a filter is used with decoderIndex != 0, the coder cannot accept
   //             the stored coder properties, or compressedSize is larger than
   //             rgCopy.BufferSize;
   //   S_FALSE - the CRC of the decoded output differs from the expected `crc`;
   //   S_OK    - all iterations completed.
   // Calls wrapped in RINOK(...) presumably propagate their failure code
   // (the macro is defined outside this part of the file).
   1033 HRESULT CEncoderInfo::Decode(UInt32 decoderIndex)
   1034 {
   1035   CBenchmarkInStream *inStreamSpec = new CBenchmarkInStream;
   1036   CMyComPtr<ISequentialInStream> inStream = inStreamSpec;
   1037   CMyComPtr<ICompressCoder> &decoder = _decoders[decoderIndex];
   1038   CMyComPtr<IUnknown> coder;
   1039   if (_decoderFilter)
   1040   {
            // Only one filter object exists, so only slot 0 may use it.
   1041     if (decoderIndex != 0)
   1042       return E_FAIL;
   1043     coder = _decoderFilter;
   1044   }
   1045   else
   1046     coder = decoder;
   1047 
   1048   CMyComPtr<ICompressSetDecoderProperties2> setDecProps;
   1049   coder.QueryInterface(IID_ICompressSetDecoderProperties2, &setDecProps);
          // Properties were recorded (Pos != 0) but the coder has no way to receive them.
   1050   if (!setDecProps && propStreamSpec->Pos != 0)
   1051     return E_FAIL;
   1052 
   1053   CCrcOutStream *crcOutStreamSpec = new CCrcOutStream;
   1054   CMyComPtr<ISequentialOutStream> crcOutStream = crcOutStreamSpec;
   1055 
   1056   CBenchProgressInfo *pi = progressInfoSpec[decoderIndex];
   1057   pi->BenchInfo.UnpackSize = 0;
   1058   pi->BenchInfo.PackSize = 0;
   1059 
   1060   #ifndef _7ZIP_ST
   1061   {
   1062     CMyComPtr<ICompressSetCoderMt> setCoderMt;
   1063     coder.QueryInterface(IID_ICompressSetCoderMt, &setCoderMt);
   1064     if (setCoderMt)
   1065     {
   1066       RINOK(setCoderMt->SetNumberOfThreads(NumDecoderSubThreads));
   1067     }
   1068   }
   1069   #endif
   1070 
   1071   CMyComPtr<ICompressSetCoderProperties> scp;
   1072   coder.QueryInterface(IID_ICompressSetCoderProperties, &scp);
   1073   if (scp)
   1074   {
   1075     UInt64 reduceSize = _uncompressedDataSize;
   1076     RINOK(_method.SetCoderProps(scp, &reduceSize));
   1077   }
   1078 
   1079   CMyComPtr<ICryptoProperties> cp;
   1080   coder.QueryInterface(IID_ICryptoProperties, &cp);
   1081 
   1082   if (setDecProps)
   1083   {
   1084     RINOK(setDecProps->SetDecoderProperties2(propStreamSpec->Buffer, (UInt32)propStreamSpec->Pos));
   1085   }
   1086 
   1087   {
   1088     CMyComPtr<ICryptoSetPassword> sp;
   1089     coder.QueryInterface(IID_ICryptoSetPassword, &sp);
   1090     if (sp)
   1091     {
   1092       RINOK(sp->CryptoSetPassword(_psw, sizeof(_psw)));
   1093     }
   1094   }
   1095 
          // UnpackSize value at the time of the last CheckBreak() poll.
   1096   UInt64 prev = 0;
   1097 
   1098   if (cp)
   1099   {
   1100     RINOK(cp->SetKey(_key, sizeof(_key)));
   1101     RINOK(cp->SetInitVector(_iv, sizeof(_iv)));
   1102   }
   1103 
   1104   for (UInt64 i = 0; i < NumIterations; i++)
   1105   {
            // Poll for a user break roughly once per 1 MiB of unpacked data.
   1106     if (printCallback && pi->BenchInfo.UnpackSize - prev > (1 << 20))
   1107     {
   1108       RINOK(printCallback->CheckBreak());
   1109       prev = pi->BenchInfo.UnpackSize;
   1110     }
   1111 
   1112     inStreamSpec->Init(outStreamSpec->Buffer, compressedSize);
   1113     crcOutStreamSpec->Init();
   1114 
   1115     UInt64 outSize = kBufferSize;
            // CRC is computed on every 128th iteration, or on all of them when CheckCrc_Dec is set.
   1116     crcOutStreamSpec->CalcCrc = ((i & 0x7F) == 0 || CheckCrc_Dec);
   1117 
   1118     if (_decoderFilter)
   1119     {
   1120       if (compressedSize > rgCopy.BufferSize)
   1121         return E_FAIL;
              // The filter is run on a scratch copy so the packed buffer stays intact for the next iteration.
   1122       memcpy(rgCopy.Buffer, outStreamSpec->Buffer, compressedSize);
   1123       _decoderFilter->Init();
   1124       My_FilterBench(_decoderFilter, rgCopy.Buffer, compressedSize);
   1125       RINOK(WriteStream(crcOutStream, rgCopy.Buffer, compressedSize));
   1126     }
   1127     else
   1128     {
   1129       RINOK(decoder->Code(inStream, crcOutStream, 0, &outSize, progressInfo[decoderIndex]));
   1130     }
   1131 
   1132     if (crcOutStreamSpec->CalcCrc && CRC_GET_DIGEST(crcOutStreamSpec->Crc) != crc)
   1133       return S_FALSE;
   1134     pi->BenchInfo.UnpackSize += kBufferSize;
   1135     pi->BenchInfo.PackSize += compressedSize;
   1136   }
   1137 
          // Reached only on success: the early returns above skip these releases.
   1138   decoder.Release();
   1139   _decoderFilter.Release();
   1140   return S_OK;
   1141 }
   1142 
   1143 
   // 1 << 12 = 4096. Presumably the upper bound on the benchmark thread count;
   // its users are outside this part of the file -- TODO confirm.
   1144 static const UInt32 kNumThreadsMax = (1 << 12);
   1145 
   1146 struct CBenchEncoders
   1147 {
   1148   CEncoderInfo *encoders;
   1149   CBenchEncoders(UInt32 num): encoders(0) { encoders = new CEncoderInfo[num]; }
   1150   ~CBenchEncoders() { delete []encoders; }
   1151 };
   1152 
   1153 
   // Converts a total command budget into a repeat count.
   //   numCommands       - estimated commands per iteration (values below 16 are treated as 16)
   //   complexInCommands - total command budget
   // Returns complexInCommands / numCommands, but never less than 1.
   static UInt64 GetNumIterations(UInt64 numCommands, UInt64 complexInCommands)
   {
     const UInt64 kMinCommands = (1 << 4);
     const UInt64 perIteration = (numCommands < kMinCommands) ? kMinCommands : numCommands;
     const UInt64 count = complexInCommands / perIteration;
     if (count == 0)
       return 1;
     return count;
   }
   1161 
   1162 
   // Runs one encode + decode benchmark for a single method.
   //
   // Steps visible below:
   //   1. look the method up with FindMethod (E_NOTIMPL if unknown,
   //      E_INVALIDARG if it does not use exactly one stream);
   //   2. create one CEncoderInfo per encoder thread, each with an encoder
   //      (coder or filter) and numSubDecoderThreads decoders;
   //   3. initialize every encoder's data via CEncoderInfo::Init;
   //   4. run the encode pass (threads when numEncoderThreads > 1, otherwise
   //      inline) and report it through callback->SetEncodeResult(info, true);
   //   5. run the decode pass the same way and report it through
   //      callback->SetDecodeResult (called with false, then with true).
   //
   // oldLzmaBenchMode / numThreads are only named (and used) when _7ZIP_ST is
   // not defined; in single-thread builds one encoder and one decoder are used.
   // fileData may be NULL, in which case no CRC is precomputed here (crc == 0).
   1163 static HRESULT MethodBench(
   1164     DECL_EXTERNAL_CODECS_LOC_VARS
   1165     UInt64 complexInCommands,
   1166     bool
   1167       #ifndef _7ZIP_ST
   1168         oldLzmaBenchMode
   1169       #endif
   1170     ,
   1171     UInt32
   1172       #ifndef _7ZIP_ST
   1173         numThreads
   1174       #endif
   1175     ,
   1176     const COneMethodInfo &method2,
   1177     size_t uncompressedDataSize,
   1178     const Byte *fileData,
   1179     unsigned generateDictBits,
   1180 
   1181     IBenchPrintCallback *printCallback,
   1182     IBenchCallback *callback,
   1183     CBenchProps *benchProps)
   1184 {
          // Local copy: the LZMA branch below may add a thread-count property.
   1185   COneMethodInfo method = method2;
   1186   UInt64 methodId;
   1187   UInt32 numStreams;
   1188   if (!FindMethod(
   1189       EXTERNAL_CODECS_LOC_VARS
   1190       method.MethodName, methodId, numStreams))
   1191     return E_NOTIMPL;
   1192   if (numStreams != 1)
   1193     return E_INVALIDARG;
   1194 
   1195   UInt32 numEncoderThreads = 1;
   1196   UInt32 numSubDecoderThreads = 1;
   1197 
   1198   #ifndef _7ZIP_ST
   1199     numEncoderThreads = numThreads;
   1200 
   1201     if (oldLzmaBenchMode && methodId == k_LZMA)
   1202     {
   1203       bool fixedNumber;
   1204       UInt32 numLzmaThreads = method.Get_Lzma_NumThreads(fixedNumber);
   1205       if (!fixedNumber && numThreads == 1)
   1206         method.AddProp_NumThreads(1);
              // Multi-threaded LZMA: half as many encoder objects, and two decoder
              // threads per encoder.
   1207       if (numThreads > 1 && numLzmaThreads > 1)
   1208       {
   1209         numEncoderThreads = numThreads / 2;
   1210         numSubDecoderThreads = 2;
   1211       }
   1212     }
   1213   #endif
   1214 
          // NOTE(review): encoders[0] is dereferenced unconditionally further down,
          // so numEncoderThreads == 0 is not handled here -- verify callers never pass 0 threads.
   1215   CBenchEncoders encodersSpec(numEncoderThreads);
   1216   CEncoderInfo *encoders = encodersSpec.encoders;
   1217 
   1218   UInt32 i;
   1219 
   1220   for (i = 0; i < numEncoderThreads; i++)
   1221   {
   1222     CEncoderInfo &encoder = encoders[i];
            // Only the first encoder reports to the result callback.
   1223     encoder.callback = (i == 0) ? callback : 0;
   1224     encoder.printCallback = printCallback;
   1225 
   1226     {
   1227       CCreatedCoder cod;
   1228       RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, true, encoder._encoderFilter, cod));
   1229       encoder._encoder = cod.Coder;
   1230       if (!encoder._encoder && !encoder._encoderFilter)
   1231         return E_NOTIMPL;
   1232     }
   1233 
            // Complexity values above 30 enable the CRC check on every iteration
            // (see CheckCrc_Enc / CheckCrc_Dec in Encode / Decode).
   1234     encoder.CheckCrc_Enc = (benchProps->EncComplex) > 30 ;
   1235     encoder.CheckCrc_Dec = (benchProps->DecComplexCompr + benchProps->DecComplexUnc) > 30 ;
   1236 
   1237     memset(encoder._iv, 0, sizeof(encoder._iv));
   1238     memset(encoder._key, 0, sizeof(encoder._key));
   1239     memset(encoder._psw, 0, sizeof(encoder._psw));
   1240 
   1241     for (UInt32 j = 0; j < numSubDecoderThreads; j++)
   1242     {
   1243       CCreatedCoder cod;
   1244       CMyComPtr<ICompressCoder> &decoder = encoder._decoders[j];
   1245       RINOK(CreateCoder(EXTERNAL_CODECS_LOC_VARS methodId, false, encoder._decoderFilter, cod));
   1246       decoder = cod.Coder;
   1247       if (!encoder._decoderFilter && !decoder)
   1248         return E_NOTIMPL;
   1249     }
   1250   }
   1251 
          // One generator instance is shared by all encoders' Init calls.
   1252   CBaseRandomGenerator rg;
   1253   rg.Init();
   1254 
   1255   UInt32 crc = 0;
   1256   if (fileData)
   1257     crc = CrcCalc(fileData, uncompressedDataSize);
   1258 
   1259   for (i = 0; i < numEncoderThreads; i++)
   1260   {
   1261     CEncoderInfo &encoder = encoders[i];
   1262     encoder._method = method;
   1263     encoder._uncompressedDataSize = uncompressedDataSize;
   1264     encoder.kBufferSize = uncompressedDataSize;
   1265     encoder.fileData = fileData;
   1266     encoder.crc = crc;
   1267 
   1268     RINOK(encoders[i].Init(method, generateDictBits, &rg));
   1269   }
   1270 
          // ---- encode pass ----
   1271   CBenchProgressStatus status;
   1272   status.Res = S_OK;
   1273   status.EncodeMode = true;
   1274 
   1275   for (i = 0; i < numEncoderThreads; i++)
   1276   {
   1277     CEncoderInfo &encoder = encoders[i];
   1278     encoder.NumIterations = GetNumIterations(benchProps->GeComprCommands(uncompressedDataSize), complexInCommands);
   1279 
   1280     for (int j = 0; j < 2; j++)
   1281     {
   1282       CBenchProgressInfo *spec = new CBenchProgressInfo;
   1283       encoder.progressInfoSpec[j] = spec;
   1284       encoder.progressInfo[j] = spec;
   1285       spec->Status = &status;
   1286     }
   1287 
            // Timing and progress reporting are driven by encoder 0 only.
   1288     if (i == 0)
   1289     {
   1290       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
   1291       bpi->Callback = callback;
   1292       bpi->BenchInfo.NumIterations = numEncoderThreads;
   1293       bpi->SetStartTime();
   1294     }
   1295 
   1296     #ifndef _7ZIP_ST
   1297     if (numEncoderThreads > 1)
   1298     {
   1299       #ifdef USE_ALLOCA
              // Per-thread alloca size, different for each thread index, masked to stay below 2 KiB.
   1300       encoder.AllocaSize = (i * 16 * 21) & 0x7FF;
   1301       #endif
   1302 
              // NOTE(review): if this fails for i > 0, the function returns while
              // earlier threads may still run and use `status` / `encoders` -- confirm
              // whether CEncoderInfo's destructor waits for its threads.
   1303       RINOK(encoder.CreateEncoderThread())
   1304     }
   1305     else
   1306     #endif
   1307     {
   1308       RINOK(encoder.Encode());
   1309     }
   1310   }
   1311 
   1312   #ifndef _7ZIP_ST
   1313   if (numEncoderThreads > 1)
   1314     for (i = 0; i < numEncoderThreads; i++)
   1315       encoders[i].thread[0].Wait();
   1316   #endif
   1317 
   1318   RINOK(status.Res);
   1319 
   1320   CBenchInfo info;
   1321 
   1322   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
          // Sizes are re-summed over all encoders: one buffer per encoder, with the
          // iteration count carried separately in info.NumIterations.
   1323   info.UnpackSize = 0;
   1324   info.PackSize = 0;
   1325   info.NumIterations = encoders[0].NumIterations;
   1326 
   1327   for (i = 0; i < numEncoderThreads; i++)
   1328   {
   1329     CEncoderInfo &encoder = encoders[i];
   1330     info.UnpackSize += encoder.kBufferSize;
   1331     info.PackSize += encoder.compressedSize;
   1332   }
   1333 
   1334   RINOK(callback->SetEncodeResult(info, true));
   1335 
   1336 
          // ---- decode pass ----
   1337   status.Res = S_OK;
   1338   status.EncodeMode = false;
   1339 
   1340   UInt32 numDecoderThreads = numEncoderThreads * numSubDecoderThreads;
   1341 
   1342   for (i = 0; i < numEncoderThreads; i++)
   1343   {
   1344     CEncoderInfo &encoder = encoders[i];
   1345 
   1346     if (i == 0)
   1347     {
   1348       encoder.NumIterations = GetNumIterations(benchProps->GeDecomprCommands(encoder.compressedSize, encoder.kBufferSize), complexInCommands);
   1349       CBenchProgressInfo *bpi = encoder.progressInfoSpec[0];
   1350       bpi->Callback = callback;
   1351       bpi->BenchInfo.NumIterations = numDecoderThreads;
   1352       bpi->SetStartTime();
   1353     }
   1354     else
              // All encoders use the iteration count derived from encoder 0's packed size.
   1355       encoder.NumIterations = encoders[0].NumIterations;
   1356 
   1357     #ifndef _7ZIP_ST
   1358     {
   1359       int numSubThreads = method.Get_NumThreads();
   1360       encoder.NumDecoderSubThreads = (numSubThreads <= 0) ? 1 : numSubThreads;
   1361     }
   1362     if (numDecoderThreads > 1)
   1363     {
   1364       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
   1365       {
   1366         HRESULT res = encoder.CreateDecoderThread(j, (i == 0 && j == 0)
   1367             #ifdef USE_ALLOCA
   1368             , ((i * numSubDecoderThreads + j) * 16 * 21) & 0x7FF
   1369             #endif
   1370             );
                // NOTE(review): same early-return concern as for the encoder threads above.
   1371         RINOK(res);
   1372       }
   1373     }
   1374     else
   1375     #endif
   1376     {
   1377       RINOK(encoder.Decode(0));
   1378     }
   1379   }
   1380 
   1381   #ifndef _7ZIP_ST
          // Wait for every decoder thread; the last non-S_OK result wins.
   1382   HRESULT res = S_OK;
   1383   if (numDecoderThreads > 1)
   1384     for (i = 0; i < numEncoderThreads; i++)
   1385       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
   1386       {
   1387         CEncoderInfo &encoder = encoders[i];
   1388         encoder.thread[j].Wait();
   1389         if (encoder.Results[j] != S_OK)
   1390           res = encoder.Results[j];
   1391       }
   1392   RINOK(res);
   1393   #endif
   1394 
   1395   RINOK(status.Res);
   1396   encoders[0].progressInfoSpec[0]->SetFinishTime(info);
   1397 
   1398   #ifndef _7ZIP_ST
   1399   #ifdef UNDER_CE
          // Under CE the per-thread kernel + user times are added to info.UserTime.
   1400   if (numDecoderThreads > 1)
   1401     for (i = 0; i < numEncoderThreads; i++)
   1402       for (UInt32 j = 0; j < numSubDecoderThreads; j++)
   1403       {
   1404         FILETIME creationTime, exitTime, kernelTime, userTime;
   1405         if (::GetThreadTimes(encoders[i].thread[j], &creationTime, &exitTime, &kernelTime, &userTime) != 0)
   1406           info.UserTime += GetTime64(userTime) + GetTime64(kernelTime);
   1407       }
   1408   #endif
   1409   #endif
   1410 
   1411   info.UnpackSize = 0;
   1412   info.PackSize = 0;
   1413   info.NumIterations = numSubDecoderThreads * encoders[0].NumIterations;
   1414 
   1415   for (i = 0; i < numEncoderThreads; i++)
   1416   {
   1417     CEncoderInfo &encoder = encoders[i];
   1418     info.UnpackSize += encoder.kBufferSize;
   1419     info.PackSize += encoder.compressedSize;
   1420   }
   1421 
   1422   RINOK(callback->SetDecodeResult(info, false));
   1423   RINOK(callback->SetDecodeResult(info, true));
   1424 
   1425   return S_OK;
   1426 }
   1427 
   1428 
   // Estimates the memory (in bytes) used by one LZMA encoder instance.
   //   multiThread - adds a fixed 6 MiB allowance when true
   //   dictionary  - dictionary size in bytes
   // A hash-table size is derived from the dictionary size (bits spread down,
   // halved, at least 0x10000 entries, halved again above 1 << 24), and the
   // result combines that table, multiples of the dictionary size and a fixed
   // 1 MiB allowance.
   static inline UInt64 GetLZMAUsage(bool multiThread, UInt32 dictionary)
   {
     UInt32 hashSize = dictionary - 1;
     for (unsigned shift = 1; shift <= 8; shift <<= 1)
       hashSize |= (hashSize >> shift);
     hashSize >>= 1;
     hashSize |= 0xFFFF;
     if (hashSize > (1 << 24))
       hashSize >>= 1;
     hashSize++;

     const UInt64 dict64 = dictionary;
     UInt64 total = ((hashSize + (1 << 16)) + dict64 * 2) * 4;
     total += dict64 * 3 / 2;
     total += (1 << 20);
     if (multiThread)
       total += (6 << 20);
     return total;
   }
   1444 
   1445 UInt64 GetBenchMemoryUsage(UInt32 numThreads, UInt32 dictionary, bool totalBench)
   1446 {
   1447   const UInt32 kBufferSize = dictionary;
   1448   const UInt32 kCompressedBufferSize = kBufferSize; // / 2;
   1449   bool lzmaMt = (totalBench || numThreads > 1);
   1450   UInt32 numBigThreads = numThreads;
   1451   if (!totalBench && lzmaMt)
   1452     numBigThreads /= 2;
   1453   return ((UInt64)kBufferSize + kCompressedBufferSize +
   1454     GetLZMAUsage(lzmaMt, dictionary) + (2 << 20)) * numBigThreads;
   1455 }
   1456 
   1457 static HRESULT CrcBig(const void *data, UInt32 size, UInt64 numIterations,
   1458     const UInt32 *checkSum, IHasher *hf,
   1459     IBenchPrintCallback *callback)
   1460 {
   1461   Byte hash[64];
   1462   UInt64 i;
   1463   for (i = 0; i < sizeof(hash); i++)
   1464     hash[i] = 0;
   1465   for (i = 0; i < numIterations; i++)
   1466   {
   1467     if (callback && (i & 0xFF) == 0)
   1468     {
   1469       RINOK(callback->CheckBreak());
   1470     }
   1471     hf->Init();
   1472     hf->Update(data, size);
   1473     hf->Final(hash);
   1474     UInt32 hashSize = hf->GetDigestSize();
   1475     if (hashSize > sizeof(hash))
   1476       return S_FALSE;
   1477     UInt32 sum = 0;
   1478     for (UInt32 j = 0; j < hashSize; j += 4)
   1479       sum ^= GetUi32(hash + j);
   1480     if (checkSum && sum != *checkSum)
   1481     {
   1482       return S_FALSE;
   1483     }
   1484   }
   1485   return S_OK;
   1486 }
   1487 
   // Non-const global that FreqThreadFunction uses both as the starting sum and
   // as the `val` operand of CountCpuFreq.
   1488 UInt32 g_BenchCpuFreqTemp = 1;
   1489 
   // YY1 is two operations on `sum`, each depending on the previous result.
   // Each further macro repeats the previous one four times:
   // YY3 = 4 x YY1, YY5 = 16 x YY1, YY7 = 64 x YY1 = 128 operations.
   1490 #define YY1 sum += val; sum ^= val;
   1491 #define YY3 YY1 YY1 YY1 YY1
   1492 #define YY5 YY3 YY3 YY3 YY3
   1493 #define YY7 YY5 YY5 YY5 YY5
   // Equals the operation count of one YY7 expansion (64 * 2).
   1494 static const UInt32 kNumFreqCommands = 128;
   1495 
   1496 EXTERN_C_BEGIN
   1497 
   // Executes the YY7 sequence `num` times, starting from `sum`, and returns the
   // resulting value so the work has an observable result.
   1498 static UInt32 CountCpuFreq(UInt32 sum, UInt32 num, UInt32 val)
   1499 {
   1500   for (UInt32 i = 0; i < num; i++)
   1501   {
   1502     YY7
   1503   }
   1504   return sum;
   1505 }
   1506 
   1507 EXTERN_C_END
   1508 
   1509 
   1510 #ifndef _7ZIP_ST
   1511 
   1512 struct CFreqInfo
   1513 {
   1514   NWindows::CThread Thread;
   1515   IBenchPrintCallback *Callback;
   1516   HRESULT CallbackRes;
   1517   UInt32 ValRes;
   1518   UInt32 Size;
   1519   UInt64 NumIterations;
   1520 
   1521   void Wait()
   1522   {
   1523     Thread.Wait();
   1524     Thread.Close();
   1525   }
   1526 };
   1527 
   1528 static THREAD_FUNC_DECL FreqThreadFunction(void *param)
   1529 {
   1530   CFreqInfo *p = (CFreqInfo *)param;
   1531 
   1532   UInt32 sum = g_BenchCpuFreqTemp;
   1533   for (UInt64 k = p->NumIterations; k > 0; k--)
   1534   {
   1535     p->CallbackRes = p->Callback->CheckBreak();
   1536     if (p->CallbackRes != S_OK)
   1537       return 0;
   1538     sum = CountCpuFreq(sum, p->Size, g_BenchCpuFreqTemp);
   1539   }
   1540   p->ValRes = sum;
   1541   return 0;
   1542 }
   1543 
   1544 struct CFreqThreads
   1545 {
   1546   CFreqInfo *Items;
   1547   UInt32 NumThreads;
   1548 
   1549   CFreqThreads(): Items(0), NumThreads(0) {}
   1550   void WaitAll()
   1551   {
   1552     for (UInt32 i = 0; i < NumThreads; i++)
   1553       Items[i].Wait();
   1554     NumThreads = 0;
   1555   }
   1556   ~CFreqThreads()
   1557   {
   1558     WaitAll();
   1559     delete []Items;
   1560   }
   1561 };
   1562 
   1563 struct CCrcInfo
   1564 {
   1565   NWindows::CThread Thread;
   1566   IBenchPrintCallback *Callback;
   1567   HRESULT CallbackRes;
   1568 
   1569   const Byte *Data;
   1570   UInt32 Size;
   1571   UInt64 NumIterations;
   1572   bool CheckSumDefined;
   1573   UInt32 CheckSum;
   1574   CMyComPtr<IHasher> Hasher;
   1575   HRESULT Res;
   1576 
   1577   #ifdef USE_ALLOCA
   1578   size_t AllocaSize;
   1579   #endif
   1580 
   1581   void Wait()
   1582   {
   1583     Thread.Wait();
   1584     Thread.Close();
   1585   }
   1586 };
   1587 
   1588 static THREAD_FUNC_DECL CrcThreadFunction(void *param)
   1589 {
   1590   CCrcInfo *p = (CCrcInfo *)param;
   1591 
   1592   #ifdef USE_ALLOCA
   1593   alloca(p->AllocaSize);
   1594   #endif
   1595 
   1596   p->Res = CrcBig(p->Data, p->Size, p->NumIterations,
   1597       p->CheckSumDefined ? &p->CheckSum : NULL, p->Hasher,
   1598       p->Callback);
   1599   return 0;
   1600 }
   1601 
   1602 struct CCrcThreads
   1603 {
   1604   CCrcInfo *Items;
   1605   UInt32 NumThreads;
   1606 
   1607   CCrcThreads(): Items(0), NumThreads(0) {}
   1608   void WaitAll()
   1609   {
   1610     for (UInt32 i = 0; i < NumThreads; i++)
   1611       Items[i].Wait();
   1612     NumThreads = 0;
   1613   }
   1614   ~CCrcThreads()
   1615   {
   1616     WaitAll();
   1617     delete []Items;
   1618   }
   1619 };
   1620 
   1621 #endif
   1622 
   1623 static UInt32 CrcCalc1(const Byte *buf, UInt32 size)
   1624 {
   1625   UInt32 crc = CRC_INIT_VAL;;
   1626   for (UInt32 i = 0; i < size; i++)
   1627     crc = CRC_UPDATE_BYTE(crc, buf[i]);
   1628   return CRC_GET_DIGEST(crc);
   1629 }
   1630 
   1631 static void RandGen(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
   1632 {
   1633   for (UInt32 i = 0; i < size; i++)
   1634     buf[i] = (Byte)RG.GetRnd();
   1635 }
   1636 
   1637 static UInt32 RandGenCrc(Byte *buf, UInt32 size, CBaseRandomGenerator &RG)
   1638 {
   1639   RandGen(buf, size, RG);
   1640   return CrcCalc1(buf, size);
   1641 }
   1642 
   1643 bool CrcInternalTest()
   1644 {
   1645   CBenchBuffer buffer;
   1646   const UInt32 kBufferSize0 = (1 << 8);
   1647   const UInt32 kBufferSize1 = (1 << 10);
   1648   const UInt32 kCheckSize = (1 << 5);
   1649   if (!buffer.Alloc(kBufferSize0 + kBufferSize1))
   1650     return false;
   1651   Byte *buf = buffer.Buffer;
   1652   UInt32 i;
   1653   for (i = 0; i < kBufferSize0; i++)
   1654     buf[i] = (Byte)i;
   1655   UInt32 crc1 = CrcCalc1(buf, kBufferSize0);
   1656   if (crc1 != 0x29058C73)
   1657     return false;
   1658   CBaseRandomGenerator RG;
   1659   RandGen(buf + kBufferSize0, kBufferSize1, RG);
   1660   for (i = 0; i < kBufferSize0 + kBufferSize1 - kCheckSize; i++)
   1661     for (UInt32 j = 0; j < kCheckSize; j++)
   1662       if (CrcCalc1(buf + i, j) != CrcCalc(buf + i, j))
   1663         return false;
   1664   return true;
   1665 }
   1666 
   // One row of the g_Bench table: a method to run in the total benchmark.
   1667 struct CBenchMethod
   1668 {
   1669   unsigned Weight;          // copied to the callback's EncodeWeight / DecodeWeight by TotalBench
   1670   unsigned DictBits;        // passed to MethodBench as generateDictBits; 0 selects kFilterUnpackSize in TotalBench
   1671   UInt32 EncComplex;        // copied to BenchProps.EncComplex
   1672   UInt32 DecComplexCompr;   // copied to BenchProps.DecComplexCompr
   1673   UInt32 DecComplexUnc;     // copied to BenchProps.DecComplexUnc
   1674   const char *Name;         // method string, parsed with ParseMethodFromPROPVARIANT
   1675 };
   1676 
   // Methods run by TotalBench, in this order.
   // Columns: Weight, DictBits, EncComplex, DecComplexCompr, DecComplexUnc, Name.
   1677 static const CBenchMethod g_Bench[] =
   1678 {
   1679   { 40, 17,  357,  145,   20, "LZMA:x1" },
   1680   { 80, 24, 1220,  145,   20, "LZMA:x5:mt1" },
   1681   { 80, 24, 1220,  145,   20, "LZMA:x5:mt2" },
   1682 
   1683   { 10, 16,  124,   40,   14, "Deflate:x1" },
   1684   { 20, 16,  376,   40,   14, "Deflate:x5" },
   1685   { 10, 16, 1082,   40,   14, "Deflate:x7" },
   1686   { 10, 17,  422,   40,   14, "Deflate64:x5" },
   1687 
   1688   { 10, 15,  590,   69,   69, "BZip2:x1" },
   1689   { 20, 19,  815,  122,  122, "BZip2:x5" },
   1690   { 10, 19,  815,  122,  122, "BZip2:x5:mt2" },
   1691   { 10, 19, 2530,  122,  122, "BZip2:x7" },
   1692 
   1693   { 10, 18, 1010,    0, 1150, "PPMD:x1" },
   1694   { 10, 22, 1655,    0, 1830, "PPMD:x5" },
   1695 
          // DictBits == 0: TotalBench uses kFilterUnpackSize for these unless the size is forced.
   1696   {  2,  0,    6,    0,    6, "Delta:4" },
   1697   {  2,  0,    4,    0,    4, "BCJ" },
   1698 
   1699   { 10,  0,   24,    0,   24, "AES256CBC:1" },
   1700   {  2,  0,    8,    0,    2, "AES256CBC:2" }
   1701 };
   1702 
   // One row of the g_Hash table. The code that consumes these fields is outside
   // this part of the file, so the notes below are assumptions to confirm.
   1703 struct CBenchHash
   1704 {
   1705   unsigned Weight;    // presumably the weight of this hash in the averaged totals -- TODO confirm
   1706   UInt32 Complex;     // presumably a per-byte complexity estimate used for rating -- TODO confirm
   1707   UInt32 CheckSum;    // presumably the expected folded digest checked by CrcBig -- TODO confirm
   1708   const char *Name;   // hash method name
   1709 };
   1710 
   // Hash methods table. Columns: Weight, Complex, CheckSum, Name.
   // The three CRC32 rows share one CheckSum value.
   1711 static const CBenchHash g_Hash[] =
   1712 {
   1713   {  1,  1820, 0x8F8FEDAB, "CRC32:1" },
   1714   { 10,   558, 0x8F8FEDAB, "CRC32:4" },
   1715   { 10,   339, 0x8F8FEDAB, "CRC32:8" },
   1716   { 10,   512, 0xDF1C17CC, "CRC64" },
   1717   { 10,  5100, 0x2D79FF2E, "SHA256" },
   1718   { 10,  2340, 0x4C25132B, "SHA1" },
   1719   {  2,  5500, 0xE084E913, "BLAKE2sp" }
   1720 };
   1721 
   // Running weighted totals of benchmark results.
   // PrintResults adds value * weight to Rating / Usage / RPU and the weight to
   // NumIterations2; PrintTotals divides by NumIterations2 to get averages.
   struct CTotalBenchRes
   {
     // UInt64 NumIterations1; // for Usage
     UInt64 NumIterations2; // for Rating / RPU

     UInt64 Rating;
     UInt64 Usage;
     UInt64 RPU;

     // Resets all totals to zero.
     void Init()
     {
       // NumIterations1 = 0;
       NumIterations2 = 0;
       Rating = 0;
       Usage = 0;
       RPU = 0;
     }

     // Stores the field-wise sum of r1 and r2 in this object.
     void SetSum(const CTotalBenchRes &r1, const CTotalBenchRes &r2)
     {
       // NumIterations1 = r1.NumIterations1 + r2.NumIterations1;
       NumIterations2 = r1.NumIterations2 + r2.NumIterations2;
       Rating = r1.Rating + r2.Rating;
       Usage = r1.Usage + r2.Usage;
       RPU = r1.RPU + r2.RPU;
     }
   };
   1742 
   1743 static void PrintNumber(IBenchPrintCallback &f, UInt64 value, unsigned size)
   1744 {
   1745   char s[128];
   1746   unsigned startPos = (unsigned)sizeof(s) - 32;
   1747   memset(s, ' ', startPos);
   1748   ConvertUInt64ToString(value, s + startPos);
   1749   // if (withSpace)
   1750   {
   1751     startPos--;
   1752     size++;
   1753   }
   1754   unsigned len = (unsigned)strlen(s + startPos);
   1755   if (size > len)
   1756   {
   1757     startPos -= (size - len);
   1758     if (startPos < 0)
   1759       startPos = 0;
   1760   }
   1761   f.Print(s + startPos);
   1762 }
   1763 
   // Column widths (in characters) of the printed benchmark table.
   1764 static const unsigned kFieldSize_Name = 12;
   1765 static const unsigned kFieldSize_SmallName = 4;
   1766 static const unsigned kFieldSize_Speed = 9;
   1767 static const unsigned kFieldSize_Usage = 5;
   1768 static const unsigned kFieldSize_RU = 6;
   1769 static const unsigned kFieldSize_Rating = 6;
   1770 static const unsigned kFieldSize_EU = 5;
   1771 static const unsigned kFieldSize_Effec = 5;
   1772 
   // Combined widths. The added constants (4 and 2) match one separating space
   // per field, which PrintNumber emits in front of each number.
   1773 static const unsigned kFieldSize_TotalSize = 4 + kFieldSize_Speed + kFieldSize_Usage + kFieldSize_RU + kFieldSize_Rating;
   1774 static const unsigned kFieldSize_EUAndEffec = 2 + kFieldSize_EU + kFieldSize_Effec;
   1775 
   1776 
   1777 static void PrintRating(IBenchPrintCallback &f, UInt64 rating, unsigned size)
   1778 {
   1779   PrintNumber(f, (rating + 500000) / 1000000, size);
   1780 }
   1781 
   1782 
   1783 static void PrintPercents(IBenchPrintCallback &f, UInt64 val, UInt64 divider, unsigned size)
   1784 {
   1785   PrintNumber(f, (val * 100 + divider / 2) / divider, size);
   1786 }
   1787 
   1788 static void PrintChars(IBenchPrintCallback &f, char c, unsigned size)
   1789 {
   1790   char s[256];
   1791   memset(s, (Byte)c, size);
   1792   s[size] = 0;
   1793   f.Print(s);
   1794 }
   1795 
   1796 static void PrintSpaces(IBenchPrintCallback &f, unsigned size)
   1797 {
   1798   PrintChars(f, ' ', size);
   1799 }
   1800 
   1801 static void PrintResults(IBenchPrintCallback &f, UInt64 usage, UInt64 rpu, UInt64 rating, bool showFreq, UInt64 cpuFreq)
   1802 {
   1803   PrintNumber(f, (usage + 5000) / 10000, kFieldSize_Usage);
   1804   PrintRating(f, rpu, kFieldSize_RU);
   1805   PrintRating(f, rating, kFieldSize_Rating);
   1806   if (showFreq)
   1807   {
   1808     if (cpuFreq == 0)
   1809       PrintSpaces(f, kFieldSize_EUAndEffec);
   1810     else
   1811     {
   1812       UInt64 ddd = cpuFreq * usage / 100;
   1813       if (ddd == 0)
   1814         ddd = 1;
   1815       PrintPercents(f, (rating * 10000), ddd, kFieldSize_EU);
   1816       PrintPercents(f, rating, cpuFreq, kFieldSize_Effec);
   1817     }
   1818   }
   1819 }
   1820 
   1821 static void PrintResults(IBenchPrintCallback *f,
   1822     const CBenchInfo &info,
   1823     unsigned weight,
   1824     UInt64 rating,
   1825     bool showFreq, UInt64 cpuFreq,
   1826     CTotalBenchRes *res)
   1827 {
   1828   UInt64 speed = info.GetSpeed(info.UnpackSize * info.NumIterations);
   1829   if (f)
   1830   {
   1831     if (speed != 0)
   1832       PrintNumber(*f, speed / 1024, kFieldSize_Speed);
   1833     else
   1834       PrintSpaces(*f, 1 + kFieldSize_Speed);
   1835   }
   1836   UInt64 usage = info.GetUsage();
   1837   UInt64 rpu = info.GetRatingPerUsage(rating);
   1838   if (f)
   1839   {
   1840     PrintResults(*f, usage, rpu, rating, showFreq, cpuFreq);
   1841   }
   1842 
   1843   if (res)
   1844   {
   1845     // res->NumIterations1++;
   1846     res->NumIterations2 += weight;
   1847     res->RPU += (rpu * weight);
   1848     res->Rating += (rating * weight);
   1849     res->Usage += (usage * weight);
   1850   }
   1851 }
   1852 
   1853 static void PrintTotals(IBenchPrintCallback &f, bool showFreq, UInt64 cpuFreq, const CTotalBenchRes &res)
   1854 {
   1855   PrintSpaces(f, 1 + kFieldSize_Speed);
   1856   // UInt64 numIterations1 = res.NumIterations1; if (numIterations1 == 0) numIterations1 = 1;
   1857   UInt64 numIterations2 = res.NumIterations2; if (numIterations2 == 0) numIterations2 = 1;
   1858   PrintResults(f, res.Usage / numIterations2, res.RPU / numIterations2, res.Rating / numIterations2, showFreq, cpuFreq);
   1859 }
   1860 
   1861 static void PrintRequirements(IBenchPrintCallback &f, const char *sizeString,
   1862     bool size_Defined, UInt64 size, const char *threadsString, UInt32 numThreads)
   1863 {
   1864   f.Print("RAM ");
   1865   f.Print(sizeString);
   1866   if (size_Defined)
   1867     PrintNumber(f, (size >> 20), 6);
   1868   else
   1869     f.Print("      ?");
   1870   f.Print(" MB,  # ");
   1871   f.Print(threadsString);
   1872   PrintNumber(f, numThreads, 3);
   1873   f.NewLine();
   1874 }
   1875 
   1876 struct CBenchCallbackToPrint: public IBenchCallback
   1877 {
   1878   CBenchProps BenchProps;
   1879   CTotalBenchRes EncodeRes;
   1880   CTotalBenchRes DecodeRes;
   1881   IBenchPrintCallback *_file;
   1882   UInt32 DictSize;
   1883 
   1884   bool Use2Columns;
   1885   unsigned NameFieldSize;
   1886 
   1887   bool ShowFreq;
   1888   UInt64 CpuFreq;
   1889 
   1890   unsigned EncodeWeight;
   1891   unsigned DecodeWeight;
   1892 
   1893   CBenchCallbackToPrint():
   1894       Use2Columns(false),
   1895       NameFieldSize(0),
   1896       ShowFreq(false),
   1897       CpuFreq(0),
   1898       EncodeWeight(1),
   1899       DecodeWeight(1)
   1900       {}
   1901 
   1902   void Init() { EncodeRes.Init(); DecodeRes.Init(); }
   1903   void Print(const char *s);
   1904   void NewLine();
   1905 
   1906   HRESULT SetFreq(bool showFreq, UInt64 cpuFreq);
   1907   HRESULT SetEncodeResult(const CBenchInfo &info, bool final);
   1908   HRESULT SetDecodeResult(const CBenchInfo &info, bool final);
   1909 };
   1910 
   1911 HRESULT CBenchCallbackToPrint::SetFreq(bool showFreq, UInt64 cpuFreq)
   1912 {
   1913   ShowFreq = showFreq;
   1914   CpuFreq = cpuFreq;
   1915   return S_OK;
   1916 }
   1917 
   1918 HRESULT CBenchCallbackToPrint::SetEncodeResult(const CBenchInfo &info, bool final)
   1919 {
   1920   RINOK(_file->CheckBreak());
   1921   if (final)
   1922   {
   1923     UInt64 rating = BenchProps.GetCompressRating(DictSize, info.GlobalTime, info.GlobalFreq, info.UnpackSize * info.NumIterations);
   1924     PrintResults(_file, info,
   1925         EncodeWeight, rating,
   1926         ShowFreq, CpuFreq, &EncodeRes);
   1927     if (!Use2Columns)
   1928       _file->NewLine();
   1929   }
   1930   return S_OK;
   1931 }
   1932 
   1933 static const char *kSep = "  | ";
   1934 
   1935 HRESULT CBenchCallbackToPrint::SetDecodeResult(const CBenchInfo &info, bool final)
   1936 {
   1937   RINOK(_file->CheckBreak());
   1938   if (final)
   1939   {
   1940     UInt64 rating = BenchProps.GetDecompressRating(info.GlobalTime, info.GlobalFreq, info.UnpackSize, info.PackSize, info.NumIterations);
   1941     if (Use2Columns)
   1942       _file->Print(kSep);
   1943     else
   1944       PrintSpaces(*_file, NameFieldSize);
   1945     CBenchInfo info2 = info;
   1946     info2.UnpackSize *= info2.NumIterations;
   1947     info2.PackSize *= info2.NumIterations;
   1948     info2.NumIterations = 1;
   1949     PrintResults(_file, info2,
   1950         DecodeWeight, rating,
   1951         ShowFreq, CpuFreq, &DecodeRes);
   1952   }
   1953   return S_OK;
   1954 }
   1955 
   1956 void CBenchCallbackToPrint::Print(const char *s)
   1957 {
   1958   _file->Print(s);
   1959 }
   1960 
   1961 void CBenchCallbackToPrint::NewLine()
   1962 {
   1963   _file->NewLine();
   1964 }
   1965 
   1966 void PrintLeft(IBenchPrintCallback &f, const char *s, unsigned size)
   1967 {
   1968   f.Print(s);
   1969   int numSpaces = size - MyStringLen(s);
   1970   if (numSpaces > 0)
   1971     PrintSpaces(f, numSpaces);
   1972 }
   1973 
   1974 void PrintRight(IBenchPrintCallback &f, const char *s, unsigned size)
   1975 {
   1976   int numSpaces = size - MyStringLen(s);
   1977   if (numSpaces > 0)
   1978     PrintSpaces(f, numSpaces);
   1979   f.Print(s);
   1980 }
   1981 
   1982 static HRESULT TotalBench(
   1983     DECL_EXTERNAL_CODECS_LOC_VARS
   1984     UInt64 complexInCommands,
   1985     UInt32 numThreads,
   1986     bool forceUnpackSize,
   1987     size_t unpackSize,
   1988     const Byte *fileData,
   1989     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback)
   1990 {
   1991   for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
   1992   {
   1993     const CBenchMethod &bench = g_Bench[i];
   1994     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
   1995     callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
   1996     callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
   1997     callback->BenchProps.EncComplex = bench.EncComplex;
   1998 
   1999     COneMethodInfo method;
   2000     NCOM::CPropVariant propVariant;
   2001     propVariant = bench.Name;
   2002     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
   2003 
   2004     size_t unpackSize2 = unpackSize;
   2005     if (!forceUnpackSize && bench.DictBits == 0)
   2006       unpackSize2 = kFilterUnpackSize;
   2007 
   2008     callback->EncodeWeight = bench.Weight;
   2009     callback->DecodeWeight = bench.Weight;
   2010 
   2011     HRESULT res = MethodBench(
   2012         EXTERNAL_CODECS_LOC_VARS
   2013         complexInCommands,
   2014         false, numThreads, method,
   2015         unpackSize2, fileData,
   2016         bench.DictBits,
   2017         printCallback, callback, &callback->BenchProps);
   2018 
   2019     if (res == E_NOTIMPL)
   2020     {
   2021       // callback->Print(" ---");
   2022       // we need additional empty line as line for decompression results
   2023       if (!callback->Use2Columns)
   2024         callback->NewLine();
   2025     }
   2026     else
   2027     {
   2028       RINOK(res);
   2029     }
   2030 
   2031     callback->NewLine();
   2032   }
   2033   return S_OK;
   2034 }
   2035 
   2036 
   2037 static HRESULT FreqBench(
   2038     UInt64 complexInCommands,
   2039     UInt32 numThreads,
   2040     IBenchPrintCallback *_file,
   2041     bool showFreq,
   2042     UInt64 specifiedFreq,
   2043     UInt64 &cpuFreq,
   2044     UInt32 &res)
   2045 {
   2046   res = 0;
   2047   cpuFreq = 0;
   2048 
   2049   UInt32 bufferSize = 1 << 20;
   2050   UInt32 complexity = kNumFreqCommands;
   2051   if (numThreads == 0)
   2052     numThreads = 1;
   2053 
   2054   #ifdef _7ZIP_ST
   2055   numThreads = 1;
   2056   #endif
   2057 
   2058   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
   2059   UInt64 numIterations = complexInCommands / complexity / bsize;
   2060   if (numIterations == 0)
   2061     numIterations = 1;
   2062 
   2063   CBenchInfoCalc progressInfoSpec;
   2064 
   2065   #ifndef _7ZIP_ST
   2066   CFreqThreads threads;
   2067   if (numThreads > 1)
   2068   {
   2069     threads.Items = new CFreqInfo[numThreads];
   2070     UInt32 i;
   2071     for (i = 0; i < numThreads; i++)
   2072     {
   2073       CFreqInfo &info = threads.Items[i];
   2074       info.Callback = _file;
   2075       info.CallbackRes = S_OK;
   2076       info.NumIterations = numIterations;
   2077       info.Size = bufferSize;
   2078     }
   2079     progressInfoSpec.SetStartTime();
   2080     for (i = 0; i < numThreads; i++)
   2081     {
   2082       CFreqInfo &info = threads.Items[i];
   2083       RINOK(info.Thread.Create(FreqThreadFunction, &info));
   2084       threads.NumThreads++;
   2085     }
   2086     threads.WaitAll();
   2087     for (i = 0; i < numThreads; i++)
   2088     {
   2089       RINOK(threads.Items[i].CallbackRes);
   2090     }
   2091   }
   2092   else
   2093   #endif
   2094   {
   2095     progressInfoSpec.SetStartTime();
   2096     UInt32 sum = g_BenchCpuFreqTemp;
   2097     for (UInt64 k = numIterations; k > 0; k--)
   2098     {
   2099       RINOK(_file->CheckBreak());
   2100       sum = CountCpuFreq(sum, bufferSize, g_BenchCpuFreqTemp);
   2101     }
   2102     res += sum;
   2103   }
   2104 
   2105   CBenchInfo info;
   2106   progressInfoSpec.SetFinishTime(info);
   2107 
   2108   info.UnpackSize = 0;
   2109   info.PackSize = 0;
   2110   info.NumIterations = 1;
   2111 
   2112   if (_file)
   2113   {
   2114     {
   2115       UInt64 numCommands = (UInt64)numIterations * bufferSize * numThreads * complexity;
   2116       UInt64 rating = info.GetSpeed(numCommands);
   2117       cpuFreq = rating / numThreads;
   2118       PrintResults(_file, info,
   2119           0, // weight
   2120           rating,
   2121           showFreq, showFreq ? (specifiedFreq != 0 ? specifiedFreq : cpuFreq) : 0, NULL);
   2122     }
   2123     RINOK(_file->CheckBreak());
   2124   }
   2125 
   2126   return S_OK;
   2127 }
   2128 
   2129 
   2130 
   2131 static HRESULT CrcBench(
   2132     DECL_EXTERNAL_CODECS_LOC_VARS
   2133     UInt64 complexInCommands,
   2134     UInt32 numThreads, UInt32 bufferSize,
   2135     UInt64 &speed,
   2136     UInt32 complexity, unsigned benchWeight,
   2137     const UInt32 *checkSum,
   2138     const COneMethodInfo &method,
   2139     IBenchPrintCallback *_file,
   2140     CTotalBenchRes *encodeRes,
   2141     bool showFreq, UInt64 cpuFreq)
   2142 {
   2143   if (numThreads == 0)
   2144     numThreads = 1;
   2145 
   2146   #ifdef _7ZIP_ST
   2147   numThreads = 1;
   2148   #endif
   2149 
   2150   AString methodName = method.MethodName;
   2151   // methodName.RemoveChar(L'-');
   2152   CMethodId hashID;
   2153   if (!FindHashMethod(
   2154       EXTERNAL_CODECS_LOC_VARS
   2155       methodName, hashID))
   2156     return E_NOTIMPL;
   2157 
   2158   CBenchBuffer buffer;
   2159   size_t totalSize = (size_t)bufferSize * numThreads;
   2160   if (totalSize / numThreads != bufferSize)
   2161     return E_OUTOFMEMORY;
   2162   if (!buffer.Alloc(totalSize))
   2163     return E_OUTOFMEMORY;
   2164 
   2165   Byte *buf = buffer.Buffer;
   2166   CBaseRandomGenerator RG;
   2167   UInt32 bsize = (bufferSize == 0 ? 1 : bufferSize);
   2168   UInt64 numIterations = complexInCommands * 256 / complexity / bsize;
   2169   if (numIterations == 0)
   2170     numIterations = 1;
   2171 
   2172   CBenchInfoCalc progressInfoSpec;
   2173 
   2174   #ifndef _7ZIP_ST
   2175   CCrcThreads threads;
   2176   if (numThreads > 1)
   2177   {
   2178     threads.Items = new CCrcInfo[numThreads];
   2179 
   2180     UInt32 i;
   2181     for (i = 0; i < numThreads; i++)
   2182     {
   2183       CCrcInfo &info = threads.Items[i];
   2184       AString name;
   2185       RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, info.Hasher));
   2186       if (!info.Hasher)
   2187         return E_NOTIMPL;
   2188       CMyComPtr<ICompressSetCoderProperties> scp;
   2189       info.Hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
   2190       if (scp)
   2191       {
   2192         UInt64 reduceSize = 1;
   2193         RINOK(method.SetCoderProps(scp, &reduceSize));
   2194       }
   2195 
   2196       Byte *data = buf + (size_t)bufferSize * i;
   2197       info.Callback = _file;
   2198       info.Data = data;
   2199       info.NumIterations = numIterations;
   2200       info.Size = bufferSize;
   2201       /* info.Crc = */ RandGenCrc(data, bufferSize, RG);
   2202       info.CheckSumDefined = false;
   2203       if (checkSum)
   2204       {
   2205         info.CheckSum = *checkSum;
   2206         info.CheckSumDefined = (checkSum && (i == 0));
   2207       }
   2208 
   2209       #ifdef USE_ALLOCA
   2210       info.AllocaSize = (i * 16 * 21) & 0x7FF;
   2211       #endif
   2212     }
   2213 
   2214     progressInfoSpec.SetStartTime();
   2215 
   2216     for (i = 0; i < numThreads; i++)
   2217     {
   2218       CCrcInfo &info = threads.Items[i];
   2219       RINOK(info.Thread.Create(CrcThreadFunction, &info));
   2220       threads.NumThreads++;
   2221     }
   2222     threads.WaitAll();
   2223     for (i = 0; i < numThreads; i++)
   2224     {
   2225       RINOK(threads.Items[i].Res);
   2226     }
   2227   }
   2228   else
   2229   #endif
   2230   {
   2231     /* UInt32 crc = */ RandGenCrc(buf, bufferSize, RG);
   2232     progressInfoSpec.SetStartTime();
   2233     CMyComPtr<IHasher> hasher;
   2234     AString name;
   2235     RINOK(CreateHasher(EXTERNAL_CODECS_LOC_VARS hashID, name, hasher));
   2236     if (!hasher)
   2237       return E_NOTIMPL;
   2238     CMyComPtr<ICompressSetCoderProperties> scp;
   2239     hasher.QueryInterface(IID_ICompressSetCoderProperties, &scp);
   2240     if (scp)
   2241     {
   2242       UInt64 reduceSize = 1;
   2243       RINOK(method.SetCoderProps(scp, &reduceSize));
   2244     }
   2245     RINOK(CrcBig(buf, bufferSize, numIterations, checkSum, hasher, _file));
   2246   }
   2247 
   2248   CBenchInfo info;
   2249   progressInfoSpec.SetFinishTime(info);
   2250 
   2251   UInt64 unpSize = numIterations * bufferSize;
   2252   UInt64 unpSizeThreads = unpSize * numThreads;
   2253   info.UnpackSize = unpSizeThreads;
   2254   info.PackSize = unpSizeThreads;
   2255   info.NumIterations = 1;
   2256 
   2257   if (_file)
   2258   {
   2259     {
   2260       UInt64 numCommands = unpSizeThreads * complexity / 256;
   2261       UInt64 rating = info.GetSpeed(numCommands);
   2262       PrintResults(_file, info,
   2263           benchWeight, rating,
   2264           showFreq, cpuFreq, encodeRes);
   2265     }
   2266     RINOK(_file->CheckBreak());
   2267   }
   2268 
   2269   speed = info.GetSpeed(unpSizeThreads);
   2270 
   2271   return S_OK;
   2272 }
   2273 
   2274 static HRESULT TotalBench_Hash(
   2275     DECL_EXTERNAL_CODECS_LOC_VARS
   2276     UInt64 complexInCommands,
   2277     UInt32 numThreads, UInt32 bufSize,
   2278     IBenchPrintCallback *printCallback, CBenchCallbackToPrint *callback,
   2279     CTotalBenchRes *encodeRes,
   2280     bool showFreq, UInt64 cpuFreq)
   2281 {
   2282   for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
   2283   {
   2284     const CBenchHash &bench = g_Hash[i];
   2285     PrintLeft(*callback->_file, bench.Name, kFieldSize_Name);
   2286     // callback->BenchProps.DecComplexUnc = bench.DecComplexUnc;
   2287     // callback->BenchProps.DecComplexCompr = bench.DecComplexCompr;
   2288     // callback->BenchProps.EncComplex = bench.EncComplex;
   2289 
   2290     COneMethodInfo method;
   2291     NCOM::CPropVariant propVariant;
   2292     propVariant = bench.Name;
   2293     RINOK(method.ParseMethodFromPROPVARIANT(UString(), propVariant));
   2294 
   2295     UInt64 speed;
   2296     HRESULT res = CrcBench(
   2297         EXTERNAL_CODECS_LOC_VARS
   2298         complexInCommands,
   2299         numThreads, bufSize,
   2300         speed,
   2301         bench.Complex, bench.Weight,
   2302         &bench.CheckSum, method,
   2303         printCallback, encodeRes, showFreq, cpuFreq);
   2304     if (res == E_NOTIMPL)
   2305     {
   2306       // callback->Print(" ---");
   2307     }
   2308     else
   2309     {
   2310       RINOK(res);
   2311     }
   2312     callback->NewLine();
   2313   }
   2314   return S_OK;
   2315 }
   2316 
   2317 struct CTempValues
   2318 {
   2319   UInt64 *Values;
   2320   CTempValues(UInt32 num) { Values = new UInt64[num]; }
   2321   ~CTempValues() { delete []Values; }
   2322 };
   2323 
   2324 static void ParseNumberString(const UString &s, NCOM::CPropVariant &prop)
   2325 {
   2326   const wchar_t *end;
   2327   UInt64 result = ConvertStringToUInt64(s, &end);
   2328   if (*end != 0 || s.IsEmpty())
   2329     prop = s;
   2330   else if (result <= (UInt32)0xFFFFFFFF)
   2331     prop = (UInt32)result;
   2332   else
   2333     prop = result;
   2334 }
   2335 
   2336 static UInt32 GetNumThreadsNext(unsigned i, UInt32 numThreads)
   2337 {
   2338   if (i < 2)
   2339     return i + 1;
   2340   i -= 1;
   2341   UInt32 num = (UInt32)(2 + (i & 1)) << (i >> 1);
   2342   return (num <= numThreads) ? num : numThreads;
   2343 }
   2344 
   2345 static bool AreSameMethodNames(const char *fullName, const char *shortName)
   2346 {
   2347   for (;;)
   2348   {
   2349     char c2 = *shortName++;
   2350     if (c2 == 0)
   2351       return true;
   2352     char c1 = *fullName++;
   2353     if (MyCharLower_Ascii(c1) != MyCharLower_Ascii(c2))
   2354       return false;
   2355   }
   2356 }
   2357 
   2358 
   2359 #ifdef MY_CPU_X86_OR_AMD64
   2360 
   2361 static void PrintCpuChars(AString &s, UInt32 v)
   2362 {
   2363   for (int j = 0; j < 4; j++)
   2364   {
   2365     Byte b = (Byte)(v & 0xFF);
   2366     v >>= 8;
   2367     if (b == 0)
   2368       break;
   2369     s += (char)b;
   2370   }
   2371 }
   2372 
   2373 static void x86cpuid_to_String(const Cx86cpuid &c, AString &s)
   2374 {
   2375   s.Empty();
   2376 
   2377   UInt32 maxFunc2 = 0;
   2378   UInt32 t[3];
   2379 
   2380   MyCPUID(0x80000000, &maxFunc2, &t[0], &t[1], &t[2]);
   2381 
   2382   bool fullNameIsAvail = (maxFunc2 >= 0x80000004);
   2383 
   2384   if (!fullNameIsAvail)
   2385   {
   2386     for (int i = 0; i < 3; i++)
   2387       PrintCpuChars(s, c.vendor[i]);
   2388   }
   2389   else
   2390   {
   2391     for (int i = 0; i < 3; i++)
   2392     {
   2393       UInt32 d[4] = { 0 };
   2394       MyCPUID(0x80000002 + i, &d[0], &d[1], &d[2], &d[3]);
   2395       for (int j = 0; j < 4; j++)
   2396         PrintCpuChars(s, d[j]);
   2397     }
   2398   }
   2399 
   2400   s.Add_Space_if_NotEmpty();
   2401   {
   2402     char temp[32];
   2403     ConvertUInt32ToHex(c.ver, temp);
   2404     s += '(';
   2405     s += temp;
   2406     s += ')';
   2407   }
   2408 }
   2409 
   2410 #endif
   2411 
   2412 
   2413 void GetCpuName(AString &s)
   2414 {
   2415   s.Empty();
   2416 
   2417   #ifdef MY_CPU_X86_OR_AMD64
   2418   {
   2419     Cx86cpuid cpuid;
   2420     if (x86cpuid_CheckAndRead(&cpuid))
   2421     {
   2422       x86cpuid_to_String(cpuid, s);
   2423       return;
   2424     }
   2425     #ifdef MY_CPU_AMD64
   2426     s = "x64";
   2427     #else
   2428     s = "x86";
   2429     #endif
   2430   }
   2431   #else
   2432 
   2433     #ifdef MY_CPU_LE
   2434       s = "LE";
   2435     #elif defined(MY_CPU_BE)
   2436       s = "BE";
   2437     #endif
   2438 
   2439   #endif
   2440 }
   2441 
   2442 
   2443 HRESULT Bench(
   2444     DECL_EXTERNAL_CODECS_LOC_VARS
   2445     IBenchPrintCallback *printCallback,
   2446     IBenchCallback *benchCallback,
   2447     const CObjectVector<CProperty> &props,
   2448     UInt32 numIterations,
   2449     bool multiDict)
   2450 {
   2451   if (!CrcInternalTest())
   2452     return S_FALSE;
   2453 
   2454   UInt32 numCPUs = 1;
   2455   UInt64 ramSize = (UInt64)(sizeof(size_t)) << 29;
   2456 
   2457   #ifndef _7ZIP_ST
   2458   numCPUs = NSystem::GetNumberOfProcessors();
   2459   #endif
   2460 
   2461   bool ramSize_Defined = NSystem::GetRamSize(ramSize);
   2462 
   2463   UInt32 numThreadsSpecified = numCPUs;
   2464 
   2465   UInt32 testTime = kComplexInSeconds;
   2466 
   2467   UInt64 specifiedFreq = 0;
   2468 
   2469   bool multiThreadTests = false;
   2470 
   2471   COneMethodInfo method;
   2472 
   2473   CBenchBuffer fileDataBuffer;
   2474 
   2475   {
   2476   unsigned i;
   2477   for (i = 0; i < props.Size(); i++)
   2478   {
   2479     const CProperty &property = props[i];
   2480     UString name = property.Name;
   2481     name.MakeLower_Ascii();
   2482 
   2483     if (name.IsEqualTo("file"))
   2484     {
   2485       if (property.Value.IsEmpty())
   2486         return E_INVALIDARG;
   2487 
   2488       #ifdef USE_WIN_FILE
   2489 
   2490       NFile::NIO::CInFile file;
   2491       if (!file.Open(us2fs(property.Value)))
   2492         return E_INVALIDARG;
   2493       UInt64 len;
   2494       if (!file.GetLength(len))
   2495         return E_FAIL;
   2496       if (len >= ((UInt32)1 << 31) || len == 0)
   2497         return E_INVALIDARG;
   2498       if (!fileDataBuffer.Alloc((size_t)len))
   2499         return E_OUTOFMEMORY;
   2500       UInt32 processedSize;
   2501       file.Read(fileDataBuffer.Buffer, (UInt32)len, processedSize);
   2502       if (processedSize != len)
   2503         return E_FAIL;
   2504       if (printCallback)
   2505       {
   2506         printCallback->Print("file size =");
   2507         // printCallback->Print(GetOemString(property.Value));
   2508         PrintNumber(*printCallback, len, 0);
   2509         printCallback->NewLine();
   2510       }
   2511       continue;
   2512 
   2513       #else
   2514 
   2515       return E_NOTIMPL;
   2516 
   2517       #endif
   2518     }
   2519 
   2520     NCOM::CPropVariant propVariant;
   2521     if (!property.Value.IsEmpty())
   2522       ParseNumberString(property.Value, propVariant);
   2523 
   2524     if (name.IsEqualTo("time"))
   2525     {
   2526       RINOK(ParsePropToUInt32(L"", propVariant, testTime));
   2527       continue;
   2528     }
   2529 
   2530     if (name.IsEqualTo("freq"))
   2531     {
   2532       UInt32 freq32 = 0;
   2533       RINOK(ParsePropToUInt32(L"", propVariant, freq32));
   2534       if (freq32 == 0)
   2535         return E_INVALIDARG;
   2536       specifiedFreq = (UInt64)freq32 * 1000000;
   2537 
   2538       if (printCallback)
   2539       {
   2540         printCallback->Print("freq=");
   2541         PrintNumber(*printCallback, freq32, 0);
   2542         printCallback->NewLine();
   2543       }
   2544 
   2545       continue;
   2546     }
   2547 
   2548     if (name.IsPrefixedBy_Ascii_NoCase("mt"))
   2549     {
   2550       UString s = name.Ptr(2);
   2551       if (s == L"*")
   2552       {
   2553         multiThreadTests = true;
   2554         continue;
   2555       }
   2556       if (s.IsEmpty() && propVariant.vt == VT_BSTR)
   2557       {
   2558         if (wcscmp(propVariant.bstrVal, L"*") == 0)
   2559         {
   2560           multiThreadTests = true;
   2561           continue;
   2562         }
   2563       }
   2564       #ifndef _7ZIP_ST
   2565       RINOK(ParseMtProp(s, propVariant, numCPUs, numThreadsSpecified));
   2566       #endif
   2567       continue;
   2568     }
   2569 
   2570     RINOK(method.ParseMethodFromPROPVARIANT(name, propVariant));
   2571   }
   2572   }
   2573 
   2574   if (printCallback)
   2575   {
   2576     AString s;
   2577     GetCpuName(s);
   2578     printCallback->Print(s);
   2579     printCallback->NewLine();
   2580   }
   2581 
   2582   if (printCallback)
   2583   {
   2584     printCallback->Print("CPU Freq:");
   2585   }
   2586 
   2587   UInt64 complexInCommands = kComplexInCommands;
   2588 
   2589   if (printCallback /* || benchCallback */)
   2590   {
   2591     UInt64 numMilCommands = 1 << 6;
   2592     if (specifiedFreq != 0)
   2593     {
   2594       while (numMilCommands > 1 && specifiedFreq < (numMilCommands * 1000000))
   2595         numMilCommands >>= 1;
   2596     }
   2597 
   2598     for (int jj = 0;; jj++)
   2599     {
   2600       if (printCallback)
   2601         RINOK(printCallback->CheckBreak());
   2602 
   2603       UInt64 start = ::GetTimeCount();
   2604       UInt32 sum = (UInt32)start;
   2605       sum = CountCpuFreq(sum, (UInt32)(numMilCommands * 1000000 / kNumFreqCommands), g_BenchCpuFreqTemp);
   2606       const UInt64 realDelta = ::GetTimeCount() - start;
   2607       start = realDelta;
   2608       if (start == 0)
   2609         start = 1;
   2610       UInt64 freq = GetFreq();
   2611       // mips is constant in some compilers
   2612       const UInt64 mipsVal = numMilCommands * freq / start;
   2613       if (printCallback)
   2614       {
   2615         if (realDelta == 0)
   2616         {
   2617           printCallback->Print(" -");
   2618         }
   2619         else
   2620         {
   2621           // PrintNumber(*printCallback, start, 0);
   2622           PrintNumber(*printCallback, mipsVal, 5 + ((sum == 0xF1541213) ? 1 : 0));
   2623         }
   2624       }
   2625       /*
   2626       if (benchCallback)
   2627         benchCallback->AddCpuFreq(mipsVal);
   2628       */
   2629 
   2630       if (jj >= 3)
   2631       {
   2632         SetComplexCommands(testTime, false, mipsVal * 1000000, complexInCommands);
   2633         if (jj >= 8 || start >= freq)
   2634           break;
   2635         // break; // change it
   2636         numMilCommands <<= 1;
   2637       }
   2638     }
   2639   }
   2640 
   2641   if (printCallback)
   2642   {
   2643     printCallback->NewLine();
   2644     printCallback->NewLine();
   2645     PrintRequirements(*printCallback, "size: ", ramSize_Defined, ramSize, "CPU hardware threads:", numCPUs);
   2646   }
   2647 
   2648   if (numThreadsSpecified < 1 || numThreadsSpecified > kNumThreadsMax)
   2649     return E_INVALIDARG;
   2650 
   2651   UInt32 dict;
   2652   bool dictIsDefined = method.Get_DicSize(dict);
   2653 
   2654   if (method.MethodName.IsEmpty())
   2655     method.MethodName = "LZMA";
   2656 
   2657   if (benchCallback)
   2658   {
   2659     CBenchProps benchProps;
   2660     benchProps.SetLzmaCompexity();
   2661     UInt32 dictSize = method.Get_Lzma_DicSize();
   2662     UInt32 uncompressedDataSize = kAdditionalSize + dictSize;
   2663     return MethodBench(
   2664         EXTERNAL_CODECS_LOC_VARS
   2665         complexInCommands,
   2666         true, numThreadsSpecified,
   2667         method,
   2668         uncompressedDataSize, fileDataBuffer.Buffer,
   2669         kOldLzmaDictBits, printCallback, benchCallback, &benchProps);
   2670   }
   2671 
   2672   AString methodName = method.MethodName;
   2673   if (methodName.IsEqualTo_Ascii_NoCase("CRC"))
   2674     methodName = "crc32";
   2675   method.MethodName = methodName;
   2676   CMethodId hashID;
   2677 
   2678   if (FindHashMethod(EXTERNAL_CODECS_LOC_VARS methodName, hashID))
   2679   {
   2680     if (!printCallback)
   2681       return S_FALSE;
   2682     IBenchPrintCallback &f = *printCallback;
   2683     if (!dictIsDefined)
   2684       dict = (1 << 24);
   2685 
   2686 
   2687     // methhodName.RemoveChar(L'-');
   2688     UInt32 complexity = 10000;
   2689     const UInt32 *checkSum = NULL;
   2690     {
   2691       for (unsigned i = 0; i < ARRAY_SIZE(g_Hash); i++)
   2692       {
   2693         const CBenchHash &h = g_Hash[i];
   2694         AString s = h.Name;
   2695         AString hProp;
   2696         int propPos = s.Find(':');
   2697         if (propPos >= 0)
   2698         {
   2699           hProp = s.Ptr(propPos + 1);
   2700           s.DeleteFrom(propPos);
   2701         }
   2702 
   2703         if (AreSameMethodNames(s, methodName))
   2704         {
   2705           complexity = h.Complex;
   2706           checkSum = &h.CheckSum;
   2707           if (method.PropsString.IsEqualTo_Ascii_NoCase(hProp))
   2708             break;
   2709         }
   2710       }
   2711     }
   2712 
   2713     f.NewLine();
   2714     f.Print("Size");
   2715     const unsigned kFieldSize_CrcSpeed = 6;
   2716     unsigned numThreadsTests = 0;
   2717     for (;;)
   2718     {
   2719       UInt32 t = GetNumThreadsNext(numThreadsTests, numThreadsSpecified);
   2720       PrintNumber(f, t, kFieldSize_CrcSpeed);
   2721       numThreadsTests++;
   2722       if (t >= numThreadsSpecified)
   2723         break;
   2724     }
   2725     f.NewLine();
   2726     f.NewLine();
   2727     CTempValues speedTotals(numThreadsTests);
   2728     {
   2729       for (unsigned ti = 0; ti < numThreadsTests; ti++)
   2730         speedTotals.Values[ti] = 0;
   2731     }
   2732 
   2733     UInt64 numSteps = 0;
   2734     for (UInt32 i = 0; i < numIterations; i++)
   2735     {
   2736       for (unsigned pow = 10; pow < 32; pow++)
   2737       {
   2738         UInt32 bufSize = (UInt32)1 << pow;
   2739         if (bufSize > dict)
   2740           break;
   2741         char s[16];
   2742         ConvertUInt32ToString(pow, s);
   2743         unsigned pos = MyStringLen(s);
   2744         s[pos++] = ':';
   2745         s[pos++] = ' ';
   2746         s[pos] = 0;
   2747         f.Print(s);
   2748 
   2749         for (unsigned ti = 0; ti < numThreadsTests; ti++)
   2750         {
   2751           RINOK(f.CheckBreak());
   2752           UInt32 t = GetNumThreadsNext(ti, numThreadsSpecified);
   2753           UInt64 speed = 0;
   2754           RINOK(CrcBench(EXTERNAL_CODECS_LOC_VARS complexInCommands,
   2755               t, bufSize, speed,
   2756               complexity,
   2757               1, // benchWeight,
   2758               (pow == kNumHashDictBits) ? checkSum : NULL, method, NULL, NULL, false, 0));
   2759           PrintNumber(f, (speed >> 20), kFieldSize_CrcSpeed);
   2760           speedTotals.Values[ti] += speed;
   2761         }
   2762         f.NewLine();
   2763         numSteps++;
   2764       }
   2765     }
   2766     if (numSteps != 0)
   2767     {
   2768       f.NewLine();
   2769       f.Print("Avg:");
   2770       for (unsigned ti = 0; ti < numThreadsTests; ti++)
   2771       {
   2772         PrintNumber(f, ((speedTotals.Values[ti] / numSteps) >> 20), kFieldSize_CrcSpeed);
   2773       }
   2774       f.NewLine();
   2775     }
   2776     return S_OK;
   2777   }
   2778 
   2779   bool use2Columns = false;
   2780 
   2781   bool totalBenchMode = (method.MethodName.IsEqualTo_Ascii_NoCase("*"));
   2782   bool onlyHashBench = false;
   2783   if (method.MethodName.IsEqualTo_Ascii_NoCase("hash"))
   2784   {
   2785     onlyHashBench = true;
   2786     totalBenchMode = true;
   2787   }
   2788 
   2789   // ---------- Threads loop ----------
   2790   for (unsigned threadsPassIndex = 0; threadsPassIndex < 3; threadsPassIndex++)
   2791   {
   2792 
   2793   UInt32 numThreads = numThreadsSpecified;
   2794 
   2795   if (!multiThreadTests)
   2796   {
   2797     if (threadsPassIndex != 0)
   2798       break;
   2799   }
   2800   else
   2801   {
   2802     numThreads = 1;
   2803     if (threadsPassIndex != 0)
   2804     {
   2805       if (numCPUs < 2)
   2806         break;
   2807       numThreads = numCPUs;
   2808       if (threadsPassIndex == 1)
   2809       {
   2810         if (numCPUs >= 4)
   2811           numThreads = numCPUs / 2;
   2812       }
   2813       else if (numCPUs < 4)
   2814         break;
   2815     }
   2816   }
   2817 
   2818   CBenchCallbackToPrint callback;
   2819   callback.Init();
   2820   callback._file = printCallback;
   2821 
   2822   IBenchPrintCallback &f = *printCallback;
   2823 
   2824   if (threadsPassIndex > 0)
   2825   {
   2826     f.NewLine();
   2827     f.NewLine();
   2828   }
   2829 
   2830   if (!dictIsDefined)
   2831   {
   2832     const unsigned dicSizeLog_Main = (totalBenchMode ? 24 : 25);
   2833     unsigned dicSizeLog = dicSizeLog_Main;
   2834 
   2835     #ifdef UNDER_CE
   2836     dicSizeLog = (UInt64)1 << 20;
   2837     #endif
   2838 
   2839     if (ramSize_Defined)
   2840     for (; dicSizeLog > kBenchMinDicLogSize; dicSizeLog--)
   2841       if (GetBenchMemoryUsage(numThreads, ((UInt32)1 << dicSizeLog), totalBenchMode) + (8 << 20) <= ramSize)
   2842         break;
   2843 
   2844     dict = (UInt32)1 << dicSizeLog;
   2845 
   2846     if (totalBenchMode && dicSizeLog != dicSizeLog_Main)
   2847     {
   2848       f.Print("Dictionary reduced to: ");
   2849       PrintNumber(f, dicSizeLog, 1);
   2850       f.NewLine();
   2851     }
   2852   }
   2853 
   2854   PrintRequirements(f, "usage:", true, GetBenchMemoryUsage(numThreads, dict, totalBenchMode), "Benchmark threads:   ", numThreads);
   2855 
   2856   f.NewLine();
   2857 
   2858   if (totalBenchMode)
   2859   {
   2860     callback.NameFieldSize = kFieldSize_Name;
   2861     use2Columns = false;
   2862   }
   2863   else
   2864   {
   2865     callback.NameFieldSize = kFieldSize_SmallName;
   2866     use2Columns = true;
   2867   }
   2868   callback.Use2Columns = use2Columns;
   2869 
   2870   bool showFreq = false;
   2871   UInt64 cpuFreq = 0;
   2872 
   2873   if (totalBenchMode)
   2874   {
   2875     showFreq = true;
   2876   }
   2877 
   2878   unsigned fileldSize = kFieldSize_TotalSize;
   2879   if (showFreq)
   2880     fileldSize += kFieldSize_EUAndEffec;
   2881 
   2882   if (use2Columns)
   2883   {
   2884     PrintSpaces(f, callback.NameFieldSize);
   2885     PrintRight(f, "Compressing", fileldSize);
   2886     f.Print(kSep);
   2887     PrintRight(f, "Decompressing", fileldSize);
   2888   }
   2889   f.NewLine();
   2890   PrintLeft(f, totalBenchMode ? "Method" : "Dict", callback.NameFieldSize);
   2891 
   2892   int j;
   2893 
   2894   for (j = 0; j < 2; j++)
   2895   {
   2896     PrintRight(f, "Speed", kFieldSize_Speed + 1);
   2897     PrintRight(f, "Usage", kFieldSize_Usage + 1);
   2898     PrintRight(f, "R/U", kFieldSize_RU + 1);
   2899     PrintRight(f, "Rating", kFieldSize_Rating + 1);
   2900     if (showFreq)
   2901     {
   2902       PrintRight(f, "E/U", kFieldSize_EU + 1);
   2903       PrintRight(f, "Effec", kFieldSize_Effec + 1);
   2904     }
   2905     if (!use2Columns)
   2906       break;
   2907     if (j == 0)
   2908       f.Print(kSep);
   2909   }
   2910 
   2911   f.NewLine();
   2912   PrintSpaces(f, callback.NameFieldSize);
   2913 
   2914   for (j = 0; j < 2; j++)
   2915   {
   2916     PrintRight(f, "KiB/s", kFieldSize_Speed + 1);
   2917     PrintRight(f, "%", kFieldSize_Usage + 1);
   2918     PrintRight(f, "MIPS", kFieldSize_RU + 1);
   2919     PrintRight(f, "MIPS", kFieldSize_Rating + 1);
   2920     if (showFreq)
   2921     {
   2922       PrintRight(f, "%", kFieldSize_EU + 1);
   2923       PrintRight(f, "%", kFieldSize_Effec + 1);
   2924     }
   2925     if (!use2Columns)
   2926       break;
   2927     if (j == 0)
   2928       f.Print(kSep);
   2929   }
   2930 
   2931   f.NewLine();
   2932   f.NewLine();
   2933 
   2934   if (specifiedFreq != 0)
   2935     cpuFreq = specifiedFreq;
   2936 
   2937 
   2938   if (totalBenchMode)
   2939   {
   2940     for (UInt32 i = 0; i < numIterations; i++)
   2941     {
   2942       if (i != 0)
   2943         printCallback->NewLine();
   2944       HRESULT res;
   2945 
   2946       const unsigned kNumCpuTests = 3;
   2947       for (unsigned freqTest = 0; freqTest < kNumCpuTests; freqTest++)
   2948       {
   2949         PrintLeft(f, "CPU", kFieldSize_Name);
   2950         UInt32 resVal;
   2951         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
   2952             (freqTest == kNumCpuTests - 1 || specifiedFreq != 0), // showFreq
   2953             specifiedFreq,
   2954             cpuFreq, resVal));
   2955         callback.NewLine();
   2956 
   2957         if (specifiedFreq != 0)
   2958           cpuFreq = specifiedFreq;
   2959 
   2960         if (freqTest == kNumCpuTests - 1)
   2961           SetComplexCommands(testTime, specifiedFreq != 0, cpuFreq, complexInCommands);
   2962       }
   2963       callback.NewLine();
   2964 
   2965       callback.SetFreq(true, cpuFreq);
   2966 
   2967       if (!onlyHashBench)
   2968       {
   2969         res = TotalBench(EXTERNAL_CODECS_LOC_VARS
   2970             complexInCommands, numThreads,
   2971             dictIsDefined || fileDataBuffer.Buffer, // forceUnpackSize
   2972             fileDataBuffer.Buffer ? fileDataBuffer.BufferSize : dict,
   2973             fileDataBuffer.Buffer,
   2974             printCallback, &callback);
   2975         RINOK(res);
   2976       }
   2977 
   2978       res = TotalBench_Hash(EXTERNAL_CODECS_LOC_VARS complexInCommands, numThreads,
   2979           1 << kNumHashDictBits, printCallback, &callback, &callback.EncodeRes, true, cpuFreq);
   2980       RINOK(res);
   2981 
   2982       callback.NewLine();
   2983       {
   2984         PrintLeft(f, "CPU", kFieldSize_Name);
   2985         UInt32 resVal;
   2986         UInt64 cpuFreqLastTemp = cpuFreq;
   2987         RINOK(FreqBench(complexInCommands, numThreads, printCallback,
   2988             specifiedFreq != 0, // showFreq
   2989             specifiedFreq,
   2990             cpuFreqLastTemp, resVal));
   2991         callback.NewLine();
   2992       }
   2993     }
   2994   }
   2995   else
   2996   {
   2997     bool needSetComplexity = true;
   2998     if (!methodName.IsEqualTo_Ascii_NoCase("LZMA"))
   2999     {
   3000       for (unsigned i = 0; i < ARRAY_SIZE(g_Bench); i++)
   3001       {
   3002         const CBenchMethod &h = g_Bench[i];
   3003         AString s = h.Name;
   3004         if (AreSameMethodNames(h.Name, methodName))
   3005         {
   3006           callback.BenchProps.EncComplex = h.EncComplex;
   3007           callback.BenchProps.DecComplexCompr = h.DecComplexCompr;
   3008           callback.BenchProps.DecComplexUnc = h.DecComplexUnc;;
   3009           needSetComplexity = false;
   3010           break;
   3011         }
   3012       }
   3013     }
   3014     if (needSetComplexity)
   3015       callback.BenchProps.SetLzmaCompexity();
   3016 
   3017   for (unsigned i = 0; i < numIterations; i++)
   3018   {
   3019     const unsigned kStartDicLog = 22;
   3020     unsigned pow = (dict < ((UInt32)1 << kStartDicLog)) ? kBenchMinDicLogSize : kStartDicLog;
   3021     if (!multiDict)
   3022       pow = 31;
   3023     while (((UInt32)1 << pow) > dict && pow > 0)
   3024       pow--;
   3025     for (; ((UInt32)1 << pow) <= dict; pow++)
   3026     {
   3027       char s[16];
   3028       ConvertUInt32ToString(pow, s);
   3029       unsigned pos = MyStringLen(s);
   3030       s[pos++] = ':';
   3031       s[pos] = 0;
   3032       PrintLeft(f, s, kFieldSize_SmallName);
   3033       callback.DictSize = (UInt32)1 << pow;
   3034 
   3035       COneMethodInfo method2 = method;
   3036 
   3037       if (StringsAreEqualNoCase_Ascii(method2.MethodName, "LZMA"))
   3038       {
   3039         // Append the dictionary size property for this pass.
   3040         // method2 may then hold two different dictionary size properties;
   3041         // the last one added is the main one.
   3042         NCOM::CPropVariant propVariant = (UInt32)pow;
   3043         RINOK(method2.ParseMethodFromPROPVARIANT(L"d", propVariant));
   3044       }
   3045 
   3046       size_t uncompressedDataSize;
   3047       if (fileDataBuffer.Buffer)
   3048       {
   3049         uncompressedDataSize = fileDataBuffer.BufferSize;
   3050       }
   3051       else
   3052       {
   3053         uncompressedDataSize = callback.DictSize;
   3054         if (uncompressedDataSize >= (1 << 18))
   3055           uncompressedDataSize += kAdditionalSize;
   3056       }
   3057 
   3058       HRESULT res = MethodBench(
   3059           EXTERNAL_CODECS_LOC_VARS
   3060           complexInCommands,
   3061           true, numThreads,
   3062           method2,
   3063           uncompressedDataSize, fileDataBuffer.Buffer,
   3064           kOldLzmaDictBits, printCallback, &callback, &callback.BenchProps);
   3065       f.NewLine();
   3066       RINOK(res);
   3067       if (!multiDict)
   3068         break;
   3069     }
   3070   }
   3071   }
   3072 
   3073   PrintChars(f, '-', callback.NameFieldSize + fileldSize);
   3074 
   3075   if (use2Columns)
   3076   {
   3077     f.Print(kSep);
   3078     PrintChars(f, '-', fileldSize);
   3079   }
   3080 
   3081   f.NewLine();
   3082 
   3083   if (use2Columns)
   3084   {
   3085     PrintLeft(f, "Avr:", callback.NameFieldSize);
   3086     PrintTotals(f, showFreq, cpuFreq, callback.EncodeRes);
   3087     f.Print(kSep);
   3088     PrintTotals(f, showFreq, cpuFreq, callback.DecodeRes);
   3089     f.NewLine();
   3090   }
   3091 
   3092   PrintLeft(f, "Tot:", callback.NameFieldSize);
   3093   CTotalBenchRes midRes;
   3094   midRes.SetSum(callback.EncodeRes, callback.DecodeRes);
   3095   PrintTotals(f, showFreq, cpuFreq, midRes);
   3096   f.NewLine();
   3097 
   3098   }
   3099   return S_OK;
   3100 }
   3101