Home | History | Annotate | Download | only in Fuzzer
      1 //===- FuzzerInternal.h - Internal header for the Fuzzer --------*- C++ -* ===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // Define the main class fuzzer::Fuzzer and most functions.
     10 //===----------------------------------------------------------------------===//
     11 
     12 #ifndef LLVM_FUZZER_INTERNAL_H
     13 #define LLVM_FUZZER_INTERNAL_H
     14 
     15 #include <algorithm>
     16 #include <atomic>
     17 #include <cassert>
     18 #include <chrono>
     19 #include <climits>
     20 #include <cstddef>
     21 #include <cstdlib>
     22 #include <random>
     23 #include <string.h>
     24 #include <string>
     25 #include <unordered_set>
     26 #include <vector>
     27 
     28 #include "FuzzerExtFunctions.h"
     29 #include "FuzzerInterface.h"
     30 #include "FuzzerTracePC.h"
     31 
     32 // Platform detection.
     33 #ifdef __linux__
     34 #define LIBFUZZER_LINUX 1
     35 #define LIBFUZZER_APPLE 0
     36 #elif __APPLE__
     37 #define LIBFUZZER_LINUX 0
     38 #define LIBFUZZER_APPLE 1
     39 #else
     40 #error "Support for your platform has not been implemented"
     41 #endif
     42 
     43 namespace fuzzer {
     44 
     45 typedef int (*UserCallback)(const uint8_t *Data, size_t Size);
     46 int FuzzerDriver(int *argc, char ***argv, UserCallback Callback);
     47 
     48 using namespace std::chrono;
     49 typedef std::vector<uint8_t> Unit;
     50 typedef std::vector<Unit> UnitVector;
     51 
     52 // A simple POD sized array of bytes.
     53 template <size_t kMaxSize> class FixedWord {
     54 public:
     55   FixedWord() {}
     56   FixedWord(const uint8_t *B, uint8_t S) { Set(B, S); }
     57 
     58   void Set(const uint8_t *B, uint8_t S) {
     59     assert(S <= kMaxSize);
     60     memcpy(Data, B, S);
     61     Size = S;
     62   }
     63 
     64   bool operator==(const FixedWord<kMaxSize> &w) const {
     65     return Size == w.Size && 0 == memcmp(Data, w.Data, Size);
     66   }
     67 
     68   bool operator<(const FixedWord<kMaxSize> &w) const {
     69     if (Size != w.Size)
     70       return Size < w.Size;
     71     return memcmp(Data, w.Data, Size) < 0;
     72   }
     73 
     74   static size_t GetMaxSize() { return kMaxSize; }
     75   const uint8_t *data() const { return Data; }
     76   uint8_t size() const { return Size; }
     77 
     78 private:
     79   uint8_t Size = 0;
     80   uint8_t Data[kMaxSize];
     81 };
     82 
     83 typedef FixedWord<27> Word; // 28 bytes.
     84 
     85 bool IsFile(const std::string &Path);
     86 std::string FileToString(const std::string &Path);
     87 Unit FileToVector(const std::string &Path, size_t MaxSize = 0);
     88 void ReadDirToVectorOfUnits(const char *Path, std::vector<Unit> *V,
     89                             long *Epoch, size_t MaxSize);
     90 void WriteToFile(const Unit &U, const std::string &Path);
     91 void CopyFileToErr(const std::string &Path);
     92 // Returns "Dir/FileName" or equivalent for the current OS.
     93 std::string DirPlusFile(const std::string &DirPath,
     94                         const std::string &FileName);
     95 
     96 void DupAndCloseStderr();
     97 void CloseStdout();
     98 void Printf(const char *Fmt, ...);
     99 void PrintHexArray(const Unit &U, const char *PrintAfter = "");
    100 void PrintHexArray(const uint8_t *Data, size_t Size,
    101                    const char *PrintAfter = "");
    102 void PrintASCII(const uint8_t *Data, size_t Size, const char *PrintAfter = "");
    103 void PrintASCII(const Unit &U, const char *PrintAfter = "");
    104 void PrintASCII(const Word &W, const char *PrintAfter = "");
    105 std::string Hash(const Unit &U);
    106 void SetTimer(int Seconds);
    107 void SetSigSegvHandler();
    108 void SetSigBusHandler();
    109 void SetSigAbrtHandler();
    110 void SetSigIllHandler();
    111 void SetSigFpeHandler();
    112 void SetSigIntHandler();
    113 void SetSigTermHandler();
    114 std::string Base64(const Unit &U);
    115 int ExecuteCommand(const std::string &Command);
    116 size_t GetPeakRSSMb();
    117 
    118 // Private copy of SHA1 implementation.
    119 static const int kSHA1NumBytes = 20;
    120 // Computes SHA1 hash of 'Len' bytes in 'Data', writes kSHA1NumBytes to 'Out'.
    121 void ComputeSHA1(const uint8_t *Data, size_t Len, uint8_t *Out);
    122 
    123 // Changes U to contain only ASCII (isprint+isspace) characters.
    124 // Returns true iff U has been changed.
    125 bool ToASCII(uint8_t *Data, size_t Size);
    126 bool IsASCII(const Unit &U);
    127 bool IsASCII(const uint8_t *Data, size_t Size);
    128 
    129 int NumberOfCpuCores();
    130 int GetPid();
    131 void SleepSeconds(int Seconds);
    132 
    133 class Random {
    134  public:
    135   Random(unsigned int seed) : R(seed) {}
    136   size_t Rand() { return R(); }
    137   size_t RandBool() { return Rand() % 2; }
    138   size_t operator()(size_t n) { return n ? Rand() % n : 0; }
    139   std::mt19937 &Get_mt19937() { return R; }
    140  private:
    141   std::mt19937 R;
    142 };
    143 
    144 // Dictionary.
    145 
    146 // Parses one dictionary entry.
    147 // If successfull, write the enty to Unit and returns true,
    148 // otherwise returns false.
    149 bool ParseOneDictionaryEntry(const std::string &Str, Unit *U);
    150 // Parses the dictionary file, fills Units, returns true iff all lines
    151 // were parsed succesfully.
    152 bool ParseDictionaryFile(const std::string &Text, std::vector<Unit> *Units);
    153 
    154 class DictionaryEntry {
    155  public:
    156   DictionaryEntry() {}
    157   DictionaryEntry(Word W) : W(W) {}
    158   DictionaryEntry(Word W, size_t PositionHint) : W(W), PositionHint(PositionHint) {}
    159   const Word &GetW() const { return W; }
    160 
    161   bool HasPositionHint() const { return PositionHint != std::numeric_limits<size_t>::max(); }
    162   size_t GetPositionHint() const {
    163     assert(HasPositionHint());
    164     return PositionHint;
    165   }
    166   void IncUseCount() { UseCount++; }
    167   void IncSuccessCount() { SuccessCount++; }
    168   size_t GetUseCount() const { return UseCount; }
    169   size_t GetSuccessCount() const {return SuccessCount; }
    170 
    171 private:
    172   Word W;
    173   size_t PositionHint = std::numeric_limits<size_t>::max();
    174   size_t UseCount = 0;
    175   size_t SuccessCount = 0;
    176 };
    177 
    178 class Dictionary {
    179  public:
    180   static const size_t kMaxDictSize = 1 << 14;
    181 
    182   bool ContainsWord(const Word &W) const {
    183     return std::any_of(begin(), end(), [&](const DictionaryEntry &DE) {
    184       return DE.GetW() == W;
    185     });
    186   }
    187   const DictionaryEntry *begin() const { return &DE[0]; }
    188   const DictionaryEntry *end() const { return begin() + Size; }
    189   DictionaryEntry & operator[] (size_t Idx) {
    190     assert(Idx < Size);
    191     return DE[Idx];
    192   }
    193   void push_back(DictionaryEntry DE) {
    194     if (Size < kMaxDictSize)
    195       this->DE[Size++] = DE;
    196   }
    197   void clear() { Size = 0; }
    198   bool empty() const { return Size == 0; }
    199   size_t size() const { return Size; }
    200 
    201 private:
    202   DictionaryEntry DE[kMaxDictSize];
    203   size_t Size = 0;
    204 };
    205 
    206 struct FuzzingOptions {
    207   int Verbosity = 1;
    208   size_t MaxLen = 0;
    209   int UnitTimeoutSec = 300;
    210   int TimeoutExitCode = 77;
    211   int ErrorExitCode = 77;
    212   int MaxTotalTimeSec = 0;
    213   int RssLimitMb = 0;
    214   bool DoCrossOver = true;
    215   int MutateDepth = 5;
    216   bool UseCounters = false;
    217   bool UseIndirCalls = true;
    218   bool UseTraces = false;
    219   bool UseMemcmp = true;
    220   bool UseFullCoverageSet = false;
    221   bool Reload = true;
    222   bool ShuffleAtStartUp = true;
    223   bool PreferSmall = true;
    224   size_t MaxNumberOfRuns = ULONG_MAX;
    225   int ReportSlowUnits = 10;
    226   bool OnlyASCII = false;
    227   std::string OutputCorpus;
    228   std::string ArtifactPrefix = "./";
    229   std::string ExactArtifactPath;
    230   bool SaveArtifacts = true;
    231   bool PrintNEW = true; // Print a status line when new units are found;
    232   bool OutputCSV = false;
    233   bool PrintNewCovPcs = false;
    234   bool PrintFinalStats = false;
    235   bool DetectLeaks = true;
    236   bool TruncateUnits = false;
    237   bool PruneCorpus = true;
    238 };
    239 
    240 class MutationDispatcher {
    241 public:
    242   MutationDispatcher(Random &Rand, const FuzzingOptions &Options);
    243   ~MutationDispatcher() {}
    244   /// Indicate that we are about to start a new sequence of mutations.
    245   void StartMutationSequence();
    246   /// Print the current sequence of mutations.
    247   void PrintMutationSequence();
    248   /// Indicate that the current sequence of mutations was successfull.
    249   void RecordSuccessfulMutationSequence();
    250   /// Mutates data by invoking user-provided mutator.
    251   size_t Mutate_Custom(uint8_t *Data, size_t Size, size_t MaxSize);
    252   /// Mutates data by invoking user-provided crossover.
    253   size_t Mutate_CustomCrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
    254   /// Mutates data by shuffling bytes.
    255   size_t Mutate_ShuffleBytes(uint8_t *Data, size_t Size, size_t MaxSize);
    256   /// Mutates data by erasing a byte.
    257   size_t Mutate_EraseByte(uint8_t *Data, size_t Size, size_t MaxSize);
    258   /// Mutates data by inserting a byte.
    259   size_t Mutate_InsertByte(uint8_t *Data, size_t Size, size_t MaxSize);
    260   /// Mutates data by chanding one byte.
    261   size_t Mutate_ChangeByte(uint8_t *Data, size_t Size, size_t MaxSize);
    262   /// Mutates data by chanding one bit.
    263   size_t Mutate_ChangeBit(uint8_t *Data, size_t Size, size_t MaxSize);
    264 
    265   /// Mutates data by adding a word from the manual dictionary.
    266   size_t Mutate_AddWordFromManualDictionary(uint8_t *Data, size_t Size,
    267                                             size_t MaxSize);
    268 
    269   /// Mutates data by adding a word from the temporary automatic dictionary.
    270   size_t Mutate_AddWordFromTemporaryAutoDictionary(uint8_t *Data, size_t Size,
    271                                                    size_t MaxSize);
    272 
    273   /// Mutates data by adding a word from the persistent automatic dictionary.
    274   size_t Mutate_AddWordFromPersistentAutoDictionary(uint8_t *Data, size_t Size,
    275                                                     size_t MaxSize);
    276 
    277   /// Tries to find an ASCII integer in Data, changes it to another ASCII int.
    278   size_t Mutate_ChangeASCIIInteger(uint8_t *Data, size_t Size, size_t MaxSize);
    279 
    280   /// CrossOver Data with some other element of the corpus.
    281   size_t Mutate_CrossOver(uint8_t *Data, size_t Size, size_t MaxSize);
    282 
    283   /// Applies one of the configured mutations.
    284   /// Returns the new size of data which could be up to MaxSize.
    285   size_t Mutate(uint8_t *Data, size_t Size, size_t MaxSize);
    286   /// Applies one of the default mutations. Provided as a service
    287   /// to mutation authors.
    288   size_t DefaultMutate(uint8_t *Data, size_t Size, size_t MaxSize);
    289 
    290   /// Creates a cross-over of two pieces of Data, returns its size.
    291   size_t CrossOver(const uint8_t *Data1, size_t Size1, const uint8_t *Data2,
    292                    size_t Size2, uint8_t *Out, size_t MaxOutSize);
    293 
    294   void AddWordToManualDictionary(const Word &W);
    295 
    296   void AddWordToAutoDictionary(const Word &W, size_t PositionHint);
    297   void ClearAutoDictionary();
    298   void PrintRecommendedDictionary();
    299 
    300   void SetCorpus(const std::vector<Unit> *Corpus) { this->Corpus = Corpus; }
    301 
    302   Random &GetRand() { return Rand; }
    303 
    304 private:
    305 
    306   struct Mutator {
    307     size_t (MutationDispatcher::*Fn)(uint8_t *Data, size_t Size, size_t Max);
    308     const char *Name;
    309   };
    310 
    311   size_t AddWordFromDictionary(Dictionary &D, uint8_t *Data, size_t Size,
    312                                size_t MaxSize);
    313   size_t MutateImpl(uint8_t *Data, size_t Size, size_t MaxSize,
    314                     const std::vector<Mutator> &Mutators);
    315 
    316   Random &Rand;
    317   const FuzzingOptions Options;
    318 
    319   // Dictionary provided by the user via -dict=DICT_FILE.
    320   Dictionary ManualDictionary;
    321   // Temporary dictionary modified by the fuzzer itself,
    322   // recreated periodically.
    323   Dictionary TempAutoDictionary;
    324   // Persistent dictionary modified by the fuzzer, consists of
    325   // entries that led to successfull discoveries in the past mutations.
    326   Dictionary PersistentAutoDictionary;
    327   std::vector<Mutator> CurrentMutatorSequence;
    328   std::vector<DictionaryEntry *> CurrentDictionaryEntrySequence;
    329   const std::vector<Unit> *Corpus = nullptr;
    330   std::vector<uint8_t> MutateInPlaceHere;
    331 
    332   std::vector<Mutator> Mutators;
    333   std::vector<Mutator> DefaultMutators;
    334 };
    335 
    336 class Fuzzer {
    337 public:
    338 
    339   // Aggregates all available coverage measurements.
    340   struct Coverage {
    341     Coverage() { Reset(); }
    342 
    343     void Reset() {
    344       BlockCoverage = 0;
    345       CallerCalleeCoverage = 0;
    346       PcMapBits = 0;
    347       CounterBitmapBits = 0;
    348       PcBufferLen = 0;
    349       CounterBitmap.clear();
    350       PCMap.Reset();
    351     }
    352 
    353     std::string DebugString() const;
    354 
    355     size_t BlockCoverage;
    356     size_t CallerCalleeCoverage;
    357 
    358     size_t PcBufferLen;
    359     // Precalculated number of bits in CounterBitmap.
    360     size_t CounterBitmapBits;
    361     std::vector<uint8_t> CounterBitmap;
    362     // Precalculated number of bits in PCMap.
    363     size_t PcMapBits;
    364     PcCoverageMap PCMap;
    365   };
    366 
    367   Fuzzer(UserCallback CB, MutationDispatcher &MD, FuzzingOptions Options);
    368   void AddToCorpus(const Unit &U) {
    369     Corpus.push_back(U);
    370     UpdateCorpusDistribution();
    371   }
    372   size_t ChooseUnitIdxToMutate();
    373   const Unit &ChooseUnitToMutate() { return Corpus[ChooseUnitIdxToMutate()]; };
    374   void TruncateUnits(std::vector<Unit> *NewCorpus);
    375   void Loop();
    376   void Drill();
    377   void ShuffleAndMinimize();
    378   void InitializeTraceState();
    379   void AssignTaintLabels(uint8_t *Data, size_t Size);
    380   size_t CorpusSize() const { return Corpus.size(); }
    381   size_t MaxUnitSizeInCorpus() const;
    382   void ReadDir(const std::string &Path, long *Epoch, size_t MaxSize) {
    383     Printf("Loading corpus: %s\n", Path.c_str());
    384     ReadDirToVectorOfUnits(Path.c_str(), &Corpus, Epoch, MaxSize);
    385   }
    386   void RereadOutputCorpus(size_t MaxSize);
    387   // Save the current corpus to OutputCorpus.
    388   void SaveCorpus();
    389 
    390   size_t secondsSinceProcessStartUp() {
    391     return duration_cast<seconds>(system_clock::now() - ProcessStartTime)
    392         .count();
    393   }
    394   size_t execPerSec() {
    395     size_t Seconds = secondsSinceProcessStartUp();
    396     return Seconds ? TotalNumberOfRuns / Seconds : 0;
    397   }
    398 
    399   size_t getTotalNumberOfRuns() { return TotalNumberOfRuns; }
    400 
    401   static void StaticAlarmCallback();
    402   static void StaticCrashSignalCallback();
    403   static void StaticInterruptCallback();
    404 
    405   void ExecuteCallback(const uint8_t *Data, size_t Size);
    406   bool RunOne(const uint8_t *Data, size_t Size);
    407 
    408   // Merge Corpora[1:] into Corpora[0].
    409   void Merge(const std::vector<std::string> &Corpora);
    410   // Returns a subset of 'Extra' that adds coverage to 'Initial'.
    411   UnitVector FindExtraUnits(const UnitVector &Initial, const UnitVector &Extra);
    412   MutationDispatcher &GetMD() { return MD; }
    413   void PrintFinalStats();
    414   void SetMaxLen(size_t MaxLen);
    415   void RssLimitCallback();
    416 
    417   // Public for tests.
    418   void ResetCoverage();
    419 
    420   bool InFuzzingThread() const { return IsMyThread; }
    421   size_t GetCurrentUnitInFuzzingThead(const uint8_t **Data) const;
    422 
    423 private:
    424   void AlarmCallback();
    425   void CrashCallback();
    426   void InterruptCallback();
    427   void MutateAndTestOne();
    428   void ReportNewCoverage(const Unit &U);
    429   bool RunOne(const Unit &U) { return RunOne(U.data(), U.size()); }
    430   void RunOneAndUpdateCorpus(const uint8_t *Data, size_t Size);
    431   void WriteToOutputCorpus(const Unit &U);
    432   void WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix);
    433   void PrintStats(const char *Where, const char *End = "\n");
    434   void PrintStatusForNewUnit(const Unit &U);
    435   void ShuffleCorpus(UnitVector *V);
    436   void TryDetectingAMemoryLeak(const uint8_t *Data, size_t Size,
    437                                bool DuringInitialCorpusExecution);
    438 
    439   // Updates the probability distribution for the units in the corpus.
    440   // Must be called whenever the corpus or unit weights are changed.
    441   void UpdateCorpusDistribution();
    442 
    443   bool UpdateMaxCoverage();
    444 
    445   // Trace-based fuzzing: we run a unit with some kind of tracing
    446   // enabled and record potentially useful mutations. Then
    447   // We apply these mutations one by one to the unit and run it again.
    448 
    449   // Start tracing; forget all previously proposed mutations.
    450   void StartTraceRecording();
    451   // Stop tracing.
    452   void StopTraceRecording();
    453 
    454   void SetDeathCallback();
    455   static void StaticDeathCallback();
    456   void DumpCurrentUnit(const char *Prefix);
    457   void DeathCallback();
    458 
    459   void LazyAllocateCurrentUnitData();
    460   uint8_t *CurrentUnitData = nullptr;
    461   std::atomic<size_t> CurrentUnitSize;
    462 
    463   size_t TotalNumberOfRuns = 0;
    464   size_t NumberOfNewUnitsAdded = 0;
    465 
    466   bool HasMoreMallocsThanFrees = false;
    467   size_t NumberOfLeakDetectionAttempts = 0;
    468 
    469   std::vector<Unit> Corpus;
    470   std::unordered_set<std::string> UnitHashesAddedToCorpus;
    471 
    472   std::piecewise_constant_distribution<double> CorpusDistribution;
    473   UserCallback CB;
    474   MutationDispatcher &MD;
    475   FuzzingOptions Options;
    476   system_clock::time_point ProcessStartTime = system_clock::now();
    477   system_clock::time_point UnitStartTime;
    478   long TimeOfLongestUnitInSeconds = 0;
    479   long EpochOfLastReadOfOutputCorpus = 0;
    480 
    481   // Maximum recorded coverage.
    482   Coverage MaxCoverage;
    483 
    484   // Need to know our own thread.
    485   static thread_local bool IsMyThread;
    486 };
    487 
    488 // Global interface to functions that may or may not be available.
    489 extern ExternalFunctions *EF;
    490 
    491 }; // namespace fuzzer
    492 
    493 #endif // LLVM_FUZZER_INTERNAL_H
    494