Home | History | Annotate | Download | only in Fuzzer
      1 //===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 // Merging Corpora.
     10 //
     11 // The task:
     12 //   Take the existing corpus (possibly empty) and merge new inputs into
     13 //   it so that only inputs with new coverage ('features') are added.
     14 //   The process should tolerate the crashes, OOMs, leaks, etc.
     15 //
     16 // Algorithm:
     17 //   The outer process collects the set of files and writes their names
     18 //   into a temporary "control" file, then repeatedly launches the inner
     19 //   process until all inputs are processed.
     20 //   The outer process does not actually execute the target code.
     21 //
     22 //   The inner process reads the control file and sees a) list of all the inputs
     23 //   and b) the last processed input. Then it starts processing the inputs one
     24 //   by one. Before processing every input it writes one line to control file:
     25 //   STARTED INPUT_ID INPUT_SIZE
     26 //   After processing an input it writes another line:
     27 //   DONE INPUT_ID Feature1 Feature2 Feature3 ...
     28 //   If a crash happens while processing an input the last line in the control
     29 //   file will be "STARTED INPUT_ID INPUT_SIZE" and so the next process will
     30 //   know where to resume.
     31 //
     32 //   Once all inputs are processed by the inner process(es) the outer process
     33 //   reads the control files and does the merge based entirely on the contents
     34 //   of control file.
     35 //   It uses a single-pass greedy algorithm, choosing first the smallest inputs
     36 //   and, within the same size, the inputs that have more new features.
     37 //
     38 //===----------------------------------------------------------------------===//
     39 
     40 #ifndef LLVM_FUZZER_MERGE_H
     41 #define LLVM_FUZZER_MERGE_H
     42 
     43 #include "FuzzerDefs.h"
     44 
     45 #include <istream>
     46 #include <ostream>
     47 #include <set>
     48 #include <vector>
     49 
     50 namespace fuzzer {
     51 
     52 struct MergeFileInfo {  // Record describing one input file that takes part in a merge.
     53   std::string Name;  // File name; presumably as listed in the control file - confirm in the implementation.
     54   size_t Size = 0;  // Input size (cf. INPUT_SIZE on the STARTED line); default-initialized to 0.
     55   std::vector<uint32_t> Features;  // Feature ids for this input; presumably those from its DONE line - TODO confirm.
     56 };
     57 
     58 struct Merger {  // Holds the per-file records and declares the parse/merge operations; all but one are defined out of line.
     59   std::vector<MergeFileInfo> Files;  // Per-file records; presumably one per input named in the control file - confirm.
     60   size_t NumFilesInFirstCorpus = 0;  // Starts at 0; presumably how many leading Files entries belong to the existing corpus - TODO confirm.
     61   size_t FirstNotProcessedFile = 0;  // Starts at 0; presumably the index at which an inner process resumes - TODO confirm.
     62   std::string LastFailure;  // NOTE(review): looks like the input whose processing last failed - confirm in the implementation.
     63 
     64   bool Parse(std::istream &IS, bool ParseCoverage);  // Parses from a stream; the bool result presumably signals success - confirm.
     65   bool Parse(const std::string &Str, bool ParseCoverage);  // Overload taking the text as a string instead of a stream.
     66   void ParseOrExit(std::istream &IS, bool ParseCoverage);  // Presumably Parse() that terminates the process on failure - confirm.
     67   void PrintSummary(std::ostream &OS);  // Emits a summary to OS; the format is defined by the implementation.
     68   std::set<uint32_t> ParseSummary(std::istream &IS);  // Reads from IS and returns a set of ids; presumably the counterpart of PrintSummary - confirm.
     69   size_t Merge(const std::set<uint32_t> &InitialFeatures,  // Main merge entry point; meaning of the size_t result is not shown here.
     70                std::vector<std::string> *NewFiles);  // NewFiles presumably receives the names of inputs worth adding - TODO confirm.
     71   size_t Merge(std::vector<std::string> *NewFiles) {  // Convenience overload: forwards with an empty InitialFeatures set.
     72     return Merge({}, NewFiles);
     73   }
     74   size_t ApproximateMemoryConsumption() const;  // Const query; how the estimate is computed is defined in the implementation.
     75   std::set<uint32_t> AllFeatures() const;  // Const query returning a set of ids; presumably the union over Files - confirm.
     76 };
     77 
     78 }  // namespace fuzzer
     79 
     80 #endif  // LLVM_FUZZER_MERGE_H
     81