1 //===- FuzzerCorpus.h - Internal header for the Fuzzer ----------*- C++ -* ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // fuzzer::InputCorpus 10 //===----------------------------------------------------------------------===// 11 12 #ifndef LLVM_FUZZER_CORPUS 13 #define LLVM_FUZZER_CORPUS 14 15 #include "FuzzerDefs.h" 16 #include "FuzzerIO.h" 17 #include "FuzzerRandom.h" 18 #include "FuzzerSHA1.h" 19 #include "FuzzerTracePC.h" 20 #include <algorithm> 21 #include <numeric> 22 #include <random> 23 #include <unordered_set> 24 25 namespace fuzzer { 26 27 struct InputInfo { 28 Unit U; // The actual input data. 29 uint8_t Sha1[kSHA1NumBytes]; // Checksum. 30 // Number of features that this input has and no smaller input has. 31 size_t NumFeatures = 0; 32 size_t Tmp = 0; // Used by ValidateFeatureSet. 33 // Stats. 34 size_t NumExecutedMutations = 0; 35 size_t NumSuccessfullMutations = 0; 36 bool MayDeleteFile = false; 37 }; 38 39 class InputCorpus { 40 static const size_t kFeatureSetSize = 1 << 21; 41 public: 42 InputCorpus(const std::string &OutputCorpus) : OutputCorpus(OutputCorpus) { 43 memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); 44 memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); 45 } 46 ~InputCorpus() { 47 for (auto II : Inputs) 48 delete II; 49 } 50 size_t size() const { return Inputs.size(); } 51 size_t SizeInBytes() const { 52 size_t Res = 0; 53 for (auto II : Inputs) 54 Res += II->U.size(); 55 return Res; 56 } 57 size_t NumActiveUnits() const { 58 size_t Res = 0; 59 for (auto II : Inputs) 60 Res += !II->U.empty(); 61 return Res; 62 } 63 size_t MaxInputSize() const { 64 size_t Res = 0; 65 for (auto II : Inputs) 66 Res = std::max(Res, II->U.size()); 67 return Res; 68 } 69 bool empty() const { return Inputs.empty(); } 70 const Unit &operator[] (size_t Idx) const { return Inputs[Idx]->U; } 71 void AddToCorpus(const Unit &U, size_t NumFeatures, 72 bool MayDeleteFile = false) { 73 assert(!U.empty()); 74 uint8_t Hash[kSHA1NumBytes]; 75 if (FeatureDebug) 76 Printf("ADD_TO_CORPUS %zd NF %zd\n", Inputs.size(), NumFeatures); 77 ComputeSHA1(U.data(), U.size(), Hash); 78 Hashes.insert(Sha1ToString(Hash)); 79 Inputs.push_back(new InputInfo()); 80 InputInfo &II = *Inputs.back(); 81 II.U = U; 82 II.NumFeatures = NumFeatures; 83 II.MayDeleteFile = MayDeleteFile; 84 memcpy(II.Sha1, Hash, kSHA1NumBytes); 85 UpdateCorpusDistribution(); 86 // ValidateFeatureSet(); 87 } 88 89 bool HasUnit(const Unit &U) { return Hashes.count(Hash(U)); } 90 bool HasUnit(const std::string &H) { return Hashes.count(H); } 91 InputInfo &ChooseUnitToMutate(Random &Rand) { 92 InputInfo &II = *Inputs[ChooseUnitIdxToMutate(Rand)]; 93 assert(!II.U.empty()); 94 return II; 95 }; 96 97 // Returns an index of random unit from the corpus to mutate. 98 // Hypothesis: units added to the corpus last are more likely to be 99 // interesting. This function gives more weight to the more recent units. 100 size_t ChooseUnitIdxToMutate(Random &Rand) { 101 size_t Idx = static_cast<size_t>(CorpusDistribution(Rand)); 102 assert(Idx < Inputs.size()); 103 return Idx; 104 } 105 106 void PrintStats() { 107 for (size_t i = 0; i < Inputs.size(); i++) { 108 const auto &II = *Inputs[i]; 109 Printf(" [%zd %s]\tsz: %zd\truns: %zd\tsucc: %zd\n", i, 110 Sha1ToString(II.Sha1).c_str(), II.U.size(), 111 II.NumExecutedMutations, II.NumSuccessfullMutations); 112 } 113 } 114 115 void PrintFeatureSet() { 116 for (size_t i = 0; i < kFeatureSetSize; i++) { 117 if(size_t Sz = GetFeature(i)) 118 Printf("[%zd: id %zd sz%zd] ", i, SmallestElementPerFeature[i], Sz); 119 } 120 Printf("\n\t"); 121 for (size_t i = 0; i < Inputs.size(); i++) 122 if (size_t N = Inputs[i]->NumFeatures) 123 Printf(" %zd=>%zd ", i, N); 124 Printf("\n"); 125 } 126 127 void DeleteInput(size_t Idx) { 128 InputInfo &II = *Inputs[Idx]; 129 if (!OutputCorpus.empty() && II.MayDeleteFile) 130 RemoveFile(DirPlusFile(OutputCorpus, Sha1ToString(II.Sha1))); 131 Unit().swap(II.U); 132 if (FeatureDebug) 133 Printf("EVICTED %zd\n", Idx); 134 } 135 136 void AddFeature(size_t Idx, uint32_t NewSize, bool Shrink) { 137 assert(NewSize); 138 Idx = Idx % kFeatureSetSize; 139 uint32_t OldSize = GetFeature(Idx); 140 if (OldSize == 0 || (Shrink && OldSize > NewSize)) { 141 if (OldSize > 0) { 142 size_t OldIdx = SmallestElementPerFeature[Idx]; 143 InputInfo &II = *Inputs[OldIdx]; 144 assert(II.NumFeatures > 0); 145 II.NumFeatures--; 146 if (II.NumFeatures == 0) 147 DeleteInput(OldIdx); 148 } else { 149 NumAddedFeatures++; 150 } 151 NumUpdatedFeatures++; 152 if (FeatureDebug) 153 Printf("ADD FEATURE %zd sz %d\n", Idx, NewSize); 154 SmallestElementPerFeature[Idx] = Inputs.size(); 155 InputSizesPerFeature[Idx] = NewSize; 156 CountingFeatures = true; 157 } 158 } 159 160 size_t NumFeatures() const { return NumAddedFeatures; } 161 size_t NumFeatureUpdates() const { return NumUpdatedFeatures; } 162 163 void ResetFeatureSet() { 164 assert(Inputs.empty()); 165 memset(InputSizesPerFeature, 0, sizeof(InputSizesPerFeature)); 166 memset(SmallestElementPerFeature, 0, sizeof(SmallestElementPerFeature)); 167 } 168 169 private: 170 171 static const bool FeatureDebug = false; 172 173 size_t GetFeature(size_t Idx) const { return InputSizesPerFeature[Idx]; } 174 175 void ValidateFeatureSet() { 176 if (!CountingFeatures) return; 177 if (FeatureDebug) 178 PrintFeatureSet(); 179 for (size_t Idx = 0; Idx < kFeatureSetSize; Idx++) 180 if (GetFeature(Idx)) 181 Inputs[SmallestElementPerFeature[Idx]]->Tmp++; 182 for (auto II: Inputs) { 183 if (II->Tmp != II->NumFeatures) 184 Printf("ZZZ %zd %zd\n", II->Tmp, II->NumFeatures); 185 assert(II->Tmp == II->NumFeatures); 186 II->Tmp = 0; 187 } 188 } 189 190 // Updates the probability distribution for the units in the corpus. 191 // Must be called whenever the corpus or unit weights are changed. 192 void UpdateCorpusDistribution() { 193 size_t N = Inputs.size(); 194 Intervals.resize(N + 1); 195 Weights.resize(N); 196 std::iota(Intervals.begin(), Intervals.end(), 0); 197 if (CountingFeatures) 198 for (size_t i = 0; i < N; i++) 199 Weights[i] = Inputs[i]->NumFeatures * (i + 1); 200 else 201 std::iota(Weights.begin(), Weights.end(), 1); 202 CorpusDistribution = std::piecewise_constant_distribution<double>( 203 Intervals.begin(), Intervals.end(), Weights.begin()); 204 } 205 std::piecewise_constant_distribution<double> CorpusDistribution; 206 207 std::vector<double> Intervals; 208 std::vector<double> Weights; 209 210 std::unordered_set<std::string> Hashes; 211 std::vector<InputInfo*> Inputs; 212 213 bool CountingFeatures = false; 214 size_t NumAddedFeatures = 0; 215 size_t NumUpdatedFeatures = 0; 216 uint32_t InputSizesPerFeature[kFeatureSetSize]; 217 uint32_t SmallestElementPerFeature[kFeatureSetSize]; 218 219 std::string OutputCorpus; 220 }; 221 222 } // namespace fuzzer 223 224 #endif // LLVM_FUZZER_CORPUS 225