1 // -*- C++ -*- 2 //===------------------------- fuzzing.cpp -------------------------------===// 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is dual licensed under the MIT and the University of Illinois Open 7 // Source Licenses. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 11 // A set of routines to use when fuzzing the algorithms in libc++ 12 // Each one tests a single algorithm. 13 // 14 // They all have the form of: 15 // int `algorithm`(const uint8_t *data, size_t size); 16 // 17 // They perform the operation, and then check to see if the results are correct. 18 // If so, they return zero, and non-zero otherwise. 19 // 20 // For example, sort calls std::sort, then checks two things: 21 // (1) The resulting vector is sorted 22 // (2) The resulting vector contains the same elements as the original data. 23 24 25 26 #include "fuzzing.h" 27 #include <vector> 28 #include <algorithm> 29 #include <functional> 30 #include <regex> 31 #include <cassert> 32 33 #include <iostream> 34 35 // If we had C++14, we could use the four iterator version of is_permutation and equal 36 37 namespace fuzzing { 38 39 // This is a struct we can use to test the stable_XXX algorithms. 40 // perform the operation on the key, then check the order of the payload. 41 42 struct stable_test { 43 uint8_t key; 44 size_t payload; 45 46 stable_test(uint8_t k) : key(k), payload(0) {} 47 stable_test(uint8_t k, size_t p) : key(k), payload(p) {} 48 }; 49 50 void swap(stable_test &lhs, stable_test &rhs) 51 { 52 using std::swap; 53 swap(lhs.key, rhs.key); 54 swap(lhs.payload, rhs.payload); 55 } 56 57 struct key_less 58 { 59 bool operator () (const stable_test &lhs, const stable_test &rhs) const 60 { 61 return lhs.key < rhs.key; 62 } 63 }; 64 65 struct payload_less 66 { 67 bool operator () (const stable_test &lhs, const stable_test &rhs) const 68 { 69 return lhs.payload < rhs.payload; 70 } 71 }; 72 73 struct total_less 74 { 75 bool operator () (const stable_test &lhs, const stable_test &rhs) const 76 { 77 return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key; 78 } 79 }; 80 81 bool operator==(const stable_test &lhs, const stable_test &rhs) 82 { 83 return lhs.key == rhs.key && lhs.payload == rhs.payload; 84 } 85 86 87 template<typename T> 88 struct is_even 89 { 90 bool operator () (const T &t) const 91 { 92 return t % 2 == 0; 93 } 94 }; 95 96 97 template<> 98 struct is_even<stable_test> 99 { 100 bool operator () (const stable_test &t) const 101 { 102 return t.key % 2 == 0; 103 } 104 }; 105 106 typedef std::vector<uint8_t> Vec; 107 typedef std::vector<stable_test> StableVec; 108 typedef StableVec::const_iterator SVIter; 109 110 // Cheap version of is_permutation 111 // Builds a set of buckets for each of the key values. 112 // Sums all the payloads. 113 // Not 100% perfect, but _way_ faster 114 bool is_permutation(SVIter first1, SVIter last1, SVIter first2) 115 { 116 size_t xBuckets[256] = {0}; 117 size_t xPayloads[256] = {0}; 118 size_t yBuckets[256] = {0}; 119 size_t yPayloads[256] = {0}; 120 121 for (; first1 != last1; ++first1, ++first2) 122 { 123 xBuckets [first1->key]++; 124 xPayloads[first1->key] += first1->payload; 125 126 yBuckets [first2->key]++; 127 yPayloads[first2->key] += first2->payload; 128 } 129 130 for (size_t i = 0; i < 256; ++i) 131 { 132 if (xBuckets[i] != yBuckets[i]) 133 return false; 134 if (xPayloads[i] != yPayloads[i]) 135 return false; 136 } 137 138 return true; 139 } 140 141 template <typename Iter1, typename Iter2> 142 bool is_permutation(Iter1 first1, Iter1 last1, Iter2 first2) 143 { 144 static_assert((std::is_same<typename std::iterator_traits<Iter1>::value_type, uint8_t>::value), ""); 145 static_assert((std::is_same<typename std::iterator_traits<Iter2>::value_type, uint8_t>::value), ""); 146 147 size_t xBuckets[256] = {0}; 148 size_t yBuckets[256] = {0}; 149 150 for (; first1 != last1; ++first1, ++first2) 151 { 152 xBuckets [*first1]++; 153 yBuckets [*first2]++; 154 } 155 156 for (size_t i = 0; i < 256; ++i) 157 if (xBuckets[i] != yBuckets[i]) 158 return false; 159 160 return true; 161 } 162 163 // == sort == 164 int sort(const uint8_t *data, size_t size) 165 { 166 Vec working(data, data + size); 167 std::sort(working.begin(), working.end()); 168 169 if (!std::is_sorted(working.begin(), working.end())) return 1; 170 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 171 return 0; 172 } 173 174 175 // == stable_sort == 176 int stable_sort(const uint8_t *data, size_t size) 177 { 178 StableVec input; 179 for (size_t i = 0; i < size; ++i) 180 input.push_back(stable_test(data[i], i)); 181 StableVec working = input; 182 std::stable_sort(working.begin(), working.end(), key_less()); 183 184 if (!std::is_sorted(working.begin(), working.end(), key_less())) return 1; 185 auto iter = working.begin(); 186 while (iter != working.end()) 187 { 188 auto range = std::equal_range(iter, working.end(), *iter, key_less()); 189 if (!std::is_sorted(range.first, range.second, total_less())) return 2; 190 iter = range.second; 191 } 192 if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99; 193 return 0; 194 } 195 196 // == partition == 197 int partition(const uint8_t *data, size_t size) 198 { 199 Vec working(data, data + size); 200 auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>()); 201 202 if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1; 203 if (!std::none_of(iter, working.end(), is_even<uint8_t>())) return 2; 204 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 205 return 0; 206 } 207 208 209 // == partition_copy == 210 int partition_copy(const uint8_t *data, size_t size) 211 { 212 Vec v1, v2; 213 auto iter = std::partition_copy(data, data + size, 214 std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2), 215 is_even<uint8_t>()); 216 217 // The two vectors should add up to the original size 218 if (v1.size() + v2.size() != size) return 1; 219 220 // All of the even values should be in the first vector, and none in the second 221 if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2; 222 if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3; 223 224 // Every value in both vectors has to be in the original 225 for (auto v: v1) 226 if (std::find(data, data + size, v) == data + size) return 4; 227 228 for (auto v: v2) 229 if (std::find(data, data + size, v) == data + size) return 5; 230 231 return 0; 232 } 233 234 // == stable_partition == 235 int stable_partition (const uint8_t *data, size_t size) 236 { 237 StableVec input; 238 for (size_t i = 0; i < size; ++i) 239 input.push_back(stable_test(data[i], i)); 240 StableVec working = input; 241 auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>()); 242 243 if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1; 244 if (!std::none_of(iter, working.end(), is_even<stable_test>())) return 2; 245 if (!std::is_sorted(working.begin(), iter, payload_less())) return 3; 246 if (!std::is_sorted(iter, working.end(), payload_less())) return 4; 247 if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99; 248 return 0; 249 } 250 251 // == nth_element == 252 // use the first element as a position into the data 253 int nth_element (const uint8_t *data, size_t size) 254 { 255 if (size <= 1) return 0; 256 const size_t partition_point = data[0] % size; 257 Vec working(data + 1, data + size); 258 const auto partition_iter = working.begin() + partition_point; 259 std::nth_element(working.begin(), partition_iter, working.end()); 260 261 // nth may be the end iterator, in this case nth_element has no effect. 262 if (partition_iter == working.end()) 263 { 264 if (!std::equal(data + 1, data + size, working.begin())) return 98; 265 } 266 else 267 { 268 const uint8_t nth = *partition_iter; 269 if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; })) 270 return 1; 271 if (!std::all_of(partition_iter, working.end(), [=](uint8_t v) { return v >= nth; })) 272 return 2; 273 if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99; 274 } 275 276 return 0; 277 } 278 279 // == partial_sort == 280 // use the first element as a position into the data 281 int partial_sort (const uint8_t *data, size_t size) 282 { 283 if (size <= 1) return 0; 284 const size_t sort_point = data[0] % size; 285 Vec working(data + 1, data + size); 286 const auto sort_iter = working.begin() + sort_point; 287 std::partial_sort(working.begin(), sort_iter, working.end()); 288 289 if (sort_iter != working.end()) 290 { 291 const uint8_t nth = *std::min_element(sort_iter, working.end()); 292 if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; })) 293 return 1; 294 if (!std::all_of(sort_iter, working.end(), [=](uint8_t v) { return v >= nth; })) 295 return 2; 296 } 297 if (!std::is_sorted(working.begin(), sort_iter)) return 3; 298 if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99; 299 300 return 0; 301 } 302 303 304 // == partial_sort_copy == 305 // use the first element as a count 306 int partial_sort_copy (const uint8_t *data, size_t size) 307 { 308 if (size <= 1) return 0; 309 const size_t num_results = data[0] % size; 310 Vec results(num_results); 311 (void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end()); 312 313 // The results have to be sorted 314 if (!std::is_sorted(results.begin(), results.end())) return 1; 315 // All the values in results have to be in the original data 316 for (auto v: results) 317 if (std::find(data + 1, data + size, v) == data + size) return 2; 318 319 // The things in results have to be the smallest N in the original data 320 Vec sorted(data + 1, data + size); 321 std::sort(sorted.begin(), sorted.end()); 322 if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3; 323 return 0; 324 } 325 326 // The second sequence has been "uniqued" 327 template <typename Iter1, typename Iter2> 328 static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2) 329 { 330 assert(first1 != last1 && first2 != last2); 331 if (*first1 != *first2) return false; 332 333 uint8_t last_value = *first1; 334 ++first1; ++first2; 335 while(first1 != last1 && first2 != last2) 336 { 337 // Skip over dups in the first sequence 338 while (*first1 == last_value) 339 if (++first1 == last1) return false; 340 if (*first1 != *first2) return false; 341 last_value = *first1; 342 ++first1; ++first2; 343 } 344 345 // Still stuff left in the 'uniqued' sequence - oops 346 if (first1 == last1 && first2 != last2) return false; 347 348 // Still stuff left in the original sequence - better be all the same 349 while (first1 != last1) 350 { 351 if (*first1 != last_value) return false; 352 ++first1; 353 } 354 return true; 355 } 356 357 // == unique == 358 int unique (const uint8_t *data, size_t size) 359 { 360 Vec working(data, data + size); 361 std::sort(working.begin(), working.end()); 362 Vec results = working; 363 Vec::iterator new_end = std::unique(results.begin(), results.end()); 364 Vec::iterator it; // scratch iterator 365 366 // Check the size of the unique'd sequence. 367 // it should only be zero if the input sequence was empty. 368 if (results.begin() == new_end) 369 return working.size() == 0 ? 0 : 1; 370 371 // 'results' is sorted 372 if (!std::is_sorted(results.begin(), new_end)) return 2; 373 374 // All the elements in 'results' must be different 375 it = results.begin(); 376 uint8_t prev_value = *it++; 377 for (; it != new_end; ++it) 378 { 379 if (*it == prev_value) return 3; 380 prev_value = *it; 381 } 382 383 // Every element in 'results' must be in 'working' 384 for (it = results.begin(); it != new_end; ++it) 385 if (std::find(working.begin(), working.end(), *it) == working.end()) 386 return 4; 387 388 // Every element in 'working' must be in 'results' 389 for (auto v : working) 390 if (std::find(results.begin(), new_end, v) == new_end) 391 return 5; 392 393 return 0; 394 } 395 396 // == unique_copy == 397 int unique_copy (const uint8_t *data, size_t size) 398 { 399 Vec working(data, data + size); 400 std::sort(working.begin(), working.end()); 401 Vec results; 402 (void) std::unique_copy(working.begin(), working.end(), 403 std::back_inserter<Vec>(results)); 404 Vec::iterator it; // scratch iterator 405 406 // Check the size of the unique'd sequence. 407 // it should only be zero if the input sequence was empty. 408 if (results.size() == 0) 409 return working.size() == 0 ? 0 : 1; 410 411 // 'results' is sorted 412 if (!std::is_sorted(results.begin(), results.end())) return 2; 413 414 // All the elements in 'results' must be different 415 it = results.begin(); 416 uint8_t prev_value = *it++; 417 for (; it != results.end(); ++it) 418 { 419 if (*it == prev_value) return 3; 420 prev_value = *it; 421 } 422 423 // Every element in 'results' must be in 'working' 424 for (auto v : results) 425 if (std::find(working.begin(), working.end(), v) == working.end()) 426 return 4; 427 428 // Every element in 'working' must be in 'results' 429 for (auto v : working) 430 if (std::find(results.begin(), results.end(), v) == results.end()) 431 return 5; 432 433 return 0; 434 } 435 436 437 // -- regex fuzzers 438 static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag) 439 { 440 if (size > 0) 441 { 442 try 443 { 444 std::string s((const char *)data, size); 445 std::regex re(s, flag); 446 return std::regex_match(s, re) ? 1 : 0; 447 } 448 catch (std::regex_error &ex) {} 449 } 450 return 0; 451 } 452 453 454 int regex_ECMAScript (const uint8_t *data, size_t size) 455 { 456 (void) regex_helper(data, size, std::regex_constants::ECMAScript); 457 return 0; 458 } 459 460 int regex_POSIX (const uint8_t *data, size_t size) 461 { 462 (void) regex_helper(data, size, std::regex_constants::basic); 463 return 0; 464 } 465 466 int regex_extended (const uint8_t *data, size_t size) 467 { 468 (void) regex_helper(data, size, std::regex_constants::extended); 469 return 0; 470 } 471 472 int regex_awk (const uint8_t *data, size_t size) 473 { 474 (void) regex_helper(data, size, std::regex_constants::awk); 475 return 0; 476 } 477 478 int regex_grep (const uint8_t *data, size_t size) 479 { 480 (void) regex_helper(data, size, std::regex_constants::grep); 481 return 0; 482 } 483 484 int regex_egrep (const uint8_t *data, size_t size) 485 { 486 (void) regex_helper(data, size, std::regex_constants::egrep); 487 return 0; 488 } 489 490 // -- heap fuzzers 491 int make_heap (const uint8_t *data, size_t size) 492 { 493 Vec working(data, data + size); 494 std::make_heap(working.begin(), working.end()); 495 496 if (!std::is_heap(working.begin(), working.end())) return 1; 497 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 498 return 0; 499 } 500 501 int push_heap (const uint8_t *data, size_t size) 502 { 503 if (size < 2) return 0; 504 505 // Make a heap from the first half of the data 506 Vec working(data, data + size); 507 auto iter = working.begin() + (size / 2); 508 std::make_heap(working.begin(), iter); 509 if (!std::is_heap(working.begin(), iter)) return 1; 510 511 // Now push the rest onto the heap, one at a time 512 ++iter; 513 for (; iter != working.end(); ++iter) { 514 std::push_heap(working.begin(), iter); 515 if (!std::is_heap(working.begin(), iter)) return 2; 516 } 517 518 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 519 return 0; 520 } 521 522 int pop_heap (const uint8_t *data, size_t size) 523 { 524 if (size < 2) return 0; 525 Vec working(data, data + size); 526 std::make_heap(working.begin(), working.end()); 527 528 // Pop things off, one at a time 529 auto iter = --working.end(); 530 while (iter != working.begin()) { 531 std::pop_heap(working.begin(), iter); 532 if (!std::is_heap(working.begin(), --iter)) return 2; 533 } 534 535 return 0; 536 } 537 538 539 // -- search fuzzers 540 int search (const uint8_t *data, size_t size) 541 { 542 if (size < 2) return 0; 543 544 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 545 assert(pat_size <= size - 1); 546 const uint8_t *pat_begin = data + 1; 547 const uint8_t *pat_end = pat_begin + pat_size; 548 const uint8_t *data_end = data + size; 549 assert(pat_end <= data_end); 550 // std::cerr << "data[0] = " << size_t(data[0]) << " "; 551 // std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl; 552 auto it = std::search(pat_end, data_end, pat_begin, pat_end); 553 if (it != data_end) // not found 554 if (!std::equal(pat_begin, pat_end, it)) 555 return 1; 556 return 0; 557 } 558 559 template <typename S> 560 static int search_helper (const uint8_t *data, size_t size) 561 { 562 if (size < 2) return 0; 563 564 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 565 const uint8_t *pat_begin = data + 1; 566 const uint8_t *pat_end = pat_begin + pat_size; 567 const uint8_t *data_end = data + size; 568 569 auto it = std::search(pat_end, data_end, S(pat_begin, pat_end)); 570 if (it != data_end) // not found 571 if (!std::equal(pat_begin, pat_end, it)) 572 return 1; 573 return 0; 574 } 575 576 // These are still in std::experimental 577 // int search_boyer_moore (const uint8_t *data, size_t size) 578 // { 579 // return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size); 580 // } 581 // 582 // int search_boyer_moore_horspool (const uint8_t *data, size_t size) 583 // { 584 // return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size); 585 // } 586 587 588 // -- set operation fuzzers 589 template <typename S> 590 static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2) 591 { 592 assert(size > 1); 593 594 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 595 const uint8_t *pat_begin = data + 1; 596 const uint8_t *pat_end = pat_begin + pat_size; 597 const uint8_t *data_end = data + size; 598 v1.assign(pat_begin, pat_end); 599 v2.assign(pat_end, data_end); 600 601 std::sort(v1.begin(), v1.end()); 602 std::sort(v2.begin(), v2.end()); 603 } 604 605 } // namespace fuzzing 606