1 // -*- C++ -*- 2 //===------------------------- fuzzing.cpp -------------------------------===// 3 // 4 // The LLVM Compiler Infrastructure 5 // 6 // This file is dual licensed under the MIT and the University of Illinois Open 7 // Source Licenses. See LICENSE.TXT for details. 8 // 9 //===----------------------------------------------------------------------===// 10 11 // A set of routines to use when fuzzing the algorithms in libc++ 12 // Each one tests a single algorithm. 13 // 14 // They all have the form of: 15 // int `algorithm`(const uint8_t *data, size_t size); 16 // 17 // They perform the operation, and then check to see if the results are correct. 18 // If so, they return zero, and non-zero otherwise. 19 // 20 // For example, sort calls std::sort, then checks two things: 21 // (1) The resulting vector is sorted 22 // (2) The resulting vector contains the same elements as the original data. 23 24 25 26 #include "fuzzing.h" 27 #include <vector> 28 #include <algorithm> 29 #include <functional> 30 #include <regex> 31 #include <cassert> 32 33 #include <iostream> 34 35 // If we had C++14, we could use the four iterator version of is_permutation and equal 36 37 namespace fuzzing { 38 39 // This is a struct we can use to test the stable_XXX algorithms. 40 // perform the operation on the key, then check the order of the payload. 41 42 struct stable_test { 43 uint8_t key; 44 size_t payload; 45 46 stable_test(uint8_t k) : key(k), payload(0) {} 47 stable_test(uint8_t k, size_t p) : key(k), payload(p) {} 48 }; 49 50 void swap(stable_test &lhs, stable_test &rhs) 51 { 52 using std::swap; 53 swap(lhs.key, rhs.key); 54 swap(lhs.payload, rhs.payload); 55 } 56 57 struct key_less 58 { 59 bool operator () (const stable_test &lhs, const stable_test &rhs) const 60 { 61 return lhs.key < rhs.key; 62 } 63 }; 64 65 struct payload_less 66 { 67 bool operator () (const stable_test &lhs, const stable_test &rhs) const 68 { 69 return lhs.payload < rhs.payload; 70 } 71 }; 72 73 struct total_less 74 { 75 bool operator () (const stable_test &lhs, const stable_test &rhs) const 76 { 77 return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key; 78 } 79 }; 80 81 bool operator==(const stable_test &lhs, const stable_test &rhs) 82 { 83 return lhs.key == rhs.key && lhs.payload == rhs.payload; 84 } 85 86 87 template<typename T> 88 struct is_even 89 { 90 bool operator () (const T &t) const 91 { 92 return t % 2 == 0; 93 } 94 }; 95 96 97 template<> 98 struct is_even<stable_test> 99 { 100 bool operator () (const stable_test &t) const 101 { 102 return t.key % 2 == 0; 103 } 104 }; 105 106 typedef std::vector<uint8_t> Vec; 107 typedef std::vector<stable_test> StableVec; 108 typedef StableVec::const_iterator SVIter; 109 110 // Cheap version of is_permutation 111 // Builds a set of buckets for each of the key values. 112 // Sums all the payloads. 113 // Not 100% perfect, but _way_ faster 114 bool is_permutation(SVIter first1, SVIter last1, SVIter first2) 115 { 116 size_t xBuckets[256] = {0}; 117 size_t xPayloads[256] = {0}; 118 size_t yBuckets[256] = {0}; 119 size_t yPayloads[256] = {0}; 120 121 for (; first1 != last1; ++first1, ++first2) 122 { 123 xBuckets [first1->key]++; 124 xPayloads[first1->key] += first1->payload; 125 126 yBuckets [first2->key]++; 127 yPayloads[first2->key] += first2->payload; 128 } 129 130 for (size_t i = 0; i < 256; ++i) 131 { 132 if (xBuckets[i] != yBuckets[i]) 133 return false; 134 if (xPayloads[i] != yPayloads[i]) 135 return false; 136 } 137 138 return true; 139 } 140 141 template <typename Iter1, typename Iter2> 142 bool is_permutation(Iter1 first1, Iter1 last1, Iter2 first2) 143 { 144 static_assert((std::is_same<typename std::iterator_traits<Iter1>::value_type, uint8_t>::value), ""); 145 static_assert((std::is_same<typename std::iterator_traits<Iter2>::value_type, uint8_t>::value), ""); 146 147 size_t xBuckets[256] = {0}; 148 size_t yBuckets[256] = {0}; 149 150 for (; first1 != last1; ++first1, ++first2) 151 { 152 xBuckets [*first1]++; 153 yBuckets [*first2]++; 154 } 155 156 for (size_t i = 0; i < 256; ++i) 157 if (xBuckets[i] != yBuckets[i]) 158 return false; 159 160 return true; 161 } 162 163 // == sort == 164 int sort(const uint8_t *data, size_t size) 165 { 166 Vec working(data, data + size); 167 std::sort(working.begin(), working.end()); 168 169 if (!std::is_sorted(working.begin(), working.end())) return 1; 170 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 171 return 0; 172 } 173 174 175 // == stable_sort == 176 int stable_sort(const uint8_t *data, size_t size) 177 { 178 StableVec input; 179 for (size_t i = 0; i < size; ++i) 180 input.push_back(stable_test(data[i], i)); 181 StableVec working = input; 182 std::stable_sort(working.begin(), working.end(), key_less()); 183 184 if (!std::is_sorted(working.begin(), working.end(), key_less())) return 1; 185 auto iter = working.begin(); 186 while (iter != working.end()) 187 { 188 auto range = std::equal_range(iter, working.end(), *iter, key_less()); 189 if (!std::is_sorted(range.first, range.second, total_less())) return 2; 190 iter = range.second; 191 } 192 if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99; 193 return 0; 194 } 195 196 // == partition == 197 int partition(const uint8_t *data, size_t size) 198 { 199 Vec working(data, data + size); 200 auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>()); 201 202 if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1; 203 if (!std::none_of(iter, working.end(), is_even<uint8_t>())) return 2; 204 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 205 return 0; 206 } 207 208 209 // == partition_copy == 210 int partition_copy(const uint8_t *data, size_t size) 211 { 212 Vec v1, v2; 213 auto iter = std::partition_copy(data, data + size, 214 std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2), 215 is_even<uint8_t>()); 216 217 // The two vectors should add up to the original size 218 if (v1.size() + v2.size() != size) return 1; 219 220 // All of the even values should be in the first vector, and none in the second 221 if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2; 222 if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3; 223 224 // Every value in both vectors has to be in the original 225 226 // Make a copy of the input, and sort it 227 Vec v0{data, data + size}; 228 std::sort(v0.begin(), v0.end()); 229 230 // Sort each vector and ensure that all of the elements appear in the original input 231 std::sort(v1.begin(), v1.end()); 232 if (!std::includes(v0.begin(), v0.end(), v1.begin(), v1.end())) return 4; 233 234 std::sort(v2.begin(), v2.end()); 235 if (!std::includes(v0.begin(), v0.end(), v2.begin(), v2.end())) return 5; 236 237 // This, while simple, is really slow - 20 seconds on a 500K element input. 238 // for (auto v: v1) 239 // if (std::find(data, data + size, v) == data + size) return 4; 240 // 241 // for (auto v: v2) 242 // if (std::find(data, data + size, v) == data + size) return 5; 243 244 return 0; 245 } 246 247 // == stable_partition == 248 int stable_partition (const uint8_t *data, size_t size) 249 { 250 StableVec input; 251 for (size_t i = 0; i < size; ++i) 252 input.push_back(stable_test(data[i], i)); 253 StableVec working = input; 254 auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>()); 255 256 if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1; 257 if (!std::none_of(iter, working.end(), is_even<stable_test>())) return 2; 258 if (!std::is_sorted(working.begin(), iter, payload_less())) return 3; 259 if (!std::is_sorted(iter, working.end(), payload_less())) return 4; 260 if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99; 261 return 0; 262 } 263 264 // == nth_element == 265 // use the first element as a position into the data 266 int nth_element (const uint8_t *data, size_t size) 267 { 268 if (size <= 1) return 0; 269 const size_t partition_point = data[0] % size; 270 Vec working(data + 1, data + size); 271 const auto partition_iter = working.begin() + partition_point; 272 std::nth_element(working.begin(), partition_iter, working.end()); 273 274 // nth may be the end iterator, in this case nth_element has no effect. 275 if (partition_iter == working.end()) 276 { 277 if (!std::equal(data + 1, data + size, working.begin())) return 98; 278 } 279 else 280 { 281 const uint8_t nth = *partition_iter; 282 if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; })) 283 return 1; 284 if (!std::all_of(partition_iter, working.end(), [=](uint8_t v) { return v >= nth; })) 285 return 2; 286 if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99; 287 } 288 289 return 0; 290 } 291 292 // == partial_sort == 293 // use the first element as a position into the data 294 int partial_sort (const uint8_t *data, size_t size) 295 { 296 if (size <= 1) return 0; 297 const size_t sort_point = data[0] % size; 298 Vec working(data + 1, data + size); 299 const auto sort_iter = working.begin() + sort_point; 300 std::partial_sort(working.begin(), sort_iter, working.end()); 301 302 if (sort_iter != working.end()) 303 { 304 const uint8_t nth = *std::min_element(sort_iter, working.end()); 305 if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; })) 306 return 1; 307 if (!std::all_of(sort_iter, working.end(), [=](uint8_t v) { return v >= nth; })) 308 return 2; 309 } 310 if (!std::is_sorted(working.begin(), sort_iter)) return 3; 311 if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99; 312 313 return 0; 314 } 315 316 317 // == partial_sort_copy == 318 // use the first element as a count 319 int partial_sort_copy (const uint8_t *data, size_t size) 320 { 321 if (size <= 1) return 0; 322 const size_t num_results = data[0] % size; 323 Vec results(num_results); 324 (void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end()); 325 326 // The results have to be sorted 327 if (!std::is_sorted(results.begin(), results.end())) return 1; 328 // All the values in results have to be in the original data 329 for (auto v: results) 330 if (std::find(data + 1, data + size, v) == data + size) return 2; 331 332 // The things in results have to be the smallest N in the original data 333 Vec sorted(data + 1, data + size); 334 std::sort(sorted.begin(), sorted.end()); 335 if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3; 336 return 0; 337 } 338 339 // The second sequence has been "uniqued" 340 template <typename Iter1, typename Iter2> 341 static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2) 342 { 343 assert(first1 != last1 && first2 != last2); 344 if (*first1 != *first2) return false; 345 346 uint8_t last_value = *first1; 347 ++first1; ++first2; 348 while(first1 != last1 && first2 != last2) 349 { 350 // Skip over dups in the first sequence 351 while (*first1 == last_value) 352 if (++first1 == last1) return false; 353 if (*first1 != *first2) return false; 354 last_value = *first1; 355 ++first1; ++first2; 356 } 357 358 // Still stuff left in the 'uniqued' sequence - oops 359 if (first1 == last1 && first2 != last2) return false; 360 361 // Still stuff left in the original sequence - better be all the same 362 while (first1 != last1) 363 { 364 if (*first1 != last_value) return false; 365 ++first1; 366 } 367 return true; 368 } 369 370 // == unique == 371 int unique (const uint8_t *data, size_t size) 372 { 373 Vec working(data, data + size); 374 std::sort(working.begin(), working.end()); 375 Vec results = working; 376 Vec::iterator new_end = std::unique(results.begin(), results.end()); 377 Vec::iterator it; // scratch iterator 378 379 // Check the size of the unique'd sequence. 380 // it should only be zero if the input sequence was empty. 381 if (results.begin() == new_end) 382 return working.size() == 0 ? 0 : 1; 383 384 // 'results' is sorted 385 if (!std::is_sorted(results.begin(), new_end)) return 2; 386 387 // All the elements in 'results' must be different 388 it = results.begin(); 389 uint8_t prev_value = *it++; 390 for (; it != new_end; ++it) 391 { 392 if (*it == prev_value) return 3; 393 prev_value = *it; 394 } 395 396 // Every element in 'results' must be in 'working' 397 for (it = results.begin(); it != new_end; ++it) 398 if (std::find(working.begin(), working.end(), *it) == working.end()) 399 return 4; 400 401 // Every element in 'working' must be in 'results' 402 for (auto v : working) 403 if (std::find(results.begin(), new_end, v) == new_end) 404 return 5; 405 406 return 0; 407 } 408 409 // == unique_copy == 410 int unique_copy (const uint8_t *data, size_t size) 411 { 412 Vec working(data, data + size); 413 std::sort(working.begin(), working.end()); 414 Vec results; 415 (void) std::unique_copy(working.begin(), working.end(), 416 std::back_inserter<Vec>(results)); 417 Vec::iterator it; // scratch iterator 418 419 // Check the size of the unique'd sequence. 420 // it should only be zero if the input sequence was empty. 421 if (results.size() == 0) 422 return working.size() == 0 ? 0 : 1; 423 424 // 'results' is sorted 425 if (!std::is_sorted(results.begin(), results.end())) return 2; 426 427 // All the elements in 'results' must be different 428 it = results.begin(); 429 uint8_t prev_value = *it++; 430 for (; it != results.end(); ++it) 431 { 432 if (*it == prev_value) return 3; 433 prev_value = *it; 434 } 435 436 // Every element in 'results' must be in 'working' 437 for (auto v : results) 438 if (std::find(working.begin(), working.end(), v) == working.end()) 439 return 4; 440 441 // Every element in 'working' must be in 'results' 442 for (auto v : working) 443 if (std::find(results.begin(), results.end(), v) == results.end()) 444 return 5; 445 446 return 0; 447 } 448 449 450 // -- regex fuzzers 451 static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag) 452 { 453 if (size > 0) 454 { 455 try 456 { 457 std::string s((const char *)data, size); 458 std::regex re(s, flag); 459 return std::regex_match(s, re) ? 1 : 0; 460 } 461 catch (std::regex_error &ex) {} 462 } 463 return 0; 464 } 465 466 467 int regex_ECMAScript (const uint8_t *data, size_t size) 468 { 469 (void) regex_helper(data, size, std::regex_constants::ECMAScript); 470 return 0; 471 } 472 473 int regex_POSIX (const uint8_t *data, size_t size) 474 { 475 (void) regex_helper(data, size, std::regex_constants::basic); 476 return 0; 477 } 478 479 int regex_extended (const uint8_t *data, size_t size) 480 { 481 (void) regex_helper(data, size, std::regex_constants::extended); 482 return 0; 483 } 484 485 int regex_awk (const uint8_t *data, size_t size) 486 { 487 (void) regex_helper(data, size, std::regex_constants::awk); 488 return 0; 489 } 490 491 int regex_grep (const uint8_t *data, size_t size) 492 { 493 (void) regex_helper(data, size, std::regex_constants::grep); 494 return 0; 495 } 496 497 int regex_egrep (const uint8_t *data, size_t size) 498 { 499 (void) regex_helper(data, size, std::regex_constants::egrep); 500 return 0; 501 } 502 503 // -- heap fuzzers 504 int make_heap (const uint8_t *data, size_t size) 505 { 506 Vec working(data, data + size); 507 std::make_heap(working.begin(), working.end()); 508 509 if (!std::is_heap(working.begin(), working.end())) return 1; 510 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 511 return 0; 512 } 513 514 int push_heap (const uint8_t *data, size_t size) 515 { 516 if (size < 2) return 0; 517 518 // Make a heap from the first half of the data 519 Vec working(data, data + size); 520 auto iter = working.begin() + (size / 2); 521 std::make_heap(working.begin(), iter); 522 if (!std::is_heap(working.begin(), iter)) return 1; 523 524 // Now push the rest onto the heap, one at a time 525 ++iter; 526 for (; iter != working.end(); ++iter) { 527 std::push_heap(working.begin(), iter); 528 if (!std::is_heap(working.begin(), iter)) return 2; 529 } 530 531 if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99; 532 return 0; 533 } 534 535 int pop_heap (const uint8_t *data, size_t size) 536 { 537 if (size < 2) return 0; 538 Vec working(data, data + size); 539 std::make_heap(working.begin(), working.end()); 540 541 // Pop things off, one at a time 542 auto iter = --working.end(); 543 while (iter != working.begin()) { 544 std::pop_heap(working.begin(), iter); 545 if (!std::is_heap(working.begin(), --iter)) return 2; 546 } 547 548 return 0; 549 } 550 551 552 // -- search fuzzers 553 int search (const uint8_t *data, size_t size) 554 { 555 if (size < 2) return 0; 556 557 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 558 assert(pat_size <= size - 1); 559 const uint8_t *pat_begin = data + 1; 560 const uint8_t *pat_end = pat_begin + pat_size; 561 const uint8_t *data_end = data + size; 562 assert(pat_end <= data_end); 563 // std::cerr << "data[0] = " << size_t(data[0]) << " "; 564 // std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl; 565 auto it = std::search(pat_end, data_end, pat_begin, pat_end); 566 if (it != data_end) // not found 567 if (!std::equal(pat_begin, pat_end, it)) 568 return 1; 569 return 0; 570 } 571 572 template <typename S> 573 static int search_helper (const uint8_t *data, size_t size) 574 { 575 if (size < 2) return 0; 576 577 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 578 const uint8_t *pat_begin = data + 1; 579 const uint8_t *pat_end = pat_begin + pat_size; 580 const uint8_t *data_end = data + size; 581 582 auto it = std::search(pat_end, data_end, S(pat_begin, pat_end)); 583 if (it != data_end) // not found 584 if (!std::equal(pat_begin, pat_end, it)) 585 return 1; 586 return 0; 587 } 588 589 // These are still in std::experimental 590 // int search_boyer_moore (const uint8_t *data, size_t size) 591 // { 592 // return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size); 593 // } 594 // 595 // int search_boyer_moore_horspool (const uint8_t *data, size_t size) 596 // { 597 // return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size); 598 // } 599 600 601 // -- set operation fuzzers 602 template <typename S> 603 static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2) 604 { 605 assert(size > 1); 606 607 const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max(); 608 const uint8_t *pat_begin = data + 1; 609 const uint8_t *pat_end = pat_begin + pat_size; 610 const uint8_t *data_end = data + size; 611 v1.assign(pat_begin, pat_end); 612 v2.assign(pat_end, data_end); 613 614 std::sort(v1.begin(), v1.end()); 615 std::sort(v2.begin(), v2.end()); 616 } 617 618 } // namespace fuzzing 619