Home | History | Annotate | Download | only in fuzzing
      1 // -*- C++ -*-
      2 //===------------------------- fuzzing.cpp -------------------------------===//
      3 //
      4 //                     The LLVM Compiler Infrastructure
      5 //
      6 // This file is dual licensed under the MIT and the University of Illinois Open
      7 // Source Licenses. See LICENSE.TXT for details.
      8 //
      9 //===----------------------------------------------------------------------===//
     10 
     11 //	A set of routines to use when fuzzing the algorithms in libc++
     12 //	Each one tests a single algorithm.
     13 //
     14 //	They all have the form of:
     15 //		int `algorithm`(const uint8_t *data, size_t size);
     16 //
     17 //	They perform the operation, and then check to see if the results are correct.
     18 //	If so, they return zero, and non-zero otherwise.
     19 //
     20 //	For example, sort calls std::sort, then checks two things:
     21 //		(1) The resulting vector is sorted
     22 //		(2) The resulting vector contains the same elements as the original data.
     23 
     24 
     25 
     26 #include "fuzzing.h"
     27 #include <vector>
     28 #include <algorithm>
     29 #include <functional>
     30 #include <regex>
     31 #include <cassert>
     32 
     33 #include <iostream>
     34 
     35 //	If we had C++14, we could use the four iterator version of is_permutation and equal
     36 
     37 namespace fuzzing {
     38 
     39 //	This is a struct we can use to test the stable_XXX algorithms.
     40 //	perform the operation on the key, then check the order of the payload.
     41 
     42 struct stable_test {
     43 	uint8_t key;
     44 	size_t payload;
     45 
     46 	stable_test(uint8_t k) : key(k), payload(0) {}
     47 	stable_test(uint8_t k, size_t p) : key(k), payload(p) {}
     48 	};
     49 
     50 void swap(stable_test &lhs, stable_test &rhs)
     51 {
     52 	using std::swap;
     53 	swap(lhs.key,     rhs.key);
     54 	swap(lhs.payload, rhs.payload);
     55 }
     56 
     57 struct key_less
     58 {
     59 	bool operator () (const stable_test &lhs, const stable_test &rhs) const
     60 	{
     61 		return lhs.key < rhs.key;
     62 	}
     63 };
     64 
     65 struct payload_less
     66 {
     67 	bool operator () (const stable_test &lhs, const stable_test &rhs) const
     68 	{
     69 		return lhs.payload < rhs.payload;
     70 	}
     71 };
     72 
     73 struct total_less
     74 {
     75 	bool operator () (const stable_test &lhs, const stable_test &rhs) const
     76 	{
     77 		return lhs.key == rhs.key ? lhs.payload < rhs.payload : lhs.key < rhs.key;
     78 	}
     79 };
     80 
     81 bool operator==(const stable_test &lhs, const stable_test &rhs)
     82 {
     83 	return lhs.key == rhs.key && lhs.payload == rhs.payload;
     84 }
     85 
     86 
     87 template<typename T>
     88 struct is_even
     89 {
     90 	bool operator () (const T &t) const
     91 	{
     92 		return t % 2 == 0;
     93 	}
     94 };
     95 
     96 
     97 template<>
     98 struct is_even<stable_test>
     99 {
    100 	bool operator () (const stable_test &t) const
    101 	{
    102 		return t.key % 2 == 0;
    103 	}
    104 };
    105 
    106 typedef std::vector<uint8_t> Vec;
    107 typedef std::vector<stable_test> StableVec;
    108 typedef StableVec::const_iterator SVIter;
    109 
    110 //	Cheap version of is_permutation
    111 //	Builds a set of buckets for each of the key values.
    112 //	Sums all the payloads.
    113 //	Not 100% perfect, but _way_ faster
    114 bool is_permutation(SVIter first1, SVIter last1, SVIter first2)
    115 {
    116 	size_t xBuckets[256]  = {0};
    117 	size_t xPayloads[256] = {0};
    118 	size_t yBuckets[256]  = {0};
    119 	size_t yPayloads[256] = {0};
    120 
    121 	for (; first1 != last1; ++first1, ++first2)
    122 	{
    123 		xBuckets [first1->key]++;
    124 		xPayloads[first1->key] += first1->payload;
    125 
    126 		yBuckets [first2->key]++;
    127 		yPayloads[first2->key] += first2->payload;
    128 	}
    129 
    130 	for (size_t i = 0; i < 256; ++i)
    131 	{
    132 		if (xBuckets[i]  != yBuckets[i])
    133 			return false;
    134 		if (xPayloads[i] != yPayloads[i])
    135 			return false;
    136 	}
    137 
    138 	return true;
    139 }
    140 
    141 template <typename Iter1, typename Iter2>
    142 bool is_permutation(Iter1 first1, Iter1 last1, Iter2 first2)
    143 {
    144 	static_assert((std::is_same<typename std::iterator_traits<Iter1>::value_type, uint8_t>::value), "");
    145 	static_assert((std::is_same<typename std::iterator_traits<Iter2>::value_type, uint8_t>::value), "");
    146 
    147 	size_t xBuckets[256]  = {0};
    148 	size_t yBuckets[256]  = {0};
    149 
    150 	for (; first1 != last1; ++first1, ++first2)
    151 	{
    152 		xBuckets [*first1]++;
    153 		yBuckets [*first2]++;
    154 	}
    155 
    156 	for (size_t i = 0; i < 256; ++i)
    157 		if (xBuckets[i]  != yBuckets[i])
    158 			return false;
    159 
    160 	return true;
    161 }
    162 
    163 //	== sort ==
    164 int sort(const uint8_t *data, size_t size)
    165 {
    166 	Vec working(data, data + size);
    167 	std::sort(working.begin(), working.end());
    168 
    169 	if (!std::is_sorted(working.begin(), working.end())) return 1;
    170 	if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
    171 	return 0;
    172 }
    173 
    174 
    175 //	== stable_sort ==
    176 int stable_sort(const uint8_t *data, size_t size)
    177 {
    178 	StableVec input;
    179 	for (size_t i = 0; i < size; ++i)
    180 		input.push_back(stable_test(data[i], i));
    181 	StableVec working = input;
    182 	std::stable_sort(working.begin(), working.end(), key_less());
    183 
    184 	if (!std::is_sorted(working.begin(), working.end(), key_less()))   return 1;
    185 	auto iter = working.begin();
    186 	while (iter != working.end())
    187 	{
    188 		auto range = std::equal_range(iter, working.end(), *iter, key_less());
    189 		if (!std::is_sorted(range.first, range.second, total_less())) return 2;
    190 		iter = range.second;
    191 	}
    192 	if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99;
    193 	return 0;
    194 }
    195 
    196 //	== partition ==
    197 int partition(const uint8_t *data, size_t size)
    198 {
    199 	Vec working(data, data + size);
    200 	auto iter = std::partition(working.begin(), working.end(), is_even<uint8_t>());
    201 
    202 	if (!std::all_of (working.begin(), iter, is_even<uint8_t>())) return 1;
    203 	if (!std::none_of(iter,   working.end(), is_even<uint8_t>())) return 2;
    204 	if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
    205 	return 0;
    206 }
    207 
    208 
    209 //	== partition_copy ==
    210 int partition_copy(const uint8_t *data, size_t size)
    211 {
    212 	Vec v1, v2;
    213 	auto iter = std::partition_copy(data, data + size,
    214 		std::back_inserter<Vec>(v1), std::back_inserter<Vec>(v2),
    215 		is_even<uint8_t>());
    216 
    217 //	The two vectors should add up to the original size
    218 	if (v1.size() + v2.size() != size) return 1;
    219 
    220 //	All of the even values should be in the first vector, and none in the second
    221 	if (!std::all_of (v1.begin(), v1.end(), is_even<uint8_t>())) return 2;
    222 	if (!std::none_of(v2.begin(), v2.end(), is_even<uint8_t>())) return 3;
    223 
    224 //	Every value in both vectors has to be in the original
    225 	for (auto v: v1)
    226 		if (std::find(data, data + size, v) == data + size) return 4;
    227 
    228 	for (auto v: v2)
    229 		if (std::find(data, data + size, v) == data + size) return 5;
    230 
    231 	return 0;
    232 }
    233 
    234 //	== stable_partition ==
    235 int stable_partition (const uint8_t *data, size_t size)
    236 {
    237 	StableVec input;
    238 	for (size_t i = 0; i < size; ++i)
    239 		input.push_back(stable_test(data[i], i));
    240 	StableVec working = input;
    241 	auto iter = std::stable_partition(working.begin(), working.end(), is_even<stable_test>());
    242 
    243 	if (!std::all_of (working.begin(), iter, is_even<stable_test>())) return 1;
    244 	if (!std::none_of(iter,   working.end(), is_even<stable_test>())) return 2;
    245 	if (!std::is_sorted(working.begin(), iter, payload_less()))   return 3;
    246 	if (!std::is_sorted(iter,   working.end(), payload_less()))   return 4;
    247 	if (!fuzzing::is_permutation(input.cbegin(), input.cend(), working.cbegin())) return 99;
    248 	return 0;
    249 }
    250 
    251 //	== nth_element ==
    252 //	use the first element as a position into the data
    253 int nth_element (const uint8_t *data, size_t size)
    254 {
    255 	if (size <= 1) return 0;
    256 	const size_t partition_point = data[0] % size;
    257 	Vec working(data + 1, data + size);
    258 	const auto partition_iter = working.begin() + partition_point;
    259 	std::nth_element(working.begin(), partition_iter, working.end());
    260 
    261 //	nth may be the end iterator, in this case nth_element has no effect.
    262 	if (partition_iter == working.end())
    263 	{
    264 		if (!std::equal(data + 1, data + size, working.begin())) return 98;
    265 	}
    266 	else
    267 	{
    268 		const uint8_t nth = *partition_iter;
    269 		if (!std::all_of(working.begin(), partition_iter, [=](uint8_t v) { return v <= nth; }))
    270 			return 1;
    271 		if (!std::all_of(partition_iter, working.end(),   [=](uint8_t v) { return v >= nth; }))
    272 			return 2;
    273 		if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99;
    274 		}
    275 
    276 	return 0;
    277 }
    278 
    279 //	== partial_sort ==
    280 //	use the first element as a position into the data
    281 int partial_sort (const uint8_t *data, size_t size)
    282 {
    283 	if (size <= 1) return 0;
    284 	const size_t sort_point = data[0] % size;
    285 	Vec working(data + 1, data + size);
    286 	const auto sort_iter = working.begin() + sort_point;
    287 	std::partial_sort(working.begin(), sort_iter, working.end());
    288 
    289 	if (sort_iter != working.end())
    290 	{
    291 		const uint8_t nth = *std::min_element(sort_iter, working.end());
    292 		if (!std::all_of(working.begin(), sort_iter, [=](uint8_t v) { return v <= nth; }))
    293 			return 1;
    294 		if (!std::all_of(sort_iter, working.end(),   [=](uint8_t v) { return v >= nth; }))
    295 			return 2;
    296 	}
    297 	if (!std::is_sorted(working.begin(), sort_iter)) return 3;
    298 	if (!fuzzing::is_permutation(data + 1, data + size, working.cbegin())) return 99;
    299 
    300 	return 0;
    301 }
    302 
    303 
    304 //	== partial_sort_copy ==
    305 //	use the first element as a count
    306 int partial_sort_copy (const uint8_t *data, size_t size)
    307 {
    308 	if (size <= 1) return 0;
    309 	const size_t num_results = data[0] % size;
    310 	Vec results(num_results);
    311 	(void) std::partial_sort_copy(data + 1, data + size, results.begin(), results.end());
    312 
    313 //	The results have to be sorted
    314 	if (!std::is_sorted(results.begin(), results.end())) return 1;
    315 //	All the values in results have to be in the original data
    316 	for (auto v: results)
    317 		if (std::find(data + 1, data + size, v) == data + size) return 2;
    318 
    319 //	The things in results have to be the smallest N in the original data
    320 	Vec sorted(data + 1, data + size);
    321 	std::sort(sorted.begin(), sorted.end());
    322 	if (!std::equal(results.begin(), results.end(), sorted.begin())) return 3;
    323 	return 0;
    324 }
    325 
    326 //	The second sequence has been "uniqued"
    327 template <typename Iter1, typename Iter2>
    328 static bool compare_unique(Iter1 first1, Iter1 last1, Iter2 first2, Iter2 last2)
    329 {
    330 	assert(first1 != last1 && first2 != last2);
    331 	if (*first1 != *first2) return false;
    332 
    333 	uint8_t last_value = *first1;
    334 	++first1; ++first2;
    335 	while(first1 != last1 && first2 != last2)
    336 	{
    337 	//	Skip over dups in the first sequence
    338 		while (*first1 == last_value)
    339 			if (++first1 == last1) return false;
    340 		if (*first1 != *first2) return false;
    341 		last_value = *first1;
    342 		++first1; ++first2;
    343 	}
    344 
    345 //	Still stuff left in the 'uniqued' sequence - oops
    346 	if (first1 == last1 && first2 != last2) return false;
    347 
    348 //	Still stuff left in the original sequence - better be all the same
    349 	while (first1 != last1)
    350 	{
    351 		if (*first1 != last_value) return false;
    352 		++first1;
    353 	}
    354 	return true;
    355 }
    356 
    357 //	== unique ==
    358 int unique (const uint8_t *data, size_t size)
    359 {
    360 	Vec working(data, data + size);
    361 	std::sort(working.begin(), working.end());
    362 	Vec results = working;
    363 	Vec::iterator new_end = std::unique(results.begin(), results.end());
    364 	Vec::iterator it;	// scratch iterator
    365 
    366 //	Check the size of the unique'd sequence.
    367 //	it should only be zero if the input sequence was empty.
    368 	if (results.begin() == new_end)
    369 		return working.size() == 0 ? 0 : 1;
    370 
    371 //	'results' is sorted
    372 	if (!std::is_sorted(results.begin(), new_end)) return 2;
    373 
    374 //	All the elements in 'results' must be different
    375 	it = results.begin();
    376 	uint8_t prev_value = *it++;
    377 	for (; it != new_end; ++it)
    378 	{
    379 		if (*it == prev_value) return 3;
    380 		prev_value = *it;
    381 	}
    382 
    383 //	Every element in 'results' must be in 'working'
    384 	for (it = results.begin(); it != new_end; ++it)
    385 		if (std::find(working.begin(), working.end(), *it) == working.end())
    386 			return 4;
    387 
    388 //	Every element in 'working' must be in 'results'
    389 	for (auto v : working)
    390 		if (std::find(results.begin(), new_end, v) == new_end)
    391 			return 5;
    392 
    393 	return 0;
    394 }
    395 
    396 //	== unique_copy ==
    397 int unique_copy (const uint8_t *data, size_t size)
    398 {
    399 	Vec working(data, data + size);
    400 	std::sort(working.begin(), working.end());
    401 	Vec results;
    402 	(void) std::unique_copy(working.begin(), working.end(),
    403 	                        std::back_inserter<Vec>(results));
    404 	Vec::iterator it;	// scratch iterator
    405 
    406 //	Check the size of the unique'd sequence.
    407 //	it should only be zero if the input sequence was empty.
    408 	if (results.size() == 0)
    409 		return working.size() == 0 ? 0 : 1;
    410 
    411 //	'results' is sorted
    412 	if (!std::is_sorted(results.begin(), results.end())) return 2;
    413 
    414 //	All the elements in 'results' must be different
    415 	it = results.begin();
    416 	uint8_t prev_value = *it++;
    417 	for (; it != results.end(); ++it)
    418 	{
    419 		if (*it == prev_value) return 3;
    420 		prev_value = *it;
    421 	}
    422 
    423 //	Every element in 'results' must be in 'working'
    424 	for (auto v : results)
    425 		if (std::find(working.begin(), working.end(), v) == working.end())
    426 			return 4;
    427 
    428 //	Every element in 'working' must be in 'results'
    429 	for (auto v : working)
    430 		if (std::find(results.begin(), results.end(), v) == results.end())
    431 			return 5;
    432 
    433 	return 0;
    434 }
    435 
    436 
    437 // --	regex fuzzers
    438 static int regex_helper(const uint8_t *data, size_t size, std::regex::flag_type flag)
    439 {
    440 	if (size > 0)
    441 	{
    442 		try
    443 		{
    444 			std::string s((const char *)data, size);
    445 			std::regex re(s, flag);
    446 			return std::regex_match(s, re) ? 1 : 0;
    447 		}
    448 		catch (std::regex_error &ex) {}
    449 	}
    450 	return 0;
    451 }
    452 
    453 
    454 int regex_ECMAScript (const uint8_t *data, size_t size)
    455 {
    456 	(void) regex_helper(data, size, std::regex_constants::ECMAScript);
    457 	return 0;
    458 }
    459 
    460 int regex_POSIX (const uint8_t *data, size_t size)
    461 {
    462 	(void) regex_helper(data, size, std::regex_constants::basic);
    463 	return 0;
    464 }
    465 
    466 int regex_extended (const uint8_t *data, size_t size)
    467 {
    468 	(void) regex_helper(data, size, std::regex_constants::extended);
    469 	return 0;
    470 }
    471 
    472 int regex_awk (const uint8_t *data, size_t size)
    473 {
    474 	(void) regex_helper(data, size, std::regex_constants::awk);
    475 	return 0;
    476 }
    477 
    478 int regex_grep (const uint8_t *data, size_t size)
    479 {
    480 	(void) regex_helper(data, size, std::regex_constants::grep);
    481 	return 0;
    482 }
    483 
    484 int regex_egrep (const uint8_t *data, size_t size)
    485 {
    486 	(void) regex_helper(data, size, std::regex_constants::egrep);
    487 	return 0;
    488 }
    489 
    490 // --	heap fuzzers
    491 int make_heap (const uint8_t *data, size_t size)
    492 {
    493 	Vec working(data, data + size);
    494 	std::make_heap(working.begin(), working.end());
    495 
    496 	if (!std::is_heap(working.begin(), working.end())) return 1;
    497 	if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
    498 	return 0;
    499 }
    500 
    501 int push_heap (const uint8_t *data, size_t size)
    502 {
    503 	if (size < 2) return 0;
    504 
    505 //	Make a heap from the first half of the data
    506 	Vec working(data, data + size);
    507 	auto iter = working.begin() + (size / 2);
    508 	std::make_heap(working.begin(), iter);
    509 	if (!std::is_heap(working.begin(), iter)) return 1;
    510 
    511 //	Now push the rest onto the heap, one at a time
    512 	++iter;
    513 	for (; iter != working.end(); ++iter) {
    514 		std::push_heap(working.begin(), iter);
    515 		if (!std::is_heap(working.begin(), iter)) return 2;
    516 		}
    517 
    518 	if (!fuzzing::is_permutation(data, data + size, working.cbegin())) return 99;
    519 	return 0;
    520 }
    521 
    522 int pop_heap (const uint8_t *data, size_t size)
    523 {
    524 	if (size < 2) return 0;
    525 	Vec working(data, data + size);
    526 	std::make_heap(working.begin(), working.end());
    527 
    528 //	Pop things off, one at a time
    529 	auto iter = --working.end();
    530 	while (iter != working.begin()) {
    531 		std::pop_heap(working.begin(), iter);
    532 		if (!std::is_heap(working.begin(), --iter)) return 2;
    533 		}
    534 
    535 	return 0;
    536 }
    537 
    538 
    539 // --	search fuzzers
    540 int search (const uint8_t *data, size_t size)
    541 {
    542 	if (size < 2) return 0;
    543 
    544 	const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
    545 	assert(pat_size <= size - 1);
    546 	const uint8_t *pat_begin = data + 1;
    547 	const uint8_t *pat_end   = pat_begin + pat_size;
    548 	const uint8_t *data_end  = data + size;
    549 	assert(pat_end <= data_end);
    550 // 	std::cerr << "data[0] = " << size_t(data[0]) << " ";
    551 // 	std::cerr << "Pattern size = " << pat_size << "; corpus is " << size - 1 << std::endl;
    552 	auto it = std::search(pat_end, data_end, pat_begin, pat_end);
    553 	if (it != data_end) // not found
    554 		if (!std::equal(pat_begin, pat_end, it))
    555 			return 1;
    556 	return 0;
    557 }
    558 
    559 template <typename S>
    560 static int search_helper (const uint8_t *data, size_t size)
    561 {
    562 	if (size < 2) return 0;
    563 
    564 	const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
    565 	const uint8_t *pat_begin = data + 1;
    566 	const uint8_t *pat_end   = pat_begin + pat_size;
    567 	const uint8_t *data_end  = data + size;
    568 
    569 	auto it = std::search(pat_end, data_end, S(pat_begin, pat_end));
    570 	if (it != data_end) // not found
    571 		if (!std::equal(pat_begin, pat_end, it))
    572 			return 1;
    573 	return 0;
    574 }
    575 
    576 //	These are still in std::experimental
    577 // int search_boyer_moore (const uint8_t *data, size_t size)
    578 // {
    579 // 	return search_helper<std::boyer_moore_searcher<const uint8_t *>>(data, size);
    580 // }
    581 //
    582 // int search_boyer_moore_horspool (const uint8_t *data, size_t size)
    583 // {
    584 // 	return search_helper<std::boyer_moore_horspool_searcher<const uint8_t *>>(data, size);
    585 // }
    586 
    587 
    588 // --	set operation fuzzers
    589 template <typename S>
    590 static void set_helper (const uint8_t *data, size_t size, Vec &v1, Vec &v2)
    591 {
    592 	assert(size > 1);
    593 
    594 	const size_t pat_size = data[0] * (size - 1) / std::numeric_limits<uint8_t>::max();
    595 	const uint8_t *pat_begin = data + 1;
    596 	const uint8_t *pat_end   = pat_begin + pat_size;
    597 	const uint8_t *data_end  = data + size;
    598 	v1.assign(pat_begin, pat_end);
    599 	v2.assign(pat_end, data_end);
    600 
    601 	std::sort(v1.begin(), v1.end());
    602 	std::sort(v2.begin(), v2.end());
    603 }
    604 
    605 } // namespace fuzzing
    606