Home | History | Annotate | Download | only in spreadsortlib
      1 //Templated spread_sort library
      2 
      3 //          Copyright Steven J. Ross 2001 - 2009.
      4 // Distributed under the Boost Software License, Version 1.0.
      5 //    (See accompanying file LICENSE_1_0.txt or copy at
      6 //          http://www.boost.org/LICENSE_1_0.txt)
      7 
      8 //  See http://www.boost.org/ for updates, documentation, and revision history.
      9 
     10 /*
     11 Some improvements suggested by:
     12 Phil Endecott and Frank Gennari
     13 Cygwin fix provided by:
     14 Scott McMurray
     15 */
     16 
     17 #ifndef BOOST_SPREAD_SORT_H
     18 #define BOOST_SPREAD_SORT_H
     19 #include <algorithm>
     20 #include <vector>
     21 #include "constants.hpp"
     22 #include <cstring>
     23 
     24 namespace boost {
     25   namespace detail {
     26   	//This only works on unsigned data types
     27   	template <typename T>
     28   	inline unsigned
     29   	rough_log_2_size(const T& input)
     30   	{
     31   		unsigned result = 0;
     32   		//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
     33   		while((input >> result) && (result < (8*sizeof(T)))) ++result;
     34   		return result;
     35   	}
     36 
     37   	//Gets the maximum size which we'll call spread_sort on to control worst-case performance
     38   	//Maintains both a minimum size to recurse and a check of distribution size versus count
     39   	//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
     40   	inline size_t
     41   	get_max_count(unsigned log_range, size_t count)
     42   	{
     43   		unsigned divisor = rough_log_2_size(count);
     44   		//Making sure the divisor is positive
     45   		if(divisor > LOG_MEAN_BIN_SIZE)
     46   			divisor -= LOG_MEAN_BIN_SIZE;
     47   		else
     48   			divisor = 1;
     49   		unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
     50   		//Don't try to bitshift more than the size of an element
     51   		if((8*sizeof(size_t)) <= relative_width)
     52   			relative_width = (8*sizeof(size_t)) - 1;
     53   		return (size_t)1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ?
     54   			(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  relative_width);
     55   	}
     56 
     57   	//Find the minimum and maximum using <
     58   	template <class RandomAccessIter>
     59   	inline void
     60   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
     61   	{
     62   		min = max = current;
     63   		//Start from the second item, as max and min are initialized to the first
     64   		while(++current < last) {
     65   			if(*max < *current)
     66   				max = current;
     67   			else if(*current < *min)
     68   				min = current;
     69   		}
     70   	}
     71 
     72   	//Uses a user-defined comparison operator to find minimum and maximum
     73   	template <class RandomAccessIter, class compare>
     74   	inline void
     75   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
     76   	{
     77   		min = max = current;
     78   		while(++current < last) {
     79   			if(comp(*max, *current))
     80   				max = current;
     81   			else if(comp(*current, *min))
     82   				min = current;
     83   		}
     84   	}
     85 
     86   	//Gets a non-negative right bit shift to operate as a logarithmic divisor
     87   	inline int
     88   	get_log_divisor(size_t count, unsigned log_range)
     89   	{
     90   		int log_divisor;
     91   		//If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
     92   		if((log_divisor = log_range - rough_log_2_size(count)) <= 0 && log_range < MAX_SPLITS)
     93   			log_divisor = 0;
     94   		else {
     95   			//otherwise divide the data into an optimized number of pieces
     96   			log_divisor += LOG_MEAN_BIN_SIZE;
     97   			if(log_divisor < 0)
     98   				log_divisor = 0;
     99   			//Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
    100   			if((log_range - log_divisor) > MAX_SPLITS)
    101   				log_divisor = log_range - MAX_SPLITS;
    102   		}
    103   		return log_divisor;
    104   	}
    105 
    106   	template <class RandomAccessIter>
    107   	inline RandomAccessIter *
    108   	size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
    109   	{
    110   		//Assure space for the size of each bin, followed by initializing sizes
    111   		if(bin_count > bin_sizes.size())
    112   			bin_sizes.resize(bin_count);
    113   		for(size_t u = 0; u < bin_count; u++)
    114   			bin_sizes[u] = 0;
    115   		//Make sure there is space for the bins
    116   		cache_end = cache_offset + bin_count;
    117   		if(cache_end > bin_cache.size())
    118   			bin_cache.resize(cache_end);
    119   		return &(bin_cache[cache_offset]);
    120   	}
    121 
    122   	//Implementation for recursive integer sorting
    123   	template <class RandomAccessIter, class div_type, class data_type>
    124   	inline void
    125   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    126   				  , std::vector<size_t> &bin_sizes)
    127   	{
    128   		//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
    129   		//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
    130   		RandomAccessIter max, min;
    131   		find_extremes(first, last, max, min);
    132   		//max and min will be the same (the first item) iff all values are equivalent
    133   		if(max == min)
    134   			return;
    135   		RandomAccessIter * target_bin;
    136   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
    137   		div_type div_min = *min >> log_divisor;
    138   		div_type div_max = *max >> log_divisor;
    139   		unsigned bin_count = div_max - div_min + 1;
    140   		unsigned cache_end;
    141   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    142 
    143   		//Calculating the size of each bin; this takes roughly 10% of runtime
    144   		for (RandomAccessIter current = first; current != last;)
    145   			bin_sizes[(*(current++) >> log_divisor) - div_min]++;
    146   		//Assign the bin positions
    147   		bins[0] = first;
    148   		for(unsigned u = 0; u < bin_count - 1; u++)
    149   			bins[u + 1] = bins[u] + bin_sizes[u];
    150 
    151   		//Swap into place
    152   		//This dominates runtime, mostly in the swap and bin lookups
    153   		RandomAccessIter nextbinstart = first;
    154   		for(unsigned u = 0; u < bin_count - 1; ++u) {
    155   			RandomAccessIter * local_bin = bins + u;
    156   			nextbinstart += bin_sizes[u];
    157   			//Iterating over each element in this bin
    158   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    159   				//Swapping elements in current into place until the correct element has been swapped in
    160   				for(target_bin = (bins + ((*current >> log_divisor) - div_min));  target_bin != local_bin;
    161   					target_bin = bins + ((*current >> log_divisor) - div_min)) {
    162   					//3-way swap; this is about 1% faster than a 2-way swap with integers
    163   					//The main advantage is less copies are involved per item put in the correct place
    164   					data_type tmp;
    165   					RandomAccessIter b = (*target_bin)++;
    166   					RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
    167   					if (b_bin != local_bin) {
    168   						RandomAccessIter c = (*b_bin)++;
    169   						tmp = *c;
    170   						*c = *b;
    171   					}
    172   					else
    173   						tmp = *b;
    174   					*b = *current;
    175   					*current = tmp;
    176   				}
    177   			}
    178   			*local_bin = nextbinstart;
    179   		}
    180   		bins[bin_count - 1] = last;
    181 
    182   		//If we've bucketsorted, the array is sorted and we should skip recursion
    183   		if(!log_divisor)
    184   			return;
    185 
    186   		//Recursing; log_divisor is the remaining range
    187   		size_t max_count = get_max_count(log_divisor, last - first);
    188   		RandomAccessIter lastPos = first;
    189   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    190   			size_t count = bin_cache[u] - lastPos;
    191   			//don't sort unless there are at least two items to compare
    192   			if(count < 2)
    193   				continue;
    194   			//using std::sort if its worst-case is better
    195   			if(count < max_count)
    196   				std::sort(lastPos, bin_cache[u]);
    197   			else
    198   				spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    199   		}
    200   	}
    201 
    202   	//Generic bitshift-based 3-way swapping code
    203   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    204   	inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
    205   		, const unsigned log_divisor, const div_type div_min)
    206   	{
    207   		RandomAccessIter * local_bin = bins + ii;
    208   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    209   			for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min));  target_bin != local_bin;
    210   				target_bin = bins + (shift(*current, log_divisor) - div_min)) {
    211   				data_type tmp;
    212   				RandomAccessIter b = (*target_bin)++;
    213   				RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
    214   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
    215   				if (b_bin != local_bin) {
    216   					RandomAccessIter c = (*b_bin)++;
    217   					tmp = *c;
    218   					*c = *b;
    219   				}
    220   				//Note: we could increment current once the swap is done in this case, but that seems to impair performance
    221   				else
    222   					tmp = *b;
    223   				*b = *current;
    224   				*current = tmp;
    225   			}
    226   		}
    227   		*local_bin = nextbinstart;
    228   	}
    229 
    230   	//Standard swapping wrapper for ascending values
    231   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    232   	inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
    233   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
    234   	{
    235   		nextbinstart += bin_sizes[ii];
    236   		inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, log_divisor, div_min);
    237   	}
    238 
    239   	//Functor implementation for recursive sorting
    240   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    241   	inline void
    242   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    243   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    244   	{
    245   		RandomAccessIter max, min;
    246   		find_extremes(first, last, max, min, comp);
    247   		if(max == min)
    248   			return;
    249   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
    250   		div_type div_min = shift(*min, log_divisor);
    251   		div_type div_max = shift(*max, log_divisor);
    252   		unsigned bin_count = div_max - div_min + 1;
    253   		unsigned cache_end;
    254   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    255 
    256   		//Calculating the size of each bin
    257   		for (RandomAccessIter current = first; current != last;)
    258   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    259   		bins[0] = first;
    260   		for(unsigned u = 0; u < bin_count - 1; u++)
    261   			bins[u + 1] = bins[u] + bin_sizes[u];
    262 
    263   		//Swap into place
    264   		RandomAccessIter nextbinstart = first;
    265   		for(unsigned u = 0; u < bin_count - 1; ++u)
    266   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min);
    267   		bins[bin_count - 1] = last;
    268 
    269   		//If we've bucketsorted, the array is sorted and we should skip recursion
    270   		if(!log_divisor)
    271   			return;
    272 
    273   		//Recursing
    274   		size_t max_count = get_max_count(log_divisor, last - first);
    275   		RandomAccessIter lastPos = first;
    276   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    277   			size_t count = bin_cache[u] - lastPos;
    278   			if(count < 2)
    279   				continue;
    280   			if(count < max_count)
    281   				std::sort(lastPos, bin_cache[u], comp);
    282   			else
    283   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
    284   		}
    285   	}
    286 
    287   	//Functor implementation for recursive sorting with only Shift overridden
    288   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    289   	inline void
    290   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    291   					, std::vector<size_t> &bin_sizes, right_shift shift)
    292   	{
    293   		RandomAccessIter max, min;
    294   		find_extremes(first, last, max, min);
    295   		if(max == min)
    296   			return;
    297   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
    298   		div_type div_min = shift(*min, log_divisor);
    299   		div_type div_max = shift(*max, log_divisor);
    300   		unsigned bin_count = div_max - div_min + 1;
    301   		unsigned cache_end;
    302   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    303 
    304   		//Calculating the size of each bin
    305   		for (RandomAccessIter current = first; current != last;)
    306   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    307   		bins[0] = first;
    308   		for(unsigned u = 0; u < bin_count - 1; u++)
    309   			bins[u + 1] = bins[u] + bin_sizes[u];
    310 
    311   		//Swap into place
    312   		RandomAccessIter nextbinstart = first;
    313   		for(unsigned ii = 0; ii < bin_count - 1; ++ii)
    314   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    315   		bins[bin_count - 1] = last;
    316 
    317   		//If we've bucketsorted, the array is sorted and we should skip recursion
    318   		if(!log_divisor)
    319   			return;
    320 
    321   		//Recursing
    322   		size_t max_count = get_max_count(log_divisor, last - first);
    323   		RandomAccessIter lastPos = first;
    324   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    325   			size_t count = bin_cache[u] - lastPos;
    326   			if(count < 2)
    327   				continue;
    328   			if(count < max_count)
    329   				std::sort(lastPos, bin_cache[u]);
    330   			else
    331   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
    332   		}
    333   	}
    334 
    335   	//Holds the bin vector and makes the initial recursive call
    336   	template <class RandomAccessIter, class div_type, class data_type>
    337   	inline void
    338   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
    339   	{
    340   		std::vector<size_t> bin_sizes;
    341   		std::vector<RandomAccessIter> bin_cache;
    342   		spread_sort_rec<RandomAccessIter, div_type, data_type>(first, last, bin_cache, 0, bin_sizes);
    343   	}
    344 
    345   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    346   	inline void
    347   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
    348   	{
    349   		std::vector<size_t> bin_sizes;
    350   		std::vector<RandomAccessIter> bin_cache;
    351   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(first, last, bin_cache, 0, bin_sizes, shift, comp);
    352   	}
    353 
    354   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    355   	inline void
    356   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
    357   	{
    358   		std::vector<size_t> bin_sizes;
    359   		std::vector<RandomAccessIter> bin_cache;
    360   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
    361   	}
    362   }
    363 
    364   //Top-level sorting call for integers
    365   template <class RandomAccessIter>
    366   inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
    367   {
    368   	//Don't sort if it's too small to optimize
    369   	if(last - first < detail::MIN_SORT_SIZE)
    370   		std::sort(first, last);
    371   	else
    372   		detail::spread_sort(first, last, *first >> 0, *first);
    373   }
    374 
    375   //integer_sort with functors
    376   template <class RandomAccessIter, class right_shift, class compare>
    377   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
    378   						right_shift shift, compare comp) {
    379   	if(last - first < detail::MIN_SORT_SIZE)
    380   		std::sort(first, last, comp);
    381   	else
    382   		detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
    383   }
    384 
    385   //integer_sort with right_shift functor
    386   template <class RandomAccessIter, class right_shift>
    387   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
    388   						right_shift shift) {
    389   	if(last - first < detail::MIN_SORT_SIZE)
    390   		std::sort(first, last);
    391   	else
    392   		detail::spread_sort(first, last, shift(*first, 0), *first, shift);
    393   }
    394 
    395   //------------------------------------------------------ float_sort source --------------------------------------
    396   //Casts a RandomAccessIter to the specified data type
    397   template<class cast_type, class RandomAccessIter>
    398   inline cast_type
    399   cast_float_iter(const RandomAccessIter & floatiter)
    400   {
    401   	cast_type result;
    402   	std::memcpy(&result, &(*floatiter), sizeof(cast_type));
    403   	return result;
    404   }
    405 
    406   //Casts a data element to the specified datinner_float_a type
    407   template<class data_type, class cast_type>
    408   inline cast_type
    409   mem_cast(const data_type & data)
    410   {
    411   	cast_type result;
    412   	std::memcpy(&result, &data, sizeof(cast_type));
    413   	return result;
    414   }
    415 
    416   namespace detail {
    417   	template <class RandomAccessIter, class div_type, class right_shift>
    418   	inline void
    419   	find_extremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, right_shift shift)
    420   	{
    421   		min = max = shift(*current, 0);
    422   		while(++current < last) {
    423   			div_type value = shift(*current, 0);
    424   			if(max < value)
    425   				max = value;
    426   			else if(value < min)
    427   				min = value;
    428   		}
    429   	}
    430 
    431   	//Specialized swap loops for floating-point casting
    432   	template <class RandomAccessIter, class div_type, class data_type>
    433   	inline void inner_float_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii
    434   		, const unsigned log_divisor, const div_type div_min)
    435   	{
    436   		RandomAccessIter * local_bin = bins + ii;
    437   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    438   			for(RandomAccessIter * target_bin = (bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min));  target_bin != local_bin;
    439   				target_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min)) {
    440   				data_type tmp;
    441   				RandomAccessIter b = (*target_bin)++;
    442   				RandomAccessIter * b_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(b) >> log_divisor) - div_min);
    443   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
    444   				if (b_bin != local_bin) {
    445   					RandomAccessIter c = (*b_bin)++;
    446   					tmp = *c;
    447   					*c = *b;
    448   				}
    449   				else
    450   					tmp = *b;
    451   				*b = *current;
    452   				*current = tmp;
    453   			}
    454   		}
    455   		*local_bin = nextbinstart;
    456   	}
    457 
    458   	template <class RandomAccessIter, class div_type, class data_type>
    459   	inline void float_swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii
    460   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
    461   	{
    462   		nextbinstart += bin_sizes[ii];
    463   		inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, log_divisor, div_min);
    464   	}
    465 
    466   	template <class RandomAccessIter, class cast_type>
    467   	inline void
    468   	find_extremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min)
    469   	{
    470   		min = max = cast_float_iter<cast_type, RandomAccessIter>(current);
    471   		while(++current < last) {
    472   			cast_type value = cast_float_iter<cast_type, RandomAccessIter>(current);
    473   			if(max < value)
    474   				max = value;
    475   			else if(value < min)
    476   				min = value;
    477   		}
    478   	}
    479 
    480   	//Special-case sorting of positive floats with casting instead of a right_shift
    481   	template <class RandomAccessIter, class div_type, class data_type>
    482   	inline void
    483   	positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    484   					, std::vector<size_t> &bin_sizes)
    485   	{
    486   		div_type max, min;
    487   		find_extremes(first, last, max, min);
    488   		if(max == min)
    489   			return;
    490   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    491   		div_type div_min = min >> log_divisor;
    492   		div_type div_max = max >> log_divisor;
    493   		unsigned bin_count = div_max - div_min + 1;
    494   		unsigned cache_end;
    495   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    496 
    497   		//Calculating the size of each bin
    498   		for (RandomAccessIter current = first; current != last;)
    499   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    500   		bins[0] = first;
    501   		for(unsigned u = 0; u < bin_count - 1; u++)
    502   			bins[u + 1] = bins[u] + bin_sizes[u];
    503 
    504   		//Swap into place
    505   		RandomAccessIter nextbinstart = first;
    506   		for(unsigned u = 0; u < bin_count - 1; ++u)
    507   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
    508   		bins[bin_count - 1] = last;
    509 
    510   		//Return if we've completed bucketsorting
    511   		if(!log_divisor)
    512   			return;
    513 
    514   		//Recursing
    515   		size_t max_count = get_max_count(log_divisor, last - first);
    516   		RandomAccessIter lastPos = first;
    517   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    518   			size_t count = bin_cache[u] - lastPos;
    519   			if(count < 2)
    520   				continue;
    521   			if(count < max_count)
    522   				std::sort(lastPos, bin_cache[u]);
    523   			else
    524   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    525   		}
    526   	}
    527 
    528   	//Sorting negative_ float_s
    529   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
    530   	template <class RandomAccessIter, class div_type, class data_type>
    531   	inline void
    532   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    533   					, std::vector<size_t> &bin_sizes)
    534   	{
    535   		div_type max, min;
    536   		find_extremes(first, last, max, min);
    537   		if(max == min)
    538   			return;
    539   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    540   		div_type div_min = min >> log_divisor;
    541   		div_type div_max = max >> log_divisor;
    542   		unsigned bin_count = div_max - div_min + 1;
    543   		unsigned cache_end;
    544   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    545 
    546   		//Calculating the size of each bin
    547   		for (RandomAccessIter current = first; current != last;)
    548   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    549   		bins[bin_count - 1] = first;
    550   		for(int ii = bin_count - 2; ii >= 0; --ii)
    551   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    552 
    553   		//Swap into place
    554   		RandomAccessIter nextbinstart = first;
    555   		//The last bin will always have the correct elements in it
    556   		for(int ii = bin_count - 1; ii > 0; --ii)
    557   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
    558   		//Since we don't process the last bin, we need to update its end position
    559   		bin_cache[cache_offset] = last;
    560 
    561   		//Return if we've completed bucketsorting
    562   		if(!log_divisor)
    563   			return;
    564 
    565   		//Recursing
    566   		size_t max_count = get_max_count(log_divisor, last - first);
    567   		RandomAccessIter lastPos = first;
    568   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    569   			size_t count = bin_cache[ii] - lastPos;
    570   			if(count < 2)
    571   				continue;
    572   			if(count < max_count)
    573   				std::sort(lastPos, bin_cache[ii]);
    574   			else
    575   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
    576   		}
    577   	}
    578 
    579   	//Sorting negative_ float_s
    580   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
    581   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    582   	inline void
    583   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    584   					, std::vector<size_t> &bin_sizes, right_shift shift)
    585   	{
    586   		div_type max, min;
    587   		find_extremes(first, last, max, min, shift);
    588   		if(max == min)
    589   			return;
    590   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    591   		div_type div_min = min >> log_divisor;
    592   		div_type div_max = max >> log_divisor;
    593   		unsigned bin_count = div_max - div_min + 1;
    594   		unsigned cache_end;
    595   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    596 
    597   		//Calculating the size of each bin
    598   		for (RandomAccessIter current = first; current != last;)
    599   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    600   		bins[bin_count - 1] = first;
    601   		for(int ii = bin_count - 2; ii >= 0; --ii)
    602   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    603 
    604   		//Swap into place
    605   		RandomAccessIter nextbinstart = first;
    606   		//The last bin will always have the correct elements in it
    607   		for(int ii = bin_count - 1; ii > 0; --ii)
    608   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    609   		//Since we don't process the last bin, we need to update its end position
    610   		bin_cache[cache_offset] = last;
    611 
    612   		//Return if we've completed bucketsorting
    613   		if(!log_divisor)
    614   			return;
    615 
    616   		//Recursing
    617   		size_t max_count = get_max_count(log_divisor, last - first);
    618   		RandomAccessIter lastPos = first;
    619   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    620   			size_t count = bin_cache[ii] - lastPos;
    621   			if(count < 2)
    622   				continue;
    623   			if(count < max_count)
    624   				std::sort(lastPos, bin_cache[ii]);
    625   			else
    626   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
    627   		}
    628   	}
    629 
    630   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    631   	inline void
    632   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    633   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    634   	{
    635   		div_type max, min;
    636   		find_extremes(first, last, max, min, shift);
    637   		if(max == min)
    638   			return;
    639   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    640   		div_type div_min = min >> log_divisor;
    641   		div_type div_max = max >> log_divisor;
    642   		unsigned bin_count = div_max - div_min + 1;
    643   		unsigned cache_end;
    644   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    645 
    646   		//Calculating the size of each bin
    647   		for (RandomAccessIter current = first; current != last;)
    648   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    649   		bins[bin_count - 1] = first;
    650   		for(int ii = bin_count - 2; ii >= 0; --ii)
    651   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    652 
    653   		//Swap into place
    654   		RandomAccessIter nextbinstart = first;
    655   		//The last bin will always have the correct elements in it
    656   		for(int ii = bin_count - 1; ii > 0; --ii)
    657   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    658   		//Since we don't process the last bin, we need to update its end position
    659   		bin_cache[cache_offset] = last;
    660 
    661   		//Return if we've completed bucketsorting
    662   		if(!log_divisor)
    663   			return;
    664 
    665   		//Recursing
    666   		size_t max_count = get_max_count(log_divisor, last - first);
    667   		RandomAccessIter lastPos = first;
    668   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    669   			size_t count = bin_cache[ii] - lastPos;
    670   			if(count < 2)
    671   				continue;
    672   			if(count < max_count)
    673   				std::sort(lastPos, bin_cache[ii], comp);
    674   			else
    675   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
    676   		}
    677   	}
    678 
    679   	//Casting special-case for floating-point sorting
    680   	template <class RandomAccessIter, class div_type, class data_type>
    681   	inline void
    682   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    683   					, std::vector<size_t> &bin_sizes)
    684   	{
    685   		div_type max, min;
    686   		find_extremes(first, last, max, min);
    687   		if(max == min)
    688   			return;
    689   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    690   		div_type div_min = min >> log_divisor;
    691   		div_type div_max = max >> log_divisor;
    692   		unsigned bin_count = div_max - div_min + 1;
    693   		unsigned cache_end;
    694   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    695 
    696   		//Calculating the size of each bin
    697   		for (RandomAccessIter current = first; current != last;)
    698   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    699   		//The index of the first positive bin
    700   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    701   		//Resetting if all bins are negative
    702   		if(cache_offset + first_positive > cache_end)
    703   			first_positive = cache_end - cache_offset;
    704   		//Reversing the order of the negative bins
    705   		//Note that because of the negative/positive ordering direction flip
    706   		//We can not depend upon bin order and positions matching up
    707   		//so bin_sizes must be reused to contain the end of the bin
    708   		if(first_positive > 0) {
    709   			bins[first_positive - 1] = first;
    710   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    711   				bins[ii] = first + bin_sizes[ii + 1];
    712   				bin_sizes[ii] += bin_sizes[ii + 1];
    713   			}
    714   			//Handling positives following negatives
    715   			if((unsigned)first_positive < bin_count) {
    716   				bins[first_positive] = first + bin_sizes[0];
    717   				bin_sizes[first_positive] += bin_sizes[0];
    718   			}
    719   		}
    720   		else
    721   			bins[0] = first;
    722   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    723   			bins[u + 1] = first + bin_sizes[u];
    724   			bin_sizes[u + 1] += bin_sizes[u];
    725   		}
    726 
    727   		//Swap into place
    728   		RandomAccessIter nextbinstart = first;
    729   		for(unsigned u = 0; u < bin_count; ++u) {
    730   			nextbinstart = first + bin_sizes[u];
    731   			inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, log_divisor, div_min);
    732   		}
    733 
    734   		if(!log_divisor)
    735   			return;
    736 
    737   		//Handling negative values first
    738   		size_t max_count = get_max_count(log_divisor, last - first);
    739   		RandomAccessIter lastPos = first;
    740   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    741   			size_t count = bin_cache[ii] - lastPos;
    742   			if(count < 2)
    743   				continue;
    744   			if(count < max_count)
    745   				std::sort(lastPos, bin_cache[ii]);
    746   			//sort negative values using reversed-bin spread_sort
    747   			else
    748   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
    749   		}
    750 
    751   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    752   			size_t count = bin_cache[u] - lastPos;
    753   			if(count < 2)
    754   				continue;
    755   			if(count < max_count)
    756   				std::sort(lastPos, bin_cache[u]);
    757   			//sort positive values using normal spread_sort
    758   			else
    759   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    760   		}
    761   	}
    762 
    763   	//Functor implementation for recursive sorting
    764   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    765   	inline void
    766   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    767   					, std::vector<size_t> &bin_sizes, right_shift shift)
    768   	{
    769   		div_type max, min;
    770   		find_extremes(first, last, max, min, shift);
    771   		if(max == min)
    772   			return;
    773   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    774   		div_type div_min = min >> log_divisor;
    775   		div_type div_max = max >> log_divisor;
    776   		unsigned bin_count = div_max - div_min + 1;
    777   		unsigned cache_end;
    778   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    779 
    780   		//Calculating the size of each bin
    781   		for (RandomAccessIter current = first; current != last;)
    782   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    783   		//The index of the first positive bin
    784   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    785   		//Resetting if all bins are negative
    786   		if(cache_offset + first_positive > cache_end)
    787   			first_positive = cache_end - cache_offset;
    788   		//Reversing the order of the negative bins
    789   		//Note that because of the negative/positive ordering direction flip
    790   		//We can not depend upon bin order and positions matching up
    791   		//so bin_sizes must be reused to contain the end of the bin
    792   		if(first_positive > 0) {
    793   			bins[first_positive - 1] = first;
    794   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    795   				bins[ii] = first + bin_sizes[ii + 1];
    796   				bin_sizes[ii] += bin_sizes[ii + 1];
    797   			}
    798   			//Handling positives following negatives
    799   			if((unsigned)first_positive < bin_count) {
    800   				bins[first_positive] = first + bin_sizes[0];
    801   				bin_sizes[first_positive] += bin_sizes[0];
    802   			}
    803   		}
    804   		else
    805   			bins[0] = first;
    806   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    807   			bins[u + 1] = first + bin_sizes[u];
    808   			bin_sizes[u + 1] += bin_sizes[u];
    809   		}
    810 
    811   		//Swap into place
    812   		RandomAccessIter nextbinstart = first;
    813   		for(unsigned u = 0; u < bin_count; ++u) {
    814   			nextbinstart = first + bin_sizes[u];
    815   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
    816   		}
    817 
    818   		//Return if we've completed bucketsorting
    819   		if(!log_divisor)
    820   			return;
    821 
    822   		//Handling negative values first
    823   		size_t max_count = get_max_count(log_divisor, last - first);
    824   		RandomAccessIter lastPos = first;
    825   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    826   			size_t count = bin_cache[ii] - lastPos;
    827   			if(count < 2)
    828   				continue;
    829   			if(count < max_count)
    830   				std::sort(lastPos, bin_cache[ii]);
    831   			//sort negative values using reversed-bin spread_sort
    832   			else
    833   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
    834   		}
    835 
    836   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    837   			size_t count = bin_cache[u] - lastPos;
    838   			if(count < 2)
    839   				continue;
    840   			if(count < max_count)
    841   				std::sort(lastPos, bin_cache[u]);
    842   			//sort positive values using normal spread_sort
    843   			else
    844   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
    845   		}
    846   	}
    847 
    848   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    849   	inline void
    850   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    851   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    852   	{
    853   		div_type max, min;
    854   		find_extremes(first, last, max, min, shift);
    855   		if(max == min)
    856   			return;
    857   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    858   		div_type div_min = min >> log_divisor;
    859   		div_type div_max = max >> log_divisor;
    860   		unsigned bin_count = div_max - div_min + 1;
    861   		unsigned cache_end;
    862   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    863 
    864   		//Calculating the size of each bin
    865   		for (RandomAccessIter current = first; current != last;)
    866   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    867   		//The index of the first positive bin
    868   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    869   		//Resetting if all bins are negative
    870   		if(cache_offset + first_positive > cache_end)
    871   			first_positive = cache_end - cache_offset;
    872   		//Reversing the order of the negative bins
    873   		//Note that because of the negative/positive ordering direction flip
    874   		//We can not depend upon bin order and positions matching up
    875   		//so bin_sizes must be reused to contain the end of the bin
    876   		if(first_positive > 0) {
    877   			bins[first_positive - 1] = first;
    878   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    879   				bins[ii] = first + bin_sizes[ii + 1];
    880   				bin_sizes[ii] += bin_sizes[ii + 1];
    881   			}
    882   			//Handling positives following negatives
    883   			if((unsigned)first_positive < bin_count) {
    884   				bins[first_positive] = first + bin_sizes[0];
    885   				bin_sizes[first_positive] += bin_sizes[0];
    886   			}
    887   		}
    888   		else
    889   			bins[0] = first;
    890   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    891   			bins[u + 1] = first + bin_sizes[u];
    892   			bin_sizes[u + 1] += bin_sizes[u];
    893   		}
    894 
    895   		//Swap into place
    896   		RandomAccessIter nextbinstart = first;
    897   		for(unsigned u = 0; u < bin_count; ++u) {
    898   			nextbinstart = first + bin_sizes[u];
    899   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
    900   		}
    901 
    902   		//Return if we've completed bucketsorting
    903   		if(!log_divisor)
    904   			return;
    905 
    906   		//Handling negative values first
    907   		size_t max_count = get_max_count(log_divisor, last - first);
    908   		RandomAccessIter lastPos = first;
    909   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    910   			size_t count = bin_cache[ii] - lastPos;
    911   			if(count < 2)
    912   				continue;
    913   			if(count < max_count)
    914   				std::sort(lastPos, bin_cache[ii]);
    915   			//sort negative values using reversed-bin spread_sort
    916   			else
    917   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
    918   		}
    919 
    920   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    921   			size_t count = bin_cache[u] - lastPos;
    922   			if(count < 2)
    923   				continue;
    924   			if(count < max_count)
    925   				std::sort(lastPos, bin_cache[u]);
    926   			//sort positive values using normal spread_sort
    927   			else
    928   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
    929   		}
    930   	}
    931 
    932   	template <class RandomAccessIter, class cast_type, class data_type>
    933   	inline void
    934   	float_Sort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type)
    935   	{
    936   		std::vector<size_t> bin_sizes;
    937   		std::vector<RandomAccessIter> bin_cache;
    938   		float_sort_rec<RandomAccessIter, cast_type, data_type>(first, last, bin_cache, 0, bin_sizes);
    939   	}
    940 
    941   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    942   	inline void
    943   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
    944   	{
    945   		std::vector<size_t> bin_sizes;
    946   		std::vector<RandomAccessIter> bin_cache;
    947   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
    948   	}
    949 
    950   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    951   	inline void
    952   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
    953   	{
    954   		std::vector<size_t> bin_sizes;
    955   		std::vector<RandomAccessIter> bin_cache;
    956   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift, comp);
    957   	}
    958   }
    959 
    960   //float_sort with casting
    961   //The cast_type must be equal in size to the data type, and must be a signed integer
    962   template <class RandomAccessIter, class cast_type>
    963   inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal)
    964   {
    965   	if(last - first < detail::MIN_SORT_SIZE)
    966   		std::sort(first, last);
    967   	else
    968   		detail::float_Sort(first, last, cVal, *first);
    969   }
    970 
    971   //float_sort with casting to an int
    972   //Only use this with IEEE floating-point numbers
    973   template <class RandomAccessIter>
    974   inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last)
    975   {
    976   	int cVal = 0;
    977   	float_sort_cast(first, last, cVal);
    978   }
    979 
    980   //float_sort with functors
    981   template <class RandomAccessIter, class right_shift>
    982   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift)
    983   {
    984   	if(last - first < detail::MIN_SORT_SIZE)
    985   		std::sort(first, last);
    986   	else
    987   		detail::float_Sort(first, last, shift(*first, 0), *first, shift);
    988   }
    989 
    990   template <class RandomAccessIter, class right_shift, class compare>
    991   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift, compare comp)
    992   {
    993   	if(last - first < detail::MIN_SORT_SIZE)
    994   		std::sort(first, last, comp);
    995   	else
    996   		detail::float_Sort(first, last, shift(*first, 0), *first, shift, comp);
    997   }
    998 
    999   //------------------------------------------------- string_sort source ---------------------------------------------
   1000   namespace detail {
   1001   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
   1002   	template<class RandomAccessIter>
   1003   	inline void
   1004   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset)
   1005   	{
   1006   		unsigned nextOffset = char_offset;
   1007   		bool done = false;
   1008   		while(!done) {
   1009   			RandomAccessIter curr = first;
   1010   			do {
   1011   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
   1012   				if((*curr).size() > char_offset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) {
   1013   					done = true;
   1014   					break;
   1015   				}
   1016   			} while(++curr != finish);
   1017   			if(!done)
   1018   				++nextOffset;
   1019   		}
   1020   		char_offset = nextOffset;
   1021   	}
   1022 
   1023   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
   1024   	template<class RandomAccessIter, class get_char, class get_length>
   1025   	inline void
   1026   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset, get_char getchar, get_length length)
   1027   	{
   1028   		unsigned nextOffset = char_offset;
   1029   		bool done = false;
   1030   		while(!done) {
   1031   			RandomAccessIter curr = first;
   1032   			do {
   1033   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
   1034   				if(length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
   1035   					done = true;
   1036   					break;
   1037   				}
   1038   			} while(++curr != finish);
   1039   			if(!done)
   1040   				++nextOffset;
   1041   		}
   1042   		char_offset = nextOffset;
   1043   	}
   1044 
   1045   	//A comparison functor for strings that assumes they are identical up to char_offset
   1046   	template<class data_type, class unsignedchar_type>
   1047   	struct offset_lessthan {
   1048   		offset_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
   1049   		inline bool operator()(const data_type &x, const data_type &y) const
   1050   		{
   1051   			unsigned minSize = std::min(x.size(), y.size());
   1052   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1053   				if(static_cast<unsignedchar_type>(x[u]) < static_cast<unsignedchar_type>(y[u]))
   1054   					return true;
   1055   				else if(static_cast<unsignedchar_type>(y[u]) < static_cast<unsignedchar_type>(x[u]))
   1056   					return false;
   1057   			}
   1058   			return x.size() < y.size();
   1059   		}
   1060   		unsigned fchar_offset;
   1061   	};
   1062 
   1063   	//A comparison functor for strings that assumes they are identical up to char_offset
   1064   	template<class data_type, class unsignedchar_type>
   1065   	struct offset_greaterthan {
   1066   		offset_greaterthan(unsigned char_offset) : fchar_offset(char_offset){}
   1067   		inline bool operator()(const data_type &x, const data_type &y) const
   1068   		{
   1069   			unsigned minSize = std::min(x.size(), y.size());
   1070   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1071   				if(static_cast<unsignedchar_type>(x[u]) > static_cast<unsignedchar_type>(y[u]))
   1072   					return true;
   1073   				else if(static_cast<unsignedchar_type>(y[u]) > static_cast<unsignedchar_type>(x[u]))
   1074   					return false;
   1075   			}
   1076   			return x.size() > y.size();
   1077   		}
   1078   		unsigned fchar_offset;
   1079   	};
   1080 
   1081   	//A comparison functor for strings that assumes they are identical up to char_offset
   1082   	template<class data_type, class get_char, class get_length>
   1083   	struct offset_char_lessthan {
   1084   		offset_char_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
   1085   		inline bool operator()(const data_type &x, const data_type &y) const
   1086   		{
   1087   			unsigned minSize = std::min(length(x), length(y));
   1088   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1089   				if(getchar(x, u) < getchar(y, u))
   1090   					return true;
   1091   				else if(getchar(y, u) < getchar(x, u))
   1092   					return false;
   1093   			}
   1094   			return length(x) < length(y);
   1095   		}
   1096   		unsigned fchar_offset;
   1097   		get_char getchar;
   1098   		get_length length;
   1099   	};
   1100 
   1101   	//String sorting recursive implementation
   1102   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1103   	inline void
   1104   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1105   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
   1106   	{
   1107   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1108   		//Iterate to the end of the empties.  If all empty, return
   1109   		while((*first).size() <= char_offset) {
   1110   			if(++first == last)
   1111   				return;
   1112   		}
   1113   		RandomAccessIter finish = last - 1;
   1114   		//Getting the last non-empty
   1115   		for(;(*finish).size() <= char_offset; --finish) { }
   1116   		++finish;
   1117   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1118   		update_offset(first, finish, char_offset);
   1119 
   1120   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1121   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1122   		const unsigned max_size = bin_count;
   1123   		const unsigned membin_count = bin_count + 1;
   1124   		unsigned cache_end;
   1125   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1126 
   1127   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1128   		for (RandomAccessIter current = first; current != last; ++current) {
   1129   			if((*current).size() <= char_offset) {
   1130   				bin_sizes[0]++;
   1131   			}
   1132   			else
   1133   				bin_sizes[static_cast<unsignedchar_type>((*current)[char_offset]) + 1]++;
   1134   		}
   1135   		//Assign the bin positions
   1136   		bin_cache[cache_offset] = first;
   1137   		for(unsigned u = 0; u < membin_count - 1; u++)
   1138   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1139 
   1140   		//Swap into place
   1141   		RandomAccessIter nextbinstart = first;
   1142   		//handling empty bins
   1143   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1144   		nextbinstart +=	bin_sizes[0];
   1145   		RandomAccessIter * target_bin;
   1146   		//Iterating over each element in the bin of empties
   1147   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1148   			//empties belong in this bin
   1149   			while((*current).size() > char_offset) {
   1150   				target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);
   1151   				iter_swap(current, (*target_bin)++);
   1152   			}
   1153   		}
   1154   		*local_bin = nextbinstart;
   1155   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1156   		unsigned last_bin = bin_count - 1;
   1157   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1158   		//This dominates runtime, mostly in the swap and bin lookups
   1159   		for(unsigned u = 0; u < last_bin; ++u) {
   1160   			local_bin = bins + u;
   1161   			nextbinstart += bin_sizes[u + 1];
   1162   			//Iterating over each element in this bin
   1163   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1164   				//Swapping elements in current into place until the correct element has been swapped in
   1165   				for(target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
   1166   					target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]))
   1167   					iter_swap(current, (*target_bin)++);
   1168   			}
   1169   			*local_bin = nextbinstart;
   1170   		}
   1171   		bins[last_bin] = last;
   1172   		//Recursing
   1173   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1174   		//Skip this loop for empties
   1175   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1176   			size_t count = bin_cache[u] - lastPos;
   1177   			//don't sort unless there are at least two items to compare
   1178   			if(count < 2)
   1179   				continue;
   1180   			//using std::sort if its worst-case is better
   1181   			if(count < max_size)
   1182   				std::sort(lastPos, bin_cache[u], offset_lessthan<data_type, unsignedchar_type>(char_offset + 1));
   1183   			else
   1184   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
   1185   		}
   1186   	}
   1187 
   1188   	//Sorts strings in reverse order, with empties at the end
   1189   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1190   	inline void
   1191   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1192   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
   1193   	{
   1194   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1195   		RandomAccessIter curr = first;
   1196   		//Iterate to the end of the empties.  If all empty, return
   1197   		while((*curr).size() <= char_offset) {
   1198   			if(++curr == last)
   1199   				return;
   1200   		}
   1201   		//Getting the last non-empty
   1202   		while((*(--last)).size() <= char_offset) { }
   1203   		++last;
   1204   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1205   		update_offset(curr, last, char_offset);
   1206   		RandomAccessIter * target_bin;
   1207 
   1208   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1209   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1210   		const unsigned max_size = bin_count;
   1211   		const unsigned membin_count = bin_count + 1;
   1212   		const unsigned max_bin = bin_count - 1;
   1213   		unsigned cache_end;
   1214   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
   1215   		RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
   1216 
   1217   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1218   		for (RandomAccessIter current = first; current != last; ++current) {
   1219   			if((*current).size() <= char_offset) {
   1220   				bin_sizes[bin_count]++;
   1221   			}
   1222   			else
   1223   				bin_sizes[max_bin - static_cast<unsignedchar_type>((*current)[char_offset])]++;
   1224   		}
   1225   		//Assign the bin positions
   1226   		bin_cache[cache_offset] = first;
   1227   		for(unsigned u = 0; u < membin_count - 1; u++)
   1228   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1229 
   1230   		//Swap into place
   1231   		RandomAccessIter nextbinstart = last;
   1232   		//handling empty bins
   1233   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
   1234   		RandomAccessIter lastFull = *local_bin;
   1235   		//Iterating over each element in the bin of empties
   1236   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1237   			//empties belong in this bin
   1238   			while((*current).size() > char_offset) {
   1239   				target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);
   1240   				iter_swap(current, (*target_bin)++);
   1241   			}
   1242   		}
   1243   		*local_bin = nextbinstart;
   1244   		nextbinstart = first;
   1245   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1246   		unsigned last_bin = max_bin;
   1247   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
   1248   		//This dominates runtime, mostly in the swap and bin lookups
   1249   		for(unsigned u = 0; u < last_bin; ++u) {
   1250   			local_bin = bins + u;
   1251   			nextbinstart += bin_sizes[u];
   1252   			//Iterating over each element in this bin
   1253   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1254   				//Swapping elements in current into place until the correct element has been swapped in
   1255   				for(target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
   1256   					target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]))
   1257   					iter_swap(current, (*target_bin)++);
   1258   			}
   1259   			*local_bin = nextbinstart;
   1260   		}
   1261   		bins[last_bin] = lastFull;
   1262   		//Recursing
   1263   		RandomAccessIter lastPos = first;
   1264   		//Skip this loop for empties
   1265   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
   1266   			size_t count = bin_cache[u] - lastPos;
   1267   			//don't sort unless there are at least two items to compare
   1268   			if(count < 2)
   1269   				continue;
   1270   			//using std::sort if its worst-case is better
   1271   			if(count < max_size)
   1272   				std::sort(lastPos, bin_cache[u], offset_greaterthan<data_type, unsignedchar_type>(char_offset + 1));
   1273   			else
   1274   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
   1275   		}
   1276   	}
   1277 
   1278   	//String sorting recursive implementation
   1279   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length>
   1280   	inline void
   1281   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1282   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length)
   1283   	{
   1284   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1285   		//Iterate to the end of the empties.  If all empty, return
   1286   		while(length(*first) <= char_offset) {
   1287   			if(++first == last)
   1288   				return;
   1289   		}
   1290   		RandomAccessIter finish = last - 1;
   1291   		//Getting the last non-empty
   1292   		for(;length(*finish) <= char_offset; --finish) { }
   1293   		++finish;
   1294   		update_offset(first, finish, char_offset, getchar, length);
   1295 
   1296   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1297   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1298   		const unsigned max_size = bin_count;
   1299   		const unsigned membin_count = bin_count + 1;
   1300   		unsigned cache_end;
   1301   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1302 
   1303   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1304   		for (RandomAccessIter current = first; current != last; ++current) {
   1305   			if(length(*current) <= char_offset) {
   1306   				bin_sizes[0]++;
   1307   			}
   1308   			else
   1309   				bin_sizes[getchar((*current), char_offset) + 1]++;
   1310   		}
   1311   		//Assign the bin positions
   1312   		bin_cache[cache_offset] = first;
   1313   		for(unsigned u = 0; u < membin_count - 1; u++)
   1314   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1315 
   1316   		//Swap into place
   1317   		RandomAccessIter nextbinstart = first;
   1318   		//handling empty bins
   1319   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1320   		nextbinstart +=	bin_sizes[0];
   1321   		RandomAccessIter * target_bin;
   1322   		//Iterating over each element in the bin of empties
   1323   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1324   			//empties belong in this bin
   1325   			while(length(*current) > char_offset) {
   1326   				target_bin = bins + getchar((*current), char_offset);
   1327   				iter_swap(current, (*target_bin)++);
   1328   			}
   1329   		}
   1330   		*local_bin = nextbinstart;
   1331   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1332   		unsigned last_bin = bin_count - 1;
   1333   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1334   		//This dominates runtime, mostly in the swap and bin lookups
   1335   		for(unsigned ii = 0; ii < last_bin; ++ii) {
   1336   			local_bin = bins + ii;
   1337   			nextbinstart += bin_sizes[ii + 1];
   1338   			//Iterating over each element in this bin
   1339   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1340   				//Swapping elements in current into place until the correct element has been swapped in
   1341   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
   1342   					target_bin = bins + getchar((*current), char_offset))
   1343   					iter_swap(current, (*target_bin)++);
   1344   			}
   1345   			*local_bin = nextbinstart;
   1346   		}
   1347   		bins[last_bin] = last;
   1348 
   1349   		//Recursing
   1350   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1351   		//Skip this loop for empties
   1352   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1353   			size_t count = bin_cache[u] - lastPos;
   1354   			//don't sort unless there are at least two items to compare
   1355   			if(count < 2)
   1356   				continue;
   1357   			//using std::sort if its worst-case is better
   1358   			if(count < max_size)
   1359   				std::sort(lastPos, bin_cache[u], offset_char_lessthan<data_type, get_char, get_length>(char_offset + 1));
   1360   			else
   1361   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length);
   1362   		}
   1363   	}
   1364 
   1365   	//String sorting recursive implementation
   1366   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
   1367   	inline void
   1368   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1369   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
   1370   	{
   1371   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1372   		//Iterate to the end of the empties.  If all empty, return
   1373   		while(length(*first) <= char_offset) {
   1374   			if(++first == last)
   1375   				return;
   1376   		}
   1377   		RandomAccessIter finish = last - 1;
   1378   		//Getting the last non-empty
   1379   		for(;length(*finish) <= char_offset; --finish) { }
   1380   		++finish;
   1381   		update_offset(first, finish, char_offset, getchar, length);
   1382 
   1383   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1384   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1385   		const unsigned max_size = bin_count;
   1386   		const unsigned membin_count = bin_count + 1;
   1387   		unsigned cache_end;
   1388   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1389 
   1390   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1391   		for (RandomAccessIter current = first; current != last; ++current) {
   1392   			if(length(*current) <= char_offset) {
   1393   				bin_sizes[0]++;
   1394   			}
   1395   			else
   1396   				bin_sizes[getchar((*current), char_offset) + 1]++;
   1397   		}
   1398   		//Assign the bin positions
   1399   		bin_cache[cache_offset] = first;
   1400   		for(unsigned u = 0; u < membin_count - 1; u++)
   1401   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1402 
   1403   		//Swap into place
   1404   		RandomAccessIter nextbinstart = first;
   1405   		//handling empty bins
   1406   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1407   		nextbinstart +=	bin_sizes[0];
   1408   		RandomAccessIter * target_bin;
   1409   		//Iterating over each element in the bin of empties
   1410   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1411   			//empties belong in this bin
   1412   			while(length(*current) > char_offset) {
   1413   				target_bin = bins + getchar((*current), char_offset);
   1414   				iter_swap(current, (*target_bin)++);
   1415   			}
   1416   		}
   1417   		*local_bin = nextbinstart;
   1418   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1419   		unsigned last_bin = bin_count - 1;
   1420   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1421   		//This dominates runtime, mostly in the swap and bin lookups
   1422   		for(unsigned u = 0; u < last_bin; ++u) {
   1423   			local_bin = bins + u;
   1424   			nextbinstart += bin_sizes[u + 1];
   1425   			//Iterating over each element in this bin
   1426   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1427   				//Swapping elements in current into place until the correct element has been swapped in
   1428   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
   1429   					target_bin = bins + getchar((*current), char_offset))
   1430   					iter_swap(current, (*target_bin)++);
   1431   			}
   1432   			*local_bin = nextbinstart;
   1433   		}
   1434   		bins[last_bin] = last;
   1435 
   1436   		//Recursing
   1437   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1438   		//Skip this loop for empties
   1439   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1440   			size_t count = bin_cache[u] - lastPos;
   1441   			//don't sort unless there are at least two items to compare
   1442   			if(count < 2)
   1443   				continue;
   1444   			//using std::sort if its worst-case is better
   1445   			if(count < max_size)
   1446   				std::sort(lastPos, bin_cache[u], comp);
   1447   			else
   1448   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
   1449   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
   1450   		}
   1451   	}
   1452 
   1453   	//Sorts strings in reverse order, with empties at the end
   1454   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
   1455   	inline void
   1456   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1457   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
   1458   	{
   1459   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1460   		RandomAccessIter curr = first;
   1461   		//Iterate to the end of the empties.  If all empty, return
   1462   		while(length(*curr) <= char_offset) {
   1463   			if(++curr == last)
   1464   				return;
   1465   		}
   1466   		//Getting the last non-empty
   1467   		while(length(*(--last)) <= char_offset) { }
   1468   		++last;
   1469   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1470   		update_offset(first, last, char_offset, getchar, length);
   1471 
   1472   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1473   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1474   		const unsigned max_size = bin_count;
   1475   		const unsigned membin_count = bin_count + 1;
   1476   		const unsigned max_bin = bin_count - 1;
   1477   		unsigned cache_end;
   1478   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
   1479   		RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
   1480 
   1481   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1482   		for (RandomAccessIter current = first; current != last; ++current) {
   1483   			if(length(*current) <= char_offset) {
   1484   				bin_sizes[bin_count]++;
   1485   			}
   1486   			else
   1487   				bin_sizes[max_bin - getchar((*current), char_offset)]++;
   1488   		}
   1489   		//Assign the bin positions
   1490   		bin_cache[cache_offset] = first;
   1491   		for(unsigned u = 0; u < membin_count - 1; u++)
   1492   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1493 
   1494   		//Swap into place
   1495   		RandomAccessIter nextbinstart = last;
   1496   		//handling empty bins
   1497   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
   1498   		RandomAccessIter lastFull = *local_bin;
   1499   		RandomAccessIter * target_bin;
   1500   		//Iterating over each element in the bin of empties
   1501   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1502   			//empties belong in this bin
   1503   			while(length(*current) > char_offset) {
   1504   				target_bin = end_bin - getchar((*current), char_offset);
   1505   				iter_swap(current, (*target_bin)++);
   1506   			}
   1507   		}
   1508   		*local_bin = nextbinstart;
   1509   		nextbinstart = first;
   1510   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1511   		unsigned last_bin = max_bin;
   1512   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
   1513   		//This dominates runtime, mostly in the swap and bin lookups
   1514   		for(unsigned u = 0; u < last_bin; ++u) {
   1515   			local_bin = bins + u;
   1516   			nextbinstart += bin_sizes[u];
   1517   			//Iterating over each element in this bin
   1518   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1519   				//Swapping elements in current into place until the correct element has been swapped in
   1520   				for(target_bin = end_bin - getchar((*current), char_offset);  target_bin != local_bin;
   1521   					target_bin = end_bin - getchar((*current), char_offset))
   1522   					iter_swap(current, (*target_bin)++);
   1523   			}
   1524   			*local_bin = nextbinstart;
   1525   		}
   1526   		bins[last_bin] = lastFull;
   1527   		//Recursing
   1528   		RandomAccessIter lastPos = first;
   1529   		//Skip this loop for empties
   1530   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
   1531   			size_t count = bin_cache[u] - lastPos;
   1532   			//don't sort unless there are at least two items to compare
   1533   			if(count < 2)
   1534   				continue;
   1535   			//using std::sort if its worst-case is better
   1536   			if(count < max_size)
   1537   				std::sort(lastPos, bin_cache[u], comp);
   1538   			else
   1539   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
   1540   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
   1541   		}
   1542   	}
   1543 
   1544   	//Holds the bin vector and makes the initial recursive call
   1545   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1546   	inline void
   1547   	string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
   1548   	{
   1549   		std::vector<size_t> bin_sizes;
   1550   		std::vector<RandomAccessIter> bin_cache;
   1551   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
   1552   	}
   1553 
   1554   	//Holds the bin vector and makes the initial recursive call
   1555   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1556   	inline void
   1557   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
   1558   	{
   1559   		std::vector<size_t> bin_sizes;
   1560   		std::vector<RandomAccessIter> bin_cache;
   1561   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
   1562   	}
   1563 
   1564   	//Holds the bin vector and makes the initial recursive call
   1565   	template <class RandomAccessIter, class get_char, class get_length, class data_type, class unsignedchar_type>
   1566   	inline void
   1567   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, data_type, unsignedchar_type)
   1568   	{
   1569   		std::vector<size_t> bin_sizes;
   1570   		std::vector<RandomAccessIter> bin_cache;
   1571   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
   1572   	}
   1573 
   1574   	//Holds the bin vector and makes the initial recursive call
   1575   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
   1576   	inline void
   1577   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
   1578   	{
   1579   		std::vector<size_t> bin_sizes;
   1580   		std::vector<RandomAccessIter> bin_cache;
   1581   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
   1582   	}
   1583 
   1584   	//Holds the bin vector and makes the initial recursive call
   1585   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
   1586   	inline void
   1587   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
   1588   	{
   1589   		std::vector<size_t> bin_sizes;
   1590   		std::vector<RandomAccessIter> bin_cache;
   1591   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
   1592   	}
   1593   }
   1594 
   1595   //Allows character-type overloads
   1596   template <class RandomAccessIter, class unsignedchar_type>
   1597   inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused)
   1598   {
   1599   	//Don't sort if it's too small to optimize
   1600   	if(last - first < detail::MIN_SORT_SIZE)
   1601   		std::sort(first, last);
   1602   	else
   1603   		detail::string_sort(first, last, *first, unused);
   1604   }
   1605 
   1606   //Top-level sorting call; wraps using default of unsigned char
   1607   template <class RandomAccessIter>
   1608   inline void string_sort(RandomAccessIter first, RandomAccessIter last)
   1609   {
   1610   	unsigned char unused = '\0';
   1611   	string_sort(first, last, unused);
   1612   }
   1613 
   1614   //Allows character-type overloads
   1615   template <class RandomAccessIter, class compare, class unsignedchar_type>
   1616   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp, unsignedchar_type unused)
   1617   {
   1618   	//Don't sort if it's too small to optimize
   1619   	if(last - first < detail::MIN_SORT_SIZE)
   1620   		std::sort(first, last, comp);
   1621   	else
   1622   		detail::reverse_string_sort(first, last, *first, unused);
   1623   }
   1624 
   1625   //Top-level sorting call; wraps using default of unsigned char
   1626   template <class RandomAccessIter, class compare>
   1627   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp)
   1628   {
   1629   	unsigned char unused = '\0';
   1630   	reverse_string_sort(first, last, comp, unused);
   1631   }
   1632 
   1633   template <class RandomAccessIter, class get_char, class get_length>
   1634   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length)
   1635   {
   1636   	//Don't sort if it's too small to optimize
   1637   	if(last - first < detail::MIN_SORT_SIZE)
   1638   		std::sort(first, last);
   1639   	else {
   1640   		//skipping past empties at the beginning, which allows us to get the character type
   1641   		//.empty() is not used so as not to require a user declaration of it
   1642   		while(!length(*first)) {
   1643   			if(++first == last)
   1644   				return;
   1645   		}
   1646   		detail::string_sort(first, last, getchar, length, *first, getchar((*first), 0));
   1647   	}
   1648   }
   1649 
   1650   template <class RandomAccessIter, class get_char, class get_length, class compare>
   1651   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
   1652   {
   1653   	//Don't sort if it's too small to optimize
   1654   	if(last - first < detail::MIN_SORT_SIZE)
   1655   		std::sort(first, last, comp);
   1656   	else {
   1657   		//skipping past empties at the beginning, which allows us to get the character type
   1658   		//.empty() is not used so as not to require a user declaration of it
   1659   		while(!length(*first)) {
   1660   			if(++first == last)
   1661   				return;
   1662   		}
   1663   		detail::string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
   1664   	}
   1665   }
   1666 
   1667   template <class RandomAccessIter, class get_char, class get_length, class compare>
   1668   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
   1669   {
   1670   	//Don't sort if it's too small to optimize
   1671   	if(last - first < detail::MIN_SORT_SIZE)
   1672   		std::sort(first, last, comp);
   1673   	else {
   1674   		//skipping past empties at the beginning, which allows us to get the character type
   1675   		//.empty() is not used so as not to require a user declaration of it
   1676   		while(!length(*(--last))) {
   1677   			//Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order
   1678   			if(first == last)
   1679   				return;
   1680   		}
   1681   		//making last just after the end of the non-empty part of the array
   1682   		++last;
   1683   		detail::reverse_string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
   1684   	}
   1685   }
   1686 }
   1687 
   1688 #endif
   1689