Home | History | Annotate | Download | only in spreadsortlib
      1 //Templated spread_sort library
      2 
      3 //          Copyright Steven J. Ross 2001 - 2009.
      4 // Distributed under the Boost Software License, Version 1.0.
      5 //    (See accompanying file LICENSE_1_0.txt or copy at
      6 //          http://www.boost.org/LICENSE_1_0.txt)
      7 
      8 //  See http://www.boost.org/ for updates, documentation, and revision history.
      9 
     10 /*
     11 Some improvements suggested by:
     12 Phil Endecott and Frank Gennari
     13 Cygwin fix provided by:
     14 Scott McMurray
     15 */
     16 
     17 #ifndef BOOST_SPREAD_SORT_H
     18 #define BOOST_SPREAD_SORT_H
     19 #include <algorithm>
     20 #include <cstring>
     21 #include <vector>
     22 #include "webrtc/system_wrappers/source/spreadsortlib/constants.hpp"
     23 
     24 #ifdef getchar
     25 // This file should not use getchar as a template parameter name.
     26 #undef getchar
     27 #endif
     28 
     29 namespace boost {
     30   namespace detail {
     31   	//This only works on unsigned data types
     32   	template <typename T>
     33   	inline unsigned
     34   	rough_log_2_size(const T& input)
     35   	{
     36   		unsigned result = 0;
     37   		//The && is necessary on some compilers to avoid infinite loops; it doesn't significantly impair performance
     38   		while((input >> result) && (result < (8*sizeof(T)))) ++result;
     39   		return result;
     40   	}
     41 
     42   	//Gets the maximum size which we'll call spread_sort on to control worst-case performance
     43   	//Maintains both a minimum size to recurse and a check of distribution size versus count
     44   	//This is called for a set of bins, instead of bin-by-bin, to avoid performance overhead
     45   	inline size_t
     46   	get_max_count(unsigned log_range, size_t count)
     47   	{
     48   		unsigned divisor = rough_log_2_size(count);
     49   		//Making sure the divisor is positive
     50   		if(divisor > LOG_MEAN_BIN_SIZE)
     51   			divisor -= LOG_MEAN_BIN_SIZE;
     52   		else
     53   			divisor = 1;
     54   		unsigned relative_width = (LOG_CONST * log_range)/((divisor > MAX_SPLITS) ? MAX_SPLITS : divisor);
     55   		//Don't try to bitshift more than the size of an element
     56   		if((8*sizeof(size_t)) <= relative_width)
     57   			relative_width = (8*sizeof(size_t)) - 1;
     58   		return (size_t)1 << ((relative_width < (LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT)) ?
     59   			(LOG_MEAN_BIN_SIZE + LOG_MIN_SPLIT_COUNT) :  relative_width);
     60   	}
     61 
     62   	//Find the minimum and maximum using <
     63   	template <class RandomAccessIter>
     64   	inline void
     65   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min)
     66   	{
     67   		min = max = current;
     68   		//Start from the second item, as max and min are initialized to the first
     69   		while(++current < last) {
     70   			if(*max < *current)
     71   				max = current;
     72   			else if(*current < *min)
     73   				min = current;
     74   		}
     75   	}
     76 
     77   	//Uses a user-defined comparison operator to find minimum and maximum
     78   	template <class RandomAccessIter, class compare>
     79   	inline void
     80   	find_extremes(RandomAccessIter current, RandomAccessIter last, RandomAccessIter & max, RandomAccessIter & min, compare comp)
     81   	{
     82   		min = max = current;
     83   		while(++current < last) {
     84   			if(comp(*max, *current))
     85   				max = current;
     86   			else if(comp(*current, *min))
     87   				min = current;
     88   		}
     89   	}
     90 
     91   	//Gets a non-negative right bit shift to operate as a logarithmic divisor
     92   	inline int
     93   	get_log_divisor(size_t count, unsigned log_range)
     94   	{
     95   		int log_divisor;
     96   		//If we can finish in one iteration without exceeding either (2 to the MAX_SPLITS) or n bins, do so
     97   		if((log_divisor = log_range - rough_log_2_size(count)) <= 0 && log_range < MAX_SPLITS)
     98   			log_divisor = 0;
     99   		else {
    100   			//otherwise divide the data into an optimized number of pieces
    101   			log_divisor += LOG_MEAN_BIN_SIZE;
    102   			if(log_divisor < 0)
    103   				log_divisor = 0;
    104   			//Cannot exceed MAX_SPLITS or cache misses slow down bin lookups dramatically
    105   			if((log_range - log_divisor) > MAX_SPLITS)
    106   				log_divisor = log_range - MAX_SPLITS;
    107   		}
    108   		return log_divisor;
    109   	}
    110 
    111   	template <class RandomAccessIter>
    112   	inline RandomAccessIter *
    113   	size_bins(std::vector<size_t> &bin_sizes, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset, unsigned &cache_end, unsigned bin_count)
    114   	{
    115   		//Assure space for the size of each bin, followed by initializing sizes
    116   		if(bin_count > bin_sizes.size())
    117   			bin_sizes.resize(bin_count);
    118   		for(size_t u = 0; u < bin_count; u++)
    119   			bin_sizes[u] = 0;
    120   		//Make sure there is space for the bins
    121   		cache_end = cache_offset + bin_count;
    122   		if(cache_end > bin_cache.size())
    123   			bin_cache.resize(cache_end);
    124   		return &(bin_cache[cache_offset]);
    125   	}
    126 
    127   	//Implementation for recursive integer sorting
    128   	template <class RandomAccessIter, class div_type, class data_type>
    129   	inline void
    130   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    131   				  , std::vector<size_t> &bin_sizes)
    132   	{
    133   		//This step is roughly 10% of runtime, but it helps avoid worst-case behavior and improve behavior with real data
    134   		//If you know the maximum and minimum ahead of time, you can pass those values in and skip this step for the first iteration
    135   		RandomAccessIter max, min;
    136   		find_extremes(first, last, max, min);
    137   		//max and min will be the same (the first item) iff all values are equivalent
    138   		if(max == min)
    139   			return;
    140   		RandomAccessIter * target_bin;
    141   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(*max >> 0) - (*min >> 0)));
    142   		div_type div_min = *min >> log_divisor;
    143   		div_type div_max = *max >> log_divisor;
    144   		unsigned bin_count = div_max - div_min + 1;
    145   		unsigned cache_end;
    146   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    147 
    148   		//Calculating the size of each bin; this takes roughly 10% of runtime
    149   		for (RandomAccessIter current = first; current != last;)
    150   			bin_sizes[(*(current++) >> log_divisor) - div_min]++;
    151   		//Assign the bin positions
    152   		bins[0] = first;
    153   		for(unsigned u = 0; u < bin_count - 1; u++)
    154   			bins[u + 1] = bins[u] + bin_sizes[u];
    155 
    156   		//Swap into place
    157   		//This dominates runtime, mostly in the swap and bin lookups
    158   		RandomAccessIter nextbinstart = first;
    159   		for(unsigned u = 0; u < bin_count - 1; ++u) {
    160   			RandomAccessIter * local_bin = bins + u;
    161   			nextbinstart += bin_sizes[u];
    162   			//Iterating over each element in this bin
    163   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    164   				//Swapping elements in current into place until the correct element has been swapped in
    165   				for(target_bin = (bins + ((*current >> log_divisor) - div_min));  target_bin != local_bin;
    166   					target_bin = bins + ((*current >> log_divisor) - div_min)) {
    167   					//3-way swap; this is about 1% faster than a 2-way swap with integers
    168   					//The main advantage is less copies are involved per item put in the correct place
    169   					data_type tmp;
    170   					RandomAccessIter b = (*target_bin)++;
    171   					RandomAccessIter * b_bin = bins + ((*b >> log_divisor) - div_min);
    172   					if (b_bin != local_bin) {
    173   						RandomAccessIter c = (*b_bin)++;
    174   						tmp = *c;
    175   						*c = *b;
    176   					}
    177   					else
    178   						tmp = *b;
    179   					*b = *current;
    180   					*current = tmp;
    181   				}
    182   			}
    183   			*local_bin = nextbinstart;
    184   		}
    185   		bins[bin_count - 1] = last;
    186 
    187   		//If we've bucketsorted, the array is sorted and we should skip recursion
    188   		if(!log_divisor)
    189   			return;
    190 
    191   		//Recursing; log_divisor is the remaining range
    192   		size_t max_count = get_max_count(log_divisor, last - first);
    193   		RandomAccessIter lastPos = first;
    194   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    195   			size_t count = bin_cache[u] - lastPos;
    196   			//don't sort unless there are at least two items to compare
    197   			if(count < 2)
    198   				continue;
    199   			//using std::sort if its worst-case is better
    200   			if(count < max_count)
    201   				std::sort(lastPos, bin_cache[u]);
    202   			else
    203   				spread_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    204   		}
    205   	}
    206 
    207   	//Generic bitshift-based 3-way swapping code
    208   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    209   	inline void inner_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
    210   		, const unsigned log_divisor, const div_type div_min)
    211   	{
    212   		RandomAccessIter * local_bin = bins + ii;
    213   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    214   			for(RandomAccessIter * target_bin = (bins + (shift(*current, log_divisor) - div_min));  target_bin != local_bin;
    215   				target_bin = bins + (shift(*current, log_divisor) - div_min)) {
    216   				data_type tmp;
    217   				RandomAccessIter b = (*target_bin)++;
    218   				RandomAccessIter * b_bin = bins + (shift(*b, log_divisor) - div_min);
    219   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
    220   				if (b_bin != local_bin) {
    221   					RandomAccessIter c = (*b_bin)++;
    222   					tmp = *c;
    223   					*c = *b;
    224   				}
    225   				//Note: we could increment current once the swap is done in this case, but that seems to impair performance
    226   				else
    227   					tmp = *b;
    228   				*b = *current;
    229   				*current = tmp;
    230   			}
    231   		}
    232   		*local_bin = nextbinstart;
    233   	}
    234 
    235   	//Standard swapping wrapper for ascending values
    236   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    237   	inline void swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii, right_shift &shift
    238   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
    239   	{
    240   		nextbinstart += bin_sizes[ii];
    241   		inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, log_divisor, div_min);
    242   	}
    243 
    244   	//Functor implementation for recursive sorting
    245   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    246   	inline void
    247   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    248   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    249   	{
    250   		RandomAccessIter max, min;
    251   		find_extremes(first, last, max, min, comp);
    252   		if(max == min)
    253   			return;
    254   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
    255   		div_type div_min = shift(*min, log_divisor);
    256   		div_type div_max = shift(*max, log_divisor);
    257   		unsigned bin_count = div_max - div_min + 1;
    258   		unsigned cache_end;
    259   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    260 
    261   		//Calculating the size of each bin
    262   		for (RandomAccessIter current = first; current != last;)
    263   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    264   		bins[0] = first;
    265   		for(unsigned u = 0; u < bin_count - 1; u++)
    266   			bins[u + 1] = bins[u] + bin_sizes[u];
    267 
    268   		//Swap into place
    269   		RandomAccessIter nextbinstart = first;
    270   		for(unsigned u = 0; u < bin_count - 1; ++u)
    271   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, bin_sizes, log_divisor, div_min);
    272   		bins[bin_count - 1] = last;
    273 
    274   		//If we've bucketsorted, the array is sorted and we should skip recursion
    275   		if(!log_divisor)
    276   			return;
    277 
    278   		//Recursing
    279   		size_t max_count = get_max_count(log_divisor, last - first);
    280   		RandomAccessIter lastPos = first;
    281   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    282   			size_t count = bin_cache[u] - lastPos;
    283   			if(count < 2)
    284   				continue;
    285   			if(count < max_count)
    286   				std::sort(lastPos, bin_cache[u], comp);
    287   			else
    288   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
    289   		}
    290   	}
    291 
    292   	//Functor implementation for recursive sorting with only Shift overridden
    293   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    294   	inline void
    295   	spread_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    296   					, std::vector<size_t> &bin_sizes, right_shift shift)
    297   	{
    298   		RandomAccessIter max, min;
    299   		find_extremes(first, last, max, min);
    300   		if(max == min)
    301   			return;
    302   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(shift(*max, 0)) - (shift(*min, 0))));
    303   		div_type div_min = shift(*min, log_divisor);
    304   		div_type div_max = shift(*max, log_divisor);
    305   		unsigned bin_count = div_max - div_min + 1;
    306   		unsigned cache_end;
    307   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    308 
    309   		//Calculating the size of each bin
    310   		for (RandomAccessIter current = first; current != last;)
    311   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    312   		bins[0] = first;
    313   		for(unsigned u = 0; u < bin_count - 1; u++)
    314   			bins[u + 1] = bins[u] + bin_sizes[u];
    315 
    316   		//Swap into place
    317   		RandomAccessIter nextbinstart = first;
    318   		for(unsigned ii = 0; ii < bin_count - 1; ++ii)
    319   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    320   		bins[bin_count - 1] = last;
    321 
    322   		//If we've bucketsorted, the array is sorted and we should skip recursion
    323   		if(!log_divisor)
    324   			return;
    325 
    326   		//Recursing
    327   		size_t max_count = get_max_count(log_divisor, last - first);
    328   		RandomAccessIter lastPos = first;
    329   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    330   			size_t count = bin_cache[u] - lastPos;
    331   			if(count < 2)
    332   				continue;
    333   			if(count < max_count)
    334   				std::sort(lastPos, bin_cache[u]);
    335   			else
    336   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
    337   		}
    338   	}
    339 
    340   	//Holds the bin vector and makes the initial recursive call
    341   	template <class RandomAccessIter, class div_type, class data_type>
    342   	inline void
    343   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type)
    344   	{
    345   		std::vector<size_t> bin_sizes;
    346   		std::vector<RandomAccessIter> bin_cache;
    347   		spread_sort_rec<RandomAccessIter, div_type, data_type>(first, last, bin_cache, 0, bin_sizes);
    348   	}
    349 
    350   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    351   	inline void
    352   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
    353   	{
    354   		std::vector<size_t> bin_sizes;
    355   		std::vector<RandomAccessIter> bin_cache;
    356   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(first, last, bin_cache, 0, bin_sizes, shift, comp);
    357   	}
    358 
    359   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    360   	inline void
    361   	spread_sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
    362   	{
    363   		std::vector<size_t> bin_sizes;
    364   		std::vector<RandomAccessIter> bin_cache;
    365   		spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
    366   	}
    367   }
    368 
    369   //Top-level sorting call for integers
    370   template <class RandomAccessIter>
    371   inline void integer_sort(RandomAccessIter first, RandomAccessIter last)
    372   {
    373   	//Don't sort if it's too small to optimize
    374   	if(last - first < detail::MIN_SORT_SIZE)
    375   		std::sort(first, last);
    376   	else
    377   		detail::spread_sort(first, last, *first >> 0, *first);
    378   }
    379 
    380   //integer_sort with functors
    381   template <class RandomAccessIter, class right_shift, class compare>
    382   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
    383   						right_shift shift, compare comp) {
    384   	if(last - first < detail::MIN_SORT_SIZE)
    385   		std::sort(first, last, comp);
    386   	else
    387   		detail::spread_sort(first, last, shift(*first, 0), *first, shift, comp);
    388   }
    389 
    390   //integer_sort with right_shift functor
    391   template <class RandomAccessIter, class right_shift>
    392   inline void integer_sort(RandomAccessIter first, RandomAccessIter last,
    393   						right_shift shift) {
    394   	if(last - first < detail::MIN_SORT_SIZE)
    395   		std::sort(first, last);
    396   	else
    397   		detail::spread_sort(first, last, shift(*first, 0), *first, shift);
    398   }
    399 
    400   //------------------------------------------------------ float_sort source --------------------------------------
    401   //Casts a RandomAccessIter to the specified data type
    402   template<class cast_type, class RandomAccessIter>
    403   inline cast_type
    404   cast_float_iter(const RandomAccessIter & floatiter)
    405   {
    406   	cast_type result;
    407   	std::memcpy(&result, &(*floatiter), sizeof(cast_type));
    408   	return result;
    409   }
    410 
    411   //Casts a data element to the specified datinner_float_a type
    412   template<class data_type, class cast_type>
    413   inline cast_type
    414   mem_cast(const data_type & data)
    415   {
    416   	cast_type result;
    417   	std::memcpy(&result, &data, sizeof(cast_type));
    418   	return result;
    419   }
    420 
    421   namespace detail {
    422   	template <class RandomAccessIter, class div_type, class right_shift>
    423   	inline void
    424   	find_extremes(RandomAccessIter current, RandomAccessIter last, div_type & max, div_type & min, right_shift shift)
    425   	{
    426   		min = max = shift(*current, 0);
    427   		while(++current < last) {
    428   			div_type value = shift(*current, 0);
    429   			if(max < value)
    430   				max = value;
    431   			else if(value < min)
    432   				min = value;
    433   		}
    434   	}
    435 
    436   	//Specialized swap loops for floating-point casting
    437   	template <class RandomAccessIter, class div_type, class data_type>
    438   	inline void inner_float_swap_loop(RandomAccessIter * bins, const RandomAccessIter & nextbinstart, unsigned ii
    439   		, const unsigned log_divisor, const div_type div_min)
    440   	{
    441   		RandomAccessIter * local_bin = bins + ii;
    442   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
    443   			for(RandomAccessIter * target_bin = (bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min));  target_bin != local_bin;
    444   				target_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(current) >> log_divisor) - div_min)) {
    445   				data_type tmp;
    446   				RandomAccessIter b = (*target_bin)++;
    447   				RandomAccessIter * b_bin = bins + ((cast_float_iter<div_type, RandomAccessIter>(b) >> log_divisor) - div_min);
    448   				//Three-way swap; if the item to be swapped doesn't belong in the current bin, swap it to where it belongs
    449   				if (b_bin != local_bin) {
    450   					RandomAccessIter c = (*b_bin)++;
    451   					tmp = *c;
    452   					*c = *b;
    453   				}
    454   				else
    455   					tmp = *b;
    456   				*b = *current;
    457   				*current = tmp;
    458   			}
    459   		}
    460   		*local_bin = nextbinstart;
    461   	}
    462 
    463   	template <class RandomAccessIter, class div_type, class data_type>
    464   	inline void float_swap_loop(RandomAccessIter * bins, RandomAccessIter & nextbinstart, unsigned ii
    465   		, const std::vector<size_t> &bin_sizes, const unsigned log_divisor, const div_type div_min)
    466   	{
    467   		nextbinstart += bin_sizes[ii];
    468   		inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, log_divisor, div_min);
    469   	}
    470 
    471   	template <class RandomAccessIter, class cast_type>
    472   	inline void
    473   	find_extremes(RandomAccessIter current, RandomAccessIter last, cast_type & max, cast_type & min)
    474   	{
    475   		min = max = cast_float_iter<cast_type, RandomAccessIter>(current);
    476   		while(++current < last) {
    477   			cast_type value = cast_float_iter<cast_type, RandomAccessIter>(current);
    478   			if(max < value)
    479   				max = value;
    480   			else if(value < min)
    481   				min = value;
    482   		}
    483   	}
    484 
    485   	//Special-case sorting of positive floats with casting instead of a right_shift
    486   	template <class RandomAccessIter, class div_type, class data_type>
    487   	inline void
    488   	positive_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    489   					, std::vector<size_t> &bin_sizes)
    490   	{
    491   		div_type max, min;
    492   		find_extremes(first, last, max, min);
    493   		if(max == min)
    494   			return;
    495   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    496   		div_type div_min = min >> log_divisor;
    497   		div_type div_max = max >> log_divisor;
    498   		unsigned bin_count = div_max - div_min + 1;
    499   		unsigned cache_end;
    500   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    501 
    502   		//Calculating the size of each bin
    503   		for (RandomAccessIter current = first; current != last;)
    504   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    505   		bins[0] = first;
    506   		for(unsigned u = 0; u < bin_count - 1; u++)
    507   			bins[u + 1] = bins[u] + bin_sizes[u];
    508 
    509   		//Swap into place
    510   		RandomAccessIter nextbinstart = first;
    511   		for(unsigned u = 0; u < bin_count - 1; ++u)
    512   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, bin_sizes, log_divisor, div_min);
    513   		bins[bin_count - 1] = last;
    514 
    515   		//Return if we've completed bucketsorting
    516   		if(!log_divisor)
    517   			return;
    518 
    519   		//Recursing
    520   		size_t max_count = get_max_count(log_divisor, last - first);
    521   		RandomAccessIter lastPos = first;
    522   		for(unsigned u = cache_offset; u < cache_end; lastPos = bin_cache[u], ++u) {
    523   			size_t count = bin_cache[u] - lastPos;
    524   			if(count < 2)
    525   				continue;
    526   			if(count < max_count)
    527   				std::sort(lastPos, bin_cache[u]);
    528   			else
    529   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    530   		}
    531   	}
    532 
    533   	//Sorting negative_ float_s
    534   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
    535   	template <class RandomAccessIter, class div_type, class data_type>
    536   	inline void
    537   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    538   					, std::vector<size_t> &bin_sizes)
    539   	{
    540   		div_type max, min;
    541   		find_extremes(first, last, max, min);
    542   		if(max == min)
    543   			return;
    544   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    545   		div_type div_min = min >> log_divisor;
    546   		div_type div_max = max >> log_divisor;
    547   		unsigned bin_count = div_max - div_min + 1;
    548   		unsigned cache_end;
    549   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    550 
    551   		//Calculating the size of each bin
    552   		for (RandomAccessIter current = first; current != last;)
    553   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    554   		bins[bin_count - 1] = first;
    555   		for(int ii = bin_count - 2; ii >= 0; --ii)
    556   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    557 
    558   		//Swap into place
    559   		RandomAccessIter nextbinstart = first;
    560   		//The last bin will always have the correct elements in it
    561   		for(int ii = bin_count - 1; ii > 0; --ii)
    562   			float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, ii, bin_sizes, log_divisor, div_min);
    563   		//Since we don't process the last bin, we need to update its end position
    564   		bin_cache[cache_offset] = last;
    565 
    566   		//Return if we've completed bucketsorting
    567   		if(!log_divisor)
    568   			return;
    569 
    570   		//Recursing
    571   		size_t max_count = get_max_count(log_divisor, last - first);
    572   		RandomAccessIter lastPos = first;
    573   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    574   			size_t count = bin_cache[ii] - lastPos;
    575   			if(count < 2)
    576   				continue;
    577   			if(count < max_count)
    578   				std::sort(lastPos, bin_cache[ii]);
    579   			else
    580   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
    581   		}
    582   	}
    583 
    584   	//Sorting negative_ float_s
    585   	//Note that bins are iterated in reverse order because max_neg_float = min_neg_int
    586   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    587   	inline void
    588   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    589   					, std::vector<size_t> &bin_sizes, right_shift shift)
    590   	{
    591   		div_type max, min;
    592   		find_extremes(first, last, max, min, shift);
    593   		if(max == min)
    594   			return;
    595   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    596   		div_type div_min = min >> log_divisor;
    597   		div_type div_max = max >> log_divisor;
    598   		unsigned bin_count = div_max - div_min + 1;
    599   		unsigned cache_end;
    600   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    601 
    602   		//Calculating the size of each bin
    603   		for (RandomAccessIter current = first; current != last;)
    604   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    605   		bins[bin_count - 1] = first;
    606   		for(int ii = bin_count - 2; ii >= 0; --ii)
    607   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    608 
    609   		//Swap into place
    610   		RandomAccessIter nextbinstart = first;
    611   		//The last bin will always have the correct elements in it
    612   		for(int ii = bin_count - 1; ii > 0; --ii)
    613   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    614   		//Since we don't process the last bin, we need to update its end position
    615   		bin_cache[cache_offset] = last;
    616 
    617   		//Return if we've completed bucketsorting
    618   		if(!log_divisor)
    619   			return;
    620 
    621   		//Recursing
    622   		size_t max_count = get_max_count(log_divisor, last - first);
    623   		RandomAccessIter lastPos = first;
    624   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    625   			size_t count = bin_cache[ii] - lastPos;
    626   			if(count < 2)
    627   				continue;
    628   			if(count < max_count)
    629   				std::sort(lastPos, bin_cache[ii]);
    630   			else
    631   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
    632   		}
    633   	}
    634 
    635   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    636   	inline void
    637   	negative_float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    638   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    639   	{
    640   		div_type max, min;
    641   		find_extremes(first, last, max, min, shift);
    642   		if(max == min)
    643   			return;
    644   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    645   		div_type div_min = min >> log_divisor;
    646   		div_type div_max = max >> log_divisor;
    647   		unsigned bin_count = div_max - div_min + 1;
    648   		unsigned cache_end;
    649   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    650 
    651   		//Calculating the size of each bin
    652   		for (RandomAccessIter current = first; current != last;)
    653   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    654   		bins[bin_count - 1] = first;
    655   		for(int ii = bin_count - 2; ii >= 0; --ii)
    656   			bins[ii] = bins[ii + 1] + bin_sizes[ii + 1];
    657 
    658   		//Swap into place
    659   		RandomAccessIter nextbinstart = first;
    660   		//The last bin will always have the correct elements in it
    661   		for(int ii = bin_count - 1; ii > 0; --ii)
    662   			swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, ii, shift, bin_sizes, log_divisor, div_min);
    663   		//Since we don't process the last bin, we need to update its end position
    664   		bin_cache[cache_offset] = last;
    665 
    666   		//Return if we've completed bucketsorting
    667   		if(!log_divisor)
    668   			return;
    669 
    670   		//Recursing
    671   		size_t max_count = get_max_count(log_divisor, last - first);
    672   		RandomAccessIter lastPos = first;
    673   		for(int ii = cache_end - 1; ii >= (int)cache_offset; lastPos = bin_cache[ii], --ii) {
    674   			size_t count = bin_cache[ii] - lastPos;
    675   			if(count < 2)
    676   				continue;
    677   			if(count < max_count)
    678   				std::sort(lastPos, bin_cache[ii], comp);
    679   			else
    680   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift, compare>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
    681   		}
    682   	}
    683 
    684   	//Casting special-case for floating-point sorting
    685   	template <class RandomAccessIter, class div_type, class data_type>
    686   	inline void
    687   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    688   					, std::vector<size_t> &bin_sizes)
    689   	{
    690   		div_type max, min;
    691   		find_extremes(first, last, max, min);
    692   		if(max == min)
    693   			return;
    694   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    695   		div_type div_min = min >> log_divisor;
    696   		div_type div_max = max >> log_divisor;
    697   		unsigned bin_count = div_max - div_min + 1;
    698   		unsigned cache_end;
    699   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    700 
    701   		//Calculating the size of each bin
    702   		for (RandomAccessIter current = first; current != last;)
    703   			bin_sizes[(cast_float_iter<div_type, RandomAccessIter>(current++) >> log_divisor) - div_min]++;
    704   		//The index of the first positive bin
    705   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    706   		//Resetting if all bins are negative
    707   		if(cache_offset + first_positive > cache_end)
    708   			first_positive = cache_end - cache_offset;
    709   		//Reversing the order of the negative bins
    710   		//Note that because of the negative/positive ordering direction flip
    711   		//We can not depend upon bin order and positions matching up
    712   		//so bin_sizes must be reused to contain the end of the bin
    713   		if(first_positive > 0) {
    714   			bins[first_positive - 1] = first;
    715   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    716   				bins[ii] = first + bin_sizes[ii + 1];
    717   				bin_sizes[ii] += bin_sizes[ii + 1];
    718   			}
    719   			//Handling positives following negatives
    720   			if((unsigned)first_positive < bin_count) {
    721   				bins[first_positive] = first + bin_sizes[0];
    722   				bin_sizes[first_positive] += bin_sizes[0];
    723   			}
    724   		}
    725   		else
    726   			bins[0] = first;
    727   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    728   			bins[u + 1] = first + bin_sizes[u];
    729   			bin_sizes[u + 1] += bin_sizes[u];
    730   		}
    731 
    732   		//Swap into place
    733   		RandomAccessIter nextbinstart = first;
    734   		for(unsigned u = 0; u < bin_count; ++u) {
    735   			nextbinstart = first + bin_sizes[u];
    736   			inner_float_swap_loop<RandomAccessIter, div_type, data_type>(bins, nextbinstart, u, log_divisor, div_min);
    737   		}
    738 
    739   		if(!log_divisor)
    740   			return;
    741 
    742   		//Handling negative values first
    743   		size_t max_count = get_max_count(log_divisor, last - first);
    744   		RandomAccessIter lastPos = first;
    745   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    746   			size_t count = bin_cache[ii] - lastPos;
    747   			if(count < 2)
    748   				continue;
    749   			if(count < max_count)
    750   				std::sort(lastPos, bin_cache[ii]);
    751   			//sort negative values using reversed-bin spread_sort
    752   			else
    753   				negative_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes);
    754   		}
    755 
    756   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    757   			size_t count = bin_cache[u] - lastPos;
    758   			if(count < 2)
    759   				continue;
    760   			if(count < max_count)
    761   				std::sort(lastPos, bin_cache[u]);
    762   			//sort positive values using normal spread_sort
    763   			else
    764   				positive_float_sort_rec<RandomAccessIter, div_type, data_type>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes);
    765   		}
    766   	}
    767 
    768   	//Functor implementation for recursive sorting
    769   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    770   	inline void
    771   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    772   					, std::vector<size_t> &bin_sizes, right_shift shift)
    773   	{
    774   		div_type max, min;
    775   		find_extremes(first, last, max, min, shift);
    776   		if(max == min)
    777   			return;
    778   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    779   		div_type div_min = min >> log_divisor;
    780   		div_type div_max = max >> log_divisor;
    781   		unsigned bin_count = div_max - div_min + 1;
    782   		unsigned cache_end;
    783   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    784 
    785   		//Calculating the size of each bin
    786   		for (RandomAccessIter current = first; current != last;)
    787   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    788   		//The index of the first positive bin
    789   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    790   		//Resetting if all bins are negative
    791   		if(cache_offset + first_positive > cache_end)
    792   			first_positive = cache_end - cache_offset;
    793   		//Reversing the order of the negative bins
    794   		//Note that because of the negative/positive ordering direction flip
    795   		//We can not depend upon bin order and positions matching up
    796   		//so bin_sizes must be reused to contain the end of the bin
    797   		if(first_positive > 0) {
    798   			bins[first_positive - 1] = first;
    799   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    800   				bins[ii] = first + bin_sizes[ii + 1];
    801   				bin_sizes[ii] += bin_sizes[ii + 1];
    802   			}
    803   			//Handling positives following negatives
    804   			if((unsigned)first_positive < bin_count) {
    805   				bins[first_positive] = first + bin_sizes[0];
    806   				bin_sizes[first_positive] += bin_sizes[0];
    807   			}
    808   		}
    809   		else
    810   			bins[0] = first;
    811   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    812   			bins[u + 1] = first + bin_sizes[u];
    813   			bin_sizes[u + 1] += bin_sizes[u];
    814   		}
    815 
    816   		//Swap into place
    817   		RandomAccessIter nextbinstart = first;
    818   		for(unsigned u = 0; u < bin_count; ++u) {
    819   			nextbinstart = first + bin_sizes[u];
    820   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
    821   		}
    822 
    823   		//Return if we've completed bucketsorting
    824   		if(!log_divisor)
    825   			return;
    826 
    827   		//Handling negative values first
    828   		size_t max_count = get_max_count(log_divisor, last - first);
    829   		RandomAccessIter lastPos = first;
    830   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    831   			size_t count = bin_cache[ii] - lastPos;
    832   			if(count < 2)
    833   				continue;
    834   			if(count < max_count)
    835   				std::sort(lastPos, bin_cache[ii]);
    836   			//sort negative values using reversed-bin spread_sort
    837   			else
    838   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift);
    839   		}
    840 
    841   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    842   			size_t count = bin_cache[u] - lastPos;
    843   			if(count < 2)
    844   				continue;
    845   			if(count < max_count)
    846   				std::sort(lastPos, bin_cache[u]);
    847   			//sort positive values using normal spread_sort
    848   			else
    849   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift);
    850   		}
    851   	}
    852 
    853   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    854   	inline void
    855   	float_sort_rec(RandomAccessIter first, RandomAccessIter last, std::vector<RandomAccessIter> &bin_cache, unsigned cache_offset
    856   					, std::vector<size_t> &bin_sizes, right_shift shift, compare comp)
    857   	{
    858   		div_type max, min;
    859   		find_extremes(first, last, max, min, shift);
    860   		if(max == min)
    861   			return;
    862   		unsigned log_divisor = get_log_divisor(last - first, rough_log_2_size((size_t)(max) - min));
    863   		div_type div_min = min >> log_divisor;
    864   		div_type div_max = max >> log_divisor;
    865   		unsigned bin_count = div_max - div_min + 1;
    866   		unsigned cache_end;
    867   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, bin_count);
    868 
    869   		//Calculating the size of each bin
    870   		for (RandomAccessIter current = first; current != last;)
    871   			bin_sizes[shift(*(current++), log_divisor) - div_min]++;
    872   		//The index of the first positive bin
    873   		div_type first_positive = (div_min < 0) ? -div_min : 0;
    874   		//Resetting if all bins are negative
    875   		if(cache_offset + first_positive > cache_end)
    876   			first_positive = cache_end - cache_offset;
    877   		//Reversing the order of the negative bins
    878   		//Note that because of the negative/positive ordering direction flip
    879   		//We can not depend upon bin order and positions matching up
    880   		//so bin_sizes must be reused to contain the end of the bin
    881   		if(first_positive > 0) {
    882   			bins[first_positive - 1] = first;
    883   			for(int ii = first_positive - 2; ii >= 0; --ii) {
    884   				bins[ii] = first + bin_sizes[ii + 1];
    885   				bin_sizes[ii] += bin_sizes[ii + 1];
    886   			}
    887   			//Handling positives following negatives
    888   			if((unsigned)first_positive < bin_count) {
    889   				bins[first_positive] = first + bin_sizes[0];
    890   				bin_sizes[first_positive] += bin_sizes[0];
    891   			}
    892   		}
    893   		else
    894   			bins[0] = first;
    895   		for(unsigned u = first_positive; u < bin_count - 1; u++) {
    896   			bins[u + 1] = first + bin_sizes[u];
    897   			bin_sizes[u + 1] += bin_sizes[u];
    898   		}
    899 
    900   		//Swap into place
    901   		RandomAccessIter nextbinstart = first;
    902   		for(unsigned u = 0; u < bin_count; ++u) {
    903   			nextbinstart = first + bin_sizes[u];
    904   			inner_swap_loop<RandomAccessIter, div_type, data_type, right_shift>(bins, nextbinstart, u, shift, log_divisor, div_min);
    905   		}
    906 
    907   		//Return if we've completed bucketsorting
    908   		if(!log_divisor)
    909   			return;
    910 
    911   		//Handling negative values first
    912   		size_t max_count = get_max_count(log_divisor, last - first);
    913   		RandomAccessIter lastPos = first;
    914   		for(int ii = cache_offset + first_positive - 1; ii >= (int)cache_offset ; lastPos = bin_cache[ii--]) {
    915   			size_t count = bin_cache[ii] - lastPos;
    916   			if(count < 2)
    917   				continue;
    918   			if(count < max_count)
    919   				std::sort(lastPos, bin_cache[ii]);
    920   			//sort negative values using reversed-bin spread_sort
    921   			else
    922   				negative_float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[ii], bin_cache, cache_end, bin_sizes, shift, comp);
    923   		}
    924 
    925   		for(unsigned u = cache_offset + first_positive; u < cache_end; lastPos = bin_cache[u], ++u) {
    926   			size_t count = bin_cache[u] - lastPos;
    927   			if(count < 2)
    928   				continue;
    929   			if(count < max_count)
    930   				std::sort(lastPos, bin_cache[u]);
    931   			//sort positive values using normal spread_sort
    932   			else
    933   				spread_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(lastPos, bin_cache[u], bin_cache, cache_end, bin_sizes, shift, comp);
    934   		}
    935   	}
    936 
    937   	template <class RandomAccessIter, class cast_type, class data_type>
    938   	inline void
    939   	float_Sort(RandomAccessIter first, RandomAccessIter last, cast_type, data_type)
    940   	{
    941   		std::vector<size_t> bin_sizes;
    942   		std::vector<RandomAccessIter> bin_cache;
    943   		float_sort_rec<RandomAccessIter, cast_type, data_type>(first, last, bin_cache, 0, bin_sizes);
    944   	}
    945 
    946   	template <class RandomAccessIter, class div_type, class data_type, class right_shift>
    947   	inline void
    948   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift)
    949   	{
    950   		std::vector<size_t> bin_sizes;
    951   		std::vector<RandomAccessIter> bin_cache;
    952   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift);
    953   	}
    954 
    955   	template <class RandomAccessIter, class div_type, class data_type, class right_shift, class compare>
    956   	inline void
    957   	float_Sort(RandomAccessIter first, RandomAccessIter last, div_type, data_type, right_shift shift, compare comp)
    958   	{
    959   		std::vector<size_t> bin_sizes;
    960   		std::vector<RandomAccessIter> bin_cache;
    961   		float_sort_rec<RandomAccessIter, div_type, data_type, right_shift>(first, last, bin_cache, 0, bin_sizes, shift, comp);
    962   	}
    963   }
    964 
    965   //float_sort with casting
    966   //The cast_type must be equal in size to the data type, and must be a signed integer
    967   template <class RandomAccessIter, class cast_type>
    968   inline void float_sort_cast(RandomAccessIter first, RandomAccessIter last, cast_type cVal)
    969   {
    970   	if(last - first < detail::MIN_SORT_SIZE)
    971   		std::sort(first, last);
    972   	else
    973   		detail::float_Sort(first, last, cVal, *first);
    974   }
    975 
    976   //float_sort with casting to an int
    977   //Only use this with IEEE floating-point numbers
    978   template <class RandomAccessIter>
    979   inline void float_sort_cast_to_int(RandomAccessIter first, RandomAccessIter last)
    980   {
    981   	int cVal = 0;
    982   	float_sort_cast(first, last, cVal);
    983   }
    984 
    985   //float_sort with functors
    986   template <class RandomAccessIter, class right_shift>
    987   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift)
    988   {
    989   	if(last - first < detail::MIN_SORT_SIZE)
    990   		std::sort(first, last);
    991   	else
    992   		detail::float_Sort(first, last, shift(*first, 0), *first, shift);
    993   }
    994 
    995   template <class RandomAccessIter, class right_shift, class compare>
    996   inline void float_sort(RandomAccessIter first, RandomAccessIter last, right_shift shift, compare comp)
    997   {
    998   	if(last - first < detail::MIN_SORT_SIZE)
    999   		std::sort(first, last, comp);
   1000   	else
   1001   		detail::float_Sort(first, last, shift(*first, 0), *first, shift, comp);
   1002   }
   1003 
   1004   //------------------------------------------------- string_sort source ---------------------------------------------
   1005   namespace detail {
   1006   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
   1007   	template<class RandomAccessIter>
   1008   	inline void
   1009   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset)
   1010   	{
   1011   		unsigned nextOffset = char_offset;
   1012   		bool done = false;
   1013   		while(!done) {
   1014   			RandomAccessIter curr = first;
   1015   			do {
   1016   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
   1017   				if((*curr).size() > char_offset && ((*curr).size() <= (nextOffset + 1) || (*curr)[nextOffset] != (*first)[nextOffset])) {
   1018   					done = true;
   1019   					break;
   1020   				}
   1021   			} while(++curr != finish);
   1022   			if(!done)
   1023   				++nextOffset;
   1024   		}
   1025   		char_offset = nextOffset;
   1026   	}
   1027 
   1028   	//Offsetting on identical characters.  This function works a character at a time for optimal worst-case performance.
   1029   	template<class RandomAccessIter, class get_char, class get_length>
   1030   	inline void
   1031   	update_offset(RandomAccessIter first, RandomAccessIter finish, unsigned &char_offset, get_char getchar, get_length length)
   1032   	{
   1033   		unsigned nextOffset = char_offset;
   1034   		bool done = false;
   1035   		while(!done) {
   1036   			RandomAccessIter curr = first;
   1037   			do {
   1038   				//ignore empties, but if the nextOffset would exceed the length or not match, exit; we've found the last matching character
   1039   				if(length(*curr) > char_offset && (length(*curr) <= (nextOffset + 1) || getchar((*curr), nextOffset) != getchar((*first), nextOffset))) {
   1040   					done = true;
   1041   					break;
   1042   				}
   1043   			} while(++curr != finish);
   1044   			if(!done)
   1045   				++nextOffset;
   1046   		}
   1047   		char_offset = nextOffset;
   1048   	}
   1049 
   1050   	//A comparison functor for strings that assumes they are identical up to char_offset
   1051   	template<class data_type, class unsignedchar_type>
   1052   	struct offset_lessthan {
   1053   		offset_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
   1054   		inline bool operator()(const data_type &x, const data_type &y) const
   1055   		{
   1056   			unsigned minSize = std::min(x.size(), y.size());
   1057   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1058   				if(static_cast<unsignedchar_type>(x[u]) < static_cast<unsignedchar_type>(y[u]))
   1059   					return true;
   1060   				else if(static_cast<unsignedchar_type>(y[u]) < static_cast<unsignedchar_type>(x[u]))
   1061   					return false;
   1062   			}
   1063   			return x.size() < y.size();
   1064   		}
   1065   		unsigned fchar_offset;
   1066   	};
   1067 
   1068   	//A comparison functor for strings that assumes they are identical up to char_offset
   1069   	template<class data_type, class unsignedchar_type>
   1070   	struct offset_greaterthan {
   1071   		offset_greaterthan(unsigned char_offset) : fchar_offset(char_offset){}
   1072   		inline bool operator()(const data_type &x, const data_type &y) const
   1073   		{
   1074   			unsigned minSize = std::min(x.size(), y.size());
   1075   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1076   				if(static_cast<unsignedchar_type>(x[u]) > static_cast<unsignedchar_type>(y[u]))
   1077   					return true;
   1078   				else if(static_cast<unsignedchar_type>(y[u]) > static_cast<unsignedchar_type>(x[u]))
   1079   					return false;
   1080   			}
   1081   			return x.size() > y.size();
   1082   		}
   1083   		unsigned fchar_offset;
   1084   	};
   1085 
   1086   	//A comparison functor for strings that assumes they are identical up to char_offset
   1087   	template<class data_type, class get_char, class get_length>
   1088   	struct offset_char_lessthan {
   1089   		offset_char_lessthan(unsigned char_offset) : fchar_offset(char_offset){}
   1090   		inline bool operator()(const data_type &x, const data_type &y) const
   1091   		{
   1092   			unsigned minSize = std::min(length(x), length(y));
   1093   			for(unsigned u = fchar_offset; u < minSize; ++u) {
   1094   				if(getchar(x, u) < getchar(y, u))
   1095   					return true;
   1096   				else if(getchar(y, u) < getchar(x, u))
   1097   					return false;
   1098   			}
   1099   			return length(x) < length(y);
   1100   		}
   1101   		unsigned fchar_offset;
   1102   		get_char getchar;
   1103   		get_length length;
   1104   	};
   1105 
   1106   	//String sorting recursive implementation
   1107   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1108   	inline void
   1109   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1110   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
   1111   	{
   1112   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1113   		//Iterate to the end of the empties.  If all empty, return
   1114   		while((*first).size() <= char_offset) {
   1115   			if(++first == last)
   1116   				return;
   1117   		}
   1118   		RandomAccessIter finish = last - 1;
   1119   		//Getting the last non-empty
   1120   		for(;(*finish).size() <= char_offset; --finish) { }
   1121   		++finish;
   1122   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1123   		update_offset(first, finish, char_offset);
   1124 
   1125   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1126   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1127   		const unsigned max_size = bin_count;
   1128   		const unsigned membin_count = bin_count + 1;
   1129   		unsigned cache_end;
   1130   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1131 
   1132   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1133   		for (RandomAccessIter current = first; current != last; ++current) {
   1134   			if((*current).size() <= char_offset) {
   1135   				bin_sizes[0]++;
   1136   			}
   1137   			else
   1138   				bin_sizes[static_cast<unsignedchar_type>((*current)[char_offset]) + 1]++;
   1139   		}
   1140   		//Assign the bin positions
   1141   		bin_cache[cache_offset] = first;
   1142   		for(unsigned u = 0; u < membin_count - 1; u++)
   1143   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1144 
   1145   		//Swap into place
   1146   		RandomAccessIter nextbinstart = first;
   1147   		//handling empty bins
   1148   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1149   		nextbinstart +=	bin_sizes[0];
   1150   		RandomAccessIter * target_bin;
   1151   		//Iterating over each element in the bin of empties
   1152   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1153   			//empties belong in this bin
   1154   			while((*current).size() > char_offset) {
   1155   				target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);
   1156   				iter_swap(current, (*target_bin)++);
   1157   			}
   1158   		}
   1159   		*local_bin = nextbinstart;
   1160   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1161   		unsigned last_bin = bin_count - 1;
   1162   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1163   		//This dominates runtime, mostly in the swap and bin lookups
   1164   		for(unsigned u = 0; u < last_bin; ++u) {
   1165   			local_bin = bins + u;
   1166   			nextbinstart += bin_sizes[u + 1];
   1167   			//Iterating over each element in this bin
   1168   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1169   				//Swapping elements in current into place until the correct element has been swapped in
   1170   				for(target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
   1171   					target_bin = bins + static_cast<unsignedchar_type>((*current)[char_offset]))
   1172   					iter_swap(current, (*target_bin)++);
   1173   			}
   1174   			*local_bin = nextbinstart;
   1175   		}
   1176   		bins[last_bin] = last;
   1177   		//Recursing
   1178   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1179   		//Skip this loop for empties
   1180   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1181   			size_t count = bin_cache[u] - lastPos;
   1182   			//don't sort unless there are at least two items to compare
   1183   			if(count < 2)
   1184   				continue;
   1185   			//using std::sort if its worst-case is better
   1186   			if(count < max_size)
   1187   				std::sort(lastPos, bin_cache[u], offset_lessthan<data_type, unsignedchar_type>(char_offset + 1));
   1188   			else
   1189   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
   1190   		}
   1191   	}
   1192 
   1193   	//Sorts strings in reverse order, with empties at the end
   1194   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1195   	inline void
   1196   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1197   		, unsigned cache_offset, std::vector<size_t> &bin_sizes)
   1198   	{
   1199   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1200   		RandomAccessIter curr = first;
   1201   		//Iterate to the end of the empties.  If all empty, return
   1202   		while((*curr).size() <= char_offset) {
   1203   			if(++curr == last)
   1204   				return;
   1205   		}
   1206   		//Getting the last non-empty
   1207   		while((*(--last)).size() <= char_offset) { }
   1208   		++last;
   1209   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1210   		update_offset(curr, last, char_offset);
   1211   		RandomAccessIter * target_bin;
   1212 
   1213   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1214   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1215   		const unsigned max_size = bin_count;
   1216   		const unsigned membin_count = bin_count + 1;
   1217   		const unsigned max_bin = bin_count - 1;
   1218   		unsigned cache_end;
   1219   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
   1220   		RandomAccessIter * end_bin = &(bin_cache[cache_offset + max_bin]);
   1221 
   1222   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1223   		for (RandomAccessIter current = first; current != last; ++current) {
   1224   			if((*current).size() <= char_offset) {
   1225   				bin_sizes[bin_count]++;
   1226   			}
   1227   			else
   1228   				bin_sizes[max_bin - static_cast<unsignedchar_type>((*current)[char_offset])]++;
   1229   		}
   1230   		//Assign the bin positions
   1231   		bin_cache[cache_offset] = first;
   1232   		for(unsigned u = 0; u < membin_count - 1; u++)
   1233   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1234 
   1235   		//Swap into place
   1236   		RandomAccessIter nextbinstart = last;
   1237   		//handling empty bins
   1238   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
   1239   		RandomAccessIter lastFull = *local_bin;
   1240   		//Iterating over each element in the bin of empties
   1241   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1242   			//empties belong in this bin
   1243   			while((*current).size() > char_offset) {
   1244   				target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);
   1245   				iter_swap(current, (*target_bin)++);
   1246   			}
   1247   		}
   1248   		*local_bin = nextbinstart;
   1249   		nextbinstart = first;
   1250   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1251   		unsigned last_bin = max_bin;
   1252   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
   1253   		//This dominates runtime, mostly in the swap and bin lookups
   1254   		for(unsigned u = 0; u < last_bin; ++u) {
   1255   			local_bin = bins + u;
   1256   			nextbinstart += bin_sizes[u];
   1257   			//Iterating over each element in this bin
   1258   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1259   				//Swapping elements in current into place until the correct element has been swapped in
   1260   				for(target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]);  target_bin != local_bin;
   1261   					target_bin = end_bin - static_cast<unsignedchar_type>((*current)[char_offset]))
   1262   					iter_swap(current, (*target_bin)++);
   1263   			}
   1264   			*local_bin = nextbinstart;
   1265   		}
   1266   		bins[last_bin] = lastFull;
   1267   		//Recursing
   1268   		RandomAccessIter lastPos = first;
   1269   		//Skip this loop for empties
   1270   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
   1271   			size_t count = bin_cache[u] - lastPos;
   1272   			//don't sort unless there are at least two items to compare
   1273   			if(count < 2)
   1274   				continue;
   1275   			//using std::sort if its worst-case is better
   1276   			if(count < max_size)
   1277   				std::sort(lastPos, bin_cache[u], offset_greaterthan<data_type, unsignedchar_type>(char_offset + 1));
   1278   			else
   1279   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes);
   1280   		}
   1281   	}
   1282 
   1283   	//String sorting recursive implementation
   1284   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length>
   1285   	inline void
   1286   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1287   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length)
   1288   	{
   1289   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1290   		//Iterate to the end of the empties.  If all empty, return
   1291   		while(length(*first) <= char_offset) {
   1292   			if(++first == last)
   1293   				return;
   1294   		}
   1295   		RandomAccessIter finish = last - 1;
   1296   		//Getting the last non-empty
   1297   		for(;length(*finish) <= char_offset; --finish) { }
   1298   		++finish;
   1299   		update_offset(first, finish, char_offset, getchar, length);
   1300 
   1301   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1302   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1303   		const unsigned max_size = bin_count;
   1304   		const unsigned membin_count = bin_count + 1;
   1305   		unsigned cache_end;
   1306   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1307 
   1308   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1309   		for (RandomAccessIter current = first; current != last; ++current) {
   1310   			if(length(*current) <= char_offset) {
   1311   				bin_sizes[0]++;
   1312   			}
   1313   			else
   1314   				bin_sizes[getchar((*current), char_offset) + 1]++;
   1315   		}
   1316   		//Assign the bin positions
   1317   		bin_cache[cache_offset] = first;
   1318   		for(unsigned u = 0; u < membin_count - 1; u++)
   1319   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1320 
   1321   		//Swap into place
   1322   		RandomAccessIter nextbinstart = first;
   1323   		//handling empty bins
   1324   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1325   		nextbinstart +=	bin_sizes[0];
   1326   		RandomAccessIter * target_bin;
   1327   		//Iterating over each element in the bin of empties
   1328   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1329   			//empties belong in this bin
   1330   			while(length(*current) > char_offset) {
   1331   				target_bin = bins + getchar((*current), char_offset);
   1332   				iter_swap(current, (*target_bin)++);
   1333   			}
   1334   		}
   1335   		*local_bin = nextbinstart;
   1336   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1337   		unsigned last_bin = bin_count - 1;
   1338   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1339   		//This dominates runtime, mostly in the swap and bin lookups
   1340   		for(unsigned ii = 0; ii < last_bin; ++ii) {
   1341   			local_bin = bins + ii;
   1342   			nextbinstart += bin_sizes[ii + 1];
   1343   			//Iterating over each element in this bin
   1344   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1345   				//Swapping elements in current into place until the correct element has been swapped in
   1346   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
   1347   					target_bin = bins + getchar((*current), char_offset))
   1348   					iter_swap(current, (*target_bin)++);
   1349   			}
   1350   			*local_bin = nextbinstart;
   1351   		}
   1352   		bins[last_bin] = last;
   1353 
   1354   		//Recursing
   1355   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1356   		//Skip this loop for empties
   1357   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1358   			size_t count = bin_cache[u] - lastPos;
   1359   			//don't sort unless there are at least two items to compare
   1360   			if(count < 2)
   1361   				continue;
   1362   			//using std::sort if its worst-case is better
   1363   			if(count < max_size)
   1364   				std::sort(lastPos, bin_cache[u], offset_char_lessthan<data_type, get_char, get_length>(char_offset + 1));
   1365   			else
   1366   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(lastPos, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length);
   1367   		}
   1368   	}
   1369 
   1370   	//String sorting recursive implementation
   1371   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
   1372   	inline void
   1373   	string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1374   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
   1375   	{
   1376   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1377   		//Iterate to the end of the empties.  If all empty, return
   1378   		while(length(*first) <= char_offset) {
   1379   			if(++first == last)
   1380   				return;
   1381   		}
   1382   		RandomAccessIter finish = last - 1;
   1383   		//Getting the last non-empty
   1384   		for(;length(*finish) <= char_offset; --finish) { }
   1385   		++finish;
   1386   		update_offset(first, finish, char_offset, getchar, length);
   1387 
   1388   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1389   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1390   		const unsigned max_size = bin_count;
   1391   		const unsigned membin_count = bin_count + 1;
   1392   		unsigned cache_end;
   1393   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count) + 1;
   1394 
   1395   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1396   		for (RandomAccessIter current = first; current != last; ++current) {
   1397   			if(length(*current) <= char_offset) {
   1398   				bin_sizes[0]++;
   1399   			}
   1400   			else
   1401   				bin_sizes[getchar((*current), char_offset) + 1]++;
   1402   		}
   1403   		//Assign the bin positions
   1404   		bin_cache[cache_offset] = first;
   1405   		for(unsigned u = 0; u < membin_count - 1; u++)
   1406   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1407 
   1408   		//Swap into place
   1409   		RandomAccessIter nextbinstart = first;
   1410   		//handling empty bins
   1411   		RandomAccessIter * local_bin = &(bin_cache[cache_offset]);
   1412   		nextbinstart +=	bin_sizes[0];
   1413   		RandomAccessIter * target_bin;
   1414   		//Iterating over each element in the bin of empties
   1415   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1416   			//empties belong in this bin
   1417   			while(length(*current) > char_offset) {
   1418   				target_bin = bins + getchar((*current), char_offset);
   1419   				iter_swap(current, (*target_bin)++);
   1420   			}
   1421   		}
   1422   		*local_bin = nextbinstart;
   1423   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1424   		unsigned last_bin = bin_count - 1;
   1425   		for(; last_bin && !bin_sizes[last_bin + 1]; --last_bin) { }
   1426   		//This dominates runtime, mostly in the swap and bin lookups
   1427   		for(unsigned u = 0; u < last_bin; ++u) {
   1428   			local_bin = bins + u;
   1429   			nextbinstart += bin_sizes[u + 1];
   1430   			//Iterating over each element in this bin
   1431   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1432   				//Swapping elements in current into place until the correct element has been swapped in
   1433   				for(target_bin = bins + getchar((*current), char_offset);  target_bin != local_bin;
   1434   					target_bin = bins + getchar((*current), char_offset))
   1435   					iter_swap(current, (*target_bin)++);
   1436   			}
   1437   			*local_bin = nextbinstart;
   1438   		}
   1439   		bins[last_bin] = last;
   1440 
   1441   		//Recursing
   1442   		RandomAccessIter lastPos = bin_cache[cache_offset];
   1443   		//Skip this loop for empties
   1444   		for(unsigned u = cache_offset + 1; u < cache_offset + last_bin + 2; lastPos = bin_cache[u], ++u) {
   1445   			size_t count = bin_cache[u] - lastPos;
   1446   			//don't sort unless there are at least two items to compare
   1447   			if(count < 2)
   1448   				continue;
   1449   			//using std::sort if its worst-case is better
   1450   			if(count < max_size)
   1451   				std::sort(lastPos, bin_cache[u], comp);
   1452   			else
   1453   				string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
   1454   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
   1455   		}
   1456   	}
   1457 
   1458   	//Sorts strings in reverse order, with empties at the end
   1459   	template <class RandomAccessIter, class data_type, class unsignedchar_type, class get_char, class get_length, class compare>
   1460   	inline void
   1461   	reverse_string_sort_rec(RandomAccessIter first, RandomAccessIter last, unsigned char_offset, std::vector<RandomAccessIter> &bin_cache
   1462   		, unsigned cache_offset, std::vector<size_t> &bin_sizes, get_char getchar, get_length length, compare comp)
   1463   	{
   1464   		//This section is not strictly necessary, but makes handling of long identical substrings much faster, with a mild average performance impact.
   1465   		RandomAccessIter curr = first;
   1466   		//Iterate to the end of the empties.  If all empty, return
   1467   		while(length(*curr) <= char_offset) {
   1468   			if(++curr == last)
   1469   				return;
   1470   		}
   1471   		//Getting the last non-empty
   1472   		while(length(*(--last)) <= char_offset) { }
   1473   		++last;
   1474   		//Offsetting on identical characters.  This section works a character at a time for optimal worst-case performance.
   1475   		update_offset(first, last, char_offset, getchar, length);
   1476 
   1477   		const unsigned bin_count = (1 << (sizeof(unsignedchar_type)*8));
   1478   		//Equal worst-case between radix and comparison-based is when bin_count = n*log(n).
   1479   		const unsigned max_size = bin_count;
   1480   		const unsigned membin_count = bin_count + 1;
   1481   		const unsigned max_bin = bin_count - 1;
   1482   		unsigned cache_end;
   1483   		RandomAccessIter * bins = size_bins(bin_sizes, bin_cache, cache_offset, cache_end, membin_count);
   1484   		RandomAccessIter *end_bin = &(bin_cache[cache_offset + max_bin]);
   1485 
   1486   		//Calculating the size of each bin; this takes roughly 10% of runtime
   1487   		for (RandomAccessIter current = first; current != last; ++current) {
   1488   			if(length(*current) <= char_offset) {
   1489   				bin_sizes[bin_count]++;
   1490   			}
   1491   			else
   1492   				bin_sizes[max_bin - getchar((*current), char_offset)]++;
   1493   		}
   1494   		//Assign the bin positions
   1495   		bin_cache[cache_offset] = first;
   1496   		for(unsigned u = 0; u < membin_count - 1; u++)
   1497   			bin_cache[cache_offset + u + 1] = bin_cache[cache_offset + u] + bin_sizes[u];
   1498 
   1499   		//Swap into place
   1500   		RandomAccessIter nextbinstart = last;
   1501   		//handling empty bins
   1502   		RandomAccessIter * local_bin = &(bin_cache[cache_offset + bin_count]);
   1503   		RandomAccessIter lastFull = *local_bin;
   1504   		RandomAccessIter * target_bin;
   1505   		//Iterating over each element in the bin of empties
   1506   		for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1507   			//empties belong in this bin
   1508   			while(length(*current) > char_offset) {
   1509   				target_bin = end_bin - getchar((*current), char_offset);
   1510   				iter_swap(current, (*target_bin)++);
   1511   			}
   1512   		}
   1513   		*local_bin = nextbinstart;
   1514   		nextbinstart = first;
   1515   		//iterate backwards to find the last bin with elements in it; this saves iterations in multiple loops
   1516   		unsigned last_bin = max_bin;
   1517   		for(; last_bin && !bin_sizes[last_bin]; --last_bin) { }
   1518   		//This dominates runtime, mostly in the swap and bin lookups
   1519   		for(unsigned u = 0; u < last_bin; ++u) {
   1520   			local_bin = bins + u;
   1521   			nextbinstart += bin_sizes[u];
   1522   			//Iterating over each element in this bin
   1523   			for(RandomAccessIter current = *local_bin; current < nextbinstart; ++current) {
   1524   				//Swapping elements in current into place until the correct element has been swapped in
   1525   				for(target_bin = end_bin - getchar((*current), char_offset);  target_bin != local_bin;
   1526   					target_bin = end_bin - getchar((*current), char_offset))
   1527   					iter_swap(current, (*target_bin)++);
   1528   			}
   1529   			*local_bin = nextbinstart;
   1530   		}
   1531   		bins[last_bin] = lastFull;
   1532   		//Recursing
   1533   		RandomAccessIter lastPos = first;
   1534   		//Skip this loop for empties
   1535   		for(unsigned u = cache_offset; u <= cache_offset + last_bin; lastPos = bin_cache[u], ++u) {
   1536   			size_t count = bin_cache[u] - lastPos;
   1537   			//don't sort unless there are at least two items to compare
   1538   			if(count < 2)
   1539   				continue;
   1540   			//using std::sort if its worst-case is better
   1541   			if(count < max_size)
   1542   				std::sort(lastPos, bin_cache[u], comp);
   1543   			else
   1544   				reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(lastPos
   1545   					, bin_cache[u], char_offset + 1, bin_cache, cache_end, bin_sizes, getchar, length, comp);
   1546   		}
   1547   	}
   1548 
   1549   	//Holds the bin vector and makes the initial recursive call
   1550   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1551   	inline void
   1552   	string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
   1553   	{
   1554   		std::vector<size_t> bin_sizes;
   1555   		std::vector<RandomAccessIter> bin_cache;
   1556   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
   1557   	}
   1558 
   1559   	//Holds the bin vector and makes the initial recursive call
   1560   	template <class RandomAccessIter, class data_type, class unsignedchar_type>
   1561   	inline void
   1562   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, data_type, unsignedchar_type)
   1563   	{
   1564   		std::vector<size_t> bin_sizes;
   1565   		std::vector<RandomAccessIter> bin_cache;
   1566   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type>(first, last, 0, bin_cache, 0, bin_sizes);
   1567   	}
   1568 
   1569   	//Holds the bin vector and makes the initial recursive call
   1570   	template <class RandomAccessIter, class get_char, class get_length, class data_type, class unsignedchar_type>
   1571   	inline void
   1572   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, data_type, unsignedchar_type)
   1573   	{
   1574   		std::vector<size_t> bin_sizes;
   1575   		std::vector<RandomAccessIter> bin_cache;
   1576   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length);
   1577   	}
   1578 
   1579   	//Holds the bin vector and makes the initial recursive call
   1580   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
   1581   	inline void
   1582   	string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
   1583   	{
   1584   		std::vector<size_t> bin_sizes;
   1585   		std::vector<RandomAccessIter> bin_cache;
   1586   		string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
   1587   	}
   1588 
   1589   	//Holds the bin vector and makes the initial recursive call
   1590   	template <class RandomAccessIter, class get_char, class get_length, class compare, class data_type, class unsignedchar_type>
   1591   	inline void
   1592   	reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp, data_type, unsignedchar_type)
   1593   	{
   1594   		std::vector<size_t> bin_sizes;
   1595   		std::vector<RandomAccessIter> bin_cache;
   1596   		reverse_string_sort_rec<RandomAccessIter, data_type, unsignedchar_type, get_char, get_length, compare>(first, last, 0, bin_cache, 0, bin_sizes, getchar, length, comp);
   1597   	}
   1598   }
   1599 
   1600   //Allows character-type overloads
   1601   template <class RandomAccessIter, class unsignedchar_type>
   1602   inline void string_sort(RandomAccessIter first, RandomAccessIter last, unsignedchar_type unused)
   1603   {
   1604   	//Don't sort if it's too small to optimize
   1605   	if(last - first < detail::MIN_SORT_SIZE)
   1606   		std::sort(first, last);
   1607   	else
   1608   		detail::string_sort(first, last, *first, unused);
   1609   }
   1610 
   1611   //Top-level sorting call; wraps using default of unsigned char
   1612   template <class RandomAccessIter>
   1613   inline void string_sort(RandomAccessIter first, RandomAccessIter last)
   1614   {
   1615   	unsigned char unused = '\0';
   1616   	string_sort(first, last, unused);
   1617   }
   1618 
   1619   //Allows character-type overloads
   1620   template <class RandomAccessIter, class compare, class unsignedchar_type>
   1621   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp, unsignedchar_type unused)
   1622   {
   1623   	//Don't sort if it's too small to optimize
   1624   	if(last - first < detail::MIN_SORT_SIZE)
   1625   		std::sort(first, last, comp);
   1626   	else
   1627   		detail::reverse_string_sort(first, last, *first, unused);
   1628   }
   1629 
   1630   //Top-level sorting call; wraps using default of unsigned char
   1631   template <class RandomAccessIter, class compare>
   1632   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, compare comp)
   1633   {
   1634   	unsigned char unused = '\0';
   1635   	reverse_string_sort(first, last, comp, unused);
   1636   }
   1637 
   1638   template <class RandomAccessIter, class get_char, class get_length>
   1639   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length)
   1640   {
   1641   	//Don't sort if it's too small to optimize
   1642   	if(last - first < detail::MIN_SORT_SIZE)
   1643   		std::sort(first, last);
   1644   	else {
   1645   		//skipping past empties at the beginning, which allows us to get the character type
   1646   		//.empty() is not used so as not to require a user declaration of it
   1647   		while(!length(*first)) {
   1648   			if(++first == last)
   1649   				return;
   1650   		}
   1651   		detail::string_sort(first, last, getchar, length, *first, getchar((*first), 0));
   1652   	}
   1653   }
   1654 
   1655   template <class RandomAccessIter, class get_char, class get_length, class compare>
   1656   inline void string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
   1657   {
   1658   	//Don't sort if it's too small to optimize
   1659   	if(last - first < detail::MIN_SORT_SIZE)
   1660   		std::sort(first, last, comp);
   1661   	else {
   1662   		//skipping past empties at the beginning, which allows us to get the character type
   1663   		//.empty() is not used so as not to require a user declaration of it
   1664   		while(!length(*first)) {
   1665   			if(++first == last)
   1666   				return;
   1667   		}
   1668   		detail::string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
   1669   	}
   1670   }
   1671 
   1672   template <class RandomAccessIter, class get_char, class get_length, class compare>
   1673   inline void reverse_string_sort(RandomAccessIter first, RandomAccessIter last, get_char getchar, get_length length, compare comp)
   1674   {
   1675   	//Don't sort if it's too small to optimize
   1676   	if(last - first < detail::MIN_SORT_SIZE)
   1677   		std::sort(first, last, comp);
   1678   	else {
   1679   		//skipping past empties at the beginning, which allows us to get the character type
   1680   		//.empty() is not used so as not to require a user declaration of it
   1681   		while(!length(*(--last))) {
   1682   			//Note: if there is just one non-empty, and it's at the beginning, then it's already in sorted order
   1683   			if(first == last)
   1684   				return;
   1685   		}
   1686   		//making last just after the end of the non-empty part of the array
   1687   		++last;
   1688   		detail::reverse_string_sort(first, last, getchar, length, comp, *first, getchar((*first), 0));
   1689   	}
   1690   }
   1691 }
   1692 
   1693 #endif
   1694