Home | History | Annotate | Download | only in stdlib
      1 /* qsort.c
      2  * (c) 1998 Gareth McCaughan
      3  *
      4  * This is a drop-in replacement for the C library's |qsort()| routine.
      5  *
      6  * Features:
      7  *   - Median-of-three pivoting (and more)
      8  *   - Truncation and final polishing by a single insertion sort
      9  *   - Early truncation when no swaps needed in pivoting step
     10  *   - Explicit recursion, guaranteed not to overflow
     11  *   - A few little wrinkles stolen from the GNU |qsort()|.
     12  *   - separate code for non-aligned / aligned / word-size objects
     13  *
     14  * This code may be reproduced freely provided
     15  *   - this file is retained unaltered apart from minor
     16  *     changes for portability and efficiency
     17  *   - no changes are made to this comment
     18  *   - any changes that *are* made are clearly flagged
     19  *   - the _ID string below is altered by inserting, after
     20  *     the date, the string " altered" followed at your option
     21  *     by other material. (Exceptions: you may change the name
     22  *     of the exported routine without changing the ID string.
     23  *     You may change the values of the macros TRUNC_* and
     24  *     PIVOT_THRESHOLD without changing the ID string, provided
     25  *     they remain constants with TRUNC_nonaligned, TRUNC_aligned
     26  *     and TRUNC_words/WORD_BYTES between 8 and 24, and
     27  *     PIVOT_THRESHOLD between 32 and 200.)
     28  *
     29  * You may use it in anything you like; you may make money
     30  * out of it; you may distribute it in object form or as
     31  * part of an executable without including source code;
     32  * you don't have to credit me. (But it would be nice if
     33  * you did.)
     34  *
     35  * If you find problems with this code, or find ways of
     36  * making it significantly faster, please let me know!
     37  * My e-mail address, valid as of early 1998 and certainly
     38  * OK for at least the next 18 months, is
     39  *    gjm11 (at) dpmms.cam.ac.uk
     40  * Thanks!
     41  *
     42  * Gareth McCaughan   Peterhouse   Cambridge   1998
     43  */
     44 #include "SDL_config.h"
     45 
     46 /*
     47 #include <assert.h>
     48 #include <stdlib.h>
     49 #include <string.h>
     50 */
     51 #include "SDL_stdinc.h"
     52 
     53 #ifdef assert
     54 #undef assert
     55 #endif
     56 #define assert(X)
     57 #ifdef malloc
     58 #undef malloc
     59 #endif
     60 #define malloc	SDL_malloc
     61 #ifdef free
     62 #undef free
     63 #endif
     64 #define free	SDL_free
     65 #ifdef memcpy
     66 #undef memcpy
     67 #endif
     68 #define memcpy	SDL_memcpy
     69 #ifdef memmove
     70 #undef memmove
     71 #endif
     72 #define memmove	SDL_memmove
     73 #ifdef qsort
     74 #undef qsort
     75 #endif
     76 #define qsort	SDL_qsort
     77 
     78 
     79 #ifndef HAVE_QSORT
     80 
     81 static char _ID[]="<qsort.c gjm 1.12 1998-03-19>";
     82 
     83 /* How many bytes are there per word? (Must be a power of 2,
     84  * and must in fact equal sizeof(int).)
     85  */
     86 #define WORD_BYTES sizeof(int)
     87 
     88 /* How big does our stack need to be? Answer: one entry per
     89  * bit in a |size_t|.
     90  */
     91 #define STACK_SIZE (8*sizeof(size_t))
     92 
     93 /* Different situations have slightly different requirements,
     94  * and we make life epsilon easier by using different truncation
     95  * points for the three different cases.
     96  * So far, I have tuned TRUNC_words and guessed that the same
     97  * value might work well for the other two cases. Of course
     98  * what works well on my machine might work badly on yours.
     99  */
    100 #define TRUNC_nonaligned	12
    101 #define TRUNC_aligned		12
    102 #define TRUNC_words		12*WORD_BYTES	/* nb different meaning */
    103 
    104 /* We use a simple pivoting algorithm for shortish sub-arrays
    105  * and a more complicated one for larger ones. The threshold
    106  * is PIVOT_THRESHOLD.
    107  */
    108 #define PIVOT_THRESHOLD 40
    109 
    110 typedef struct { char * first; char * last; } stack_entry;
    111 #define pushLeft {stack[stacktop].first=ffirst;stack[stacktop++].last=last;}
    112 #define pushRight {stack[stacktop].first=first;stack[stacktop++].last=llast;}
    113 #define doLeft {first=ffirst;llast=last;continue;}
    114 #define doRight {ffirst=first;last=llast;continue;}
    115 #define pop {if (--stacktop<0) break;\
    116   first=ffirst=stack[stacktop].first;\
    117   last=llast=stack[stacktop].last;\
    118   continue;}
    119 
    120 /* Some comments on the implementation.
    121  * 1. When we finish partitioning the array into "low"
    122  *    and "high", we forget entirely about short subarrays,
    123  *    because they'll be done later by insertion sort.
    124  *    Doing lots of little insertion sorts might be a win
    125  *    on large datasets for locality-of-reference reasons,
    126  *    but it makes the code much nastier and increases
    127  *    bookkeeping overhead.
    128  * 2. We always save the shorter and get to work on the
    129  *    longer. This guarantees that every time we push
    130  *    an item onto the stack its size is <= 1/2 of that
    131  *    of its parent; so the stack can't need more than
    132  *    log_2(max-array-size) entries.
    133  * 3. We choose a pivot by looking at the first, last
    134  *    and middle elements. We arrange them into order
    135  *    because it's easy to do that in conjunction with
    136  *    choosing the pivot, and it makes things a little
    137  *    easier in the partitioning step. Anyway, the pivot
    138  *    is the middle of these three. It's still possible
    139  *    to construct datasets where the algorithm takes
    140  *    time of order n^2, but it simply never happens in
    141  *    practice.
    142  * 3' Newsflash: On further investigation I find that
    143  *    it's easy to construct datasets where median-of-3
    144  *    simply isn't good enough. So on large-ish subarrays
    145  *    we do a more sophisticated pivoting: we take three
    146  *    sets of 3 elements, find their medians, and then
    147  *    take the median of those.
    148  * 4. We copy the pivot element to a separate place
    149  *    because that way we can always do our comparisons
    150  *    directly against a pointer to that separate place,
    151  *    and don't have to wonder "did we move the pivot
    152  *    element?". This makes the inner loop better.
    153  * 5. It's possible to make the pivoting even more
    154  *    reliable by looking at more candidates when n
    155  *    is larger. (Taking this to its logical conclusion
    156  *    results in a variant of quicksort that doesn't
    157  *    have that n^2 worst case.) However, the overhead
    158  *    from the extra bookkeeping means that it's just
    159  *    not worth while.
    160  * 6. This is pretty clean and portable code. Here are
    161  *    all the potential portability pitfalls and problems
    162  *    I know of:
    163  *      - In one place (the insertion sort) I construct
    164  *        a pointer that points just past the end of the
    165  *        supplied array, and assume that (a) it won't
    166  *        compare equal to any pointer within the array,
    167  *        and (b) it will compare equal to a pointer
    168  *        obtained by stepping off the end of the array.
    169  *        These might fail on some segmented architectures.
    170  *      - I assume that there are 8 bits in a |char| when
    171  *        computing the size of stack needed. This would
    172  *        fail on machines with 9-bit or 16-bit bytes.
    173  *      - I assume that if |((int)base&(sizeof(int)-1))==0|
    174  *        and |(size&(sizeof(int)-1))==0| then it's safe to
    175  *        get at array elements via |int*|s, and that if
    176  *        actually |size==sizeof(int)| as well then it's
    177  *        safe to treat the elements as |int|s. This might
    178  *        fail on systems that convert pointers to integers
    179  *        in non-standard ways.
    180  *      - I assume that |8*sizeof(size_t)<=INT_MAX|. This
    181  *        would be false on a machine with 8-bit |char|s,
    182  *        16-bit |int|s and 4096-bit |size_t|s. :-)
    183  */
    184 
    185 /* The recursion logic is the same in each case: */
    186 #define Recurse(Trunc)				\
    187       { size_t l=last-ffirst,r=llast-first;	\
    188         if (l<Trunc) {				\
    189           if (r>=Trunc) doRight			\
    190           else pop				\
    191         }					\
    192         else if (l<=r) { pushLeft; doRight }	\
    193         else if (r>=Trunc) { pushRight; doLeft }\
    194         else doLeft				\
    195       }
    196 
    197 /* and so is the pivoting logic: */
    198 #define Pivot(swapper,sz)			\
    199   if ((size_t)(last-first)>PIVOT_THRESHOLD*sz) mid=pivot_big(first,mid,last,sz,compare);\
    200   else {	\
    201     if (compare(first,mid)<0) {			\
    202       if (compare(mid,last)>0) {		\
    203         swapper(mid,last);			\
    204         if (compare(first,mid)>0) swapper(first,mid);\
    205       }						\
    206     }						\
    207     else {					\
    208       if (compare(mid,last)>0) swapper(first,last)\
    209       else {					\
    210         swapper(first,mid);			\
    211         if (compare(mid,last)>0) swapper(mid,last);\
    212       }						\
    213     }						\
    214     first+=sz; last-=sz;			\
    215   }
    216 
    217 #ifdef DEBUG_QSORT
    218 #include <stdio.h>
    219 #endif
    220 
    221 /* and so is the partitioning logic: */
    222 #define Partition(swapper,sz) {			\
    223   int swapped=0;				\
    224   do {						\
    225     while (compare(first,pivot)<0) first+=sz;	\
    226     while (compare(pivot,last)<0) last-=sz;	\
    227     if (first<last) {				\
    228       swapper(first,last); swapped=1;		\
    229       first+=sz; last-=sz; }			\
    230     else if (first==last) { first+=sz; last-=sz; break; }\
    231   } while (first<=last);			\
    232   if (!swapped) pop				\
    233 }
    234 
    235 /* and so is the pre-insertion-sort operation of putting
    236  * the smallest element into place as a sentinel.
    237  * Doing this makes the inner loop nicer. I got this
    238  * idea from the GNU implementation of qsort().
    239  */
    240 #define PreInsertion(swapper,limit,sz)		\
    241   first=base;					\
    242   last=first + (nmemb>limit ? limit : nmemb-1)*sz;\
    243   while (last!=base) {				\
    244     if (compare(first,last)>0) first=last;	\
    245     last-=sz; }					\
    246   if (first!=base) swapper(first,(char*)base);
    247 
    248 /* and so is the insertion sort, in the first two cases: */
    249 #define Insertion(swapper)			\
    250   last=((char*)base)+nmemb*size;		\
    251   for (first=((char*)base)+size;first!=last;first+=size) {	\
    252     char *test;					\
    253     /* Find the right place for |first|.	\
    254      * My apologies for var reuse. */		\
    255     for (test=first-size;compare(test,first)>0;test-=size) ;	\
    256     test+=size;					\
    257     if (test!=first) {				\
    258       /* Shift everything in [test,first)	\
    259        * up by one, and place |first|		\
    260        * where |test| is. */			\
    261       memcpy(pivot,first,size);			\
    262       memmove(test+size,test,first-test);	\
    263       memcpy(test,pivot,size);			\
    264     }						\
    265   }
    266 
    267 #define SWAP_nonaligned(a,b) { \
    268   register char *aa=(a),*bb=(b); \
    269   register size_t sz=size; \
    270   do { register char t=*aa; *aa++=*bb; *bb++=t; } while (--sz); }
    271 
    272 #define SWAP_aligned(a,b) { \
    273   register int *aa=(int*)(a),*bb=(int*)(b); \
    274   register size_t sz=size; \
    275   do { register int t=*aa;*aa++=*bb; *bb++=t; } while (sz-=WORD_BYTES); }
    276 
    277 #define SWAP_words(a,b) { \
    278   register int t=*((int*)a); *((int*)a)=*((int*)b); *((int*)b)=t; }
    279 
    280 /* ---------------------------------------------------------------------- */
    281 
    282 static char * pivot_big(char *first, char *mid, char *last, size_t size,
    283                         int compare(const void *, const void *)) {
    284   size_t d=(((last-first)/size)>>3)*size;
    285   char *m1,*m2,*m3;
    286   { char *a=first, *b=first+d, *c=first+2*d;
    287 #ifdef DEBUG_QSORT
    288 fprintf(stderr,"< %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
    289 #endif
    290     m1 = compare(a,b)<0 ?
    291            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    292          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    293   }
    294   { char *a=mid-d, *b=mid, *c=mid+d;
    295 #ifdef DEBUG_QSORT
    296 fprintf(stderr,". %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
    297 #endif
    298     m2 = compare(a,b)<0 ?
    299            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    300          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    301   }
    302   { char *a=last-2*d, *b=last-d, *c=last;
    303 #ifdef DEBUG_QSORT
    304 fprintf(stderr,"> %d %d %d\n",*(int*)a,*(int*)b,*(int*)c);
    305 #endif
    306     m3 = compare(a,b)<0 ?
    307            (compare(b,c)<0 ? b : (compare(a,c)<0 ? c : a))
    308          : (compare(a,c)<0 ? a : (compare(b,c)<0 ? c : b));
    309   }
    310 #ifdef DEBUG_QSORT
    311 fprintf(stderr,"-> %d %d %d\n",*(int*)m1,*(int*)m2,*(int*)m3);
    312 #endif
    313   return compare(m1,m2)<0 ?
    314            (compare(m2,m3)<0 ? m2 : (compare(m1,m3)<0 ? m3 : m1))
    315          : (compare(m1,m3)<0 ? m1 : (compare(m2,m3)<0 ? m3 : m2));
    316 }
    317 
    318 /* ---------------------------------------------------------------------- */
    319 
    320 static void qsort_nonaligned(void *base, size_t nmemb, size_t size,
    321            int (*compare)(const void *, const void *)) {
    322 
    323   stack_entry stack[STACK_SIZE];
    324   int stacktop=0;
    325   char *first,*last;
    326   char *pivot=malloc(size);
    327   size_t trunc=TRUNC_nonaligned*size;
    328   assert(pivot!=0);
    329 
    330   first=(char*)base; last=first+(nmemb-1)*size;
    331 
    332   if ((size_t)(last-first)>trunc) {
    333     char *ffirst=first, *llast=last;
    334     while (1) {
    335       /* Select pivot */
    336       { char * mid=first+size*((last-first)/size >> 1);
    337         Pivot(SWAP_nonaligned,size);
    338         memcpy(pivot,mid,size);
    339       }
    340       /* Partition. */
    341       Partition(SWAP_nonaligned,size);
    342       /* Prepare to recurse/iterate. */
    343       Recurse(trunc)
    344     }
    345   }
    346   PreInsertion(SWAP_nonaligned,TRUNC_nonaligned,size);
    347   Insertion(SWAP_nonaligned);
    348   free(pivot);
    349 }
    350 
    351 static void qsort_aligned(void *base, size_t nmemb, size_t size,
    352            int (*compare)(const void *, const void *)) {
    353 
    354   stack_entry stack[STACK_SIZE];
    355   int stacktop=0;
    356   char *first,*last;
    357   char *pivot=malloc(size);
    358   size_t trunc=TRUNC_aligned*size;
    359   assert(pivot!=0);
    360 
    361   first=(char*)base; last=first+(nmemb-1)*size;
    362 
    363   if ((size_t)(last-first)>trunc) {
    364     char *ffirst=first,*llast=last;
    365     while (1) {
    366       /* Select pivot */
    367       { char * mid=first+size*((last-first)/size >> 1);
    368         Pivot(SWAP_aligned,size);
    369         memcpy(pivot,mid,size);
    370       }
    371       /* Partition. */
    372       Partition(SWAP_aligned,size);
    373       /* Prepare to recurse/iterate. */
    374       Recurse(trunc)
    375     }
    376   }
    377   PreInsertion(SWAP_aligned,TRUNC_aligned,size);
    378   Insertion(SWAP_aligned);
    379   free(pivot);
    380 }
    381 
    382 static void qsort_words(void *base, size_t nmemb,
    383            int (*compare)(const void *, const void *)) {
    384 
    385   stack_entry stack[STACK_SIZE];
    386   int stacktop=0;
    387   char *first,*last;
    388   char *pivot=malloc(WORD_BYTES);
    389   assert(pivot!=0);
    390 
    391   first=(char*)base; last=first+(nmemb-1)*WORD_BYTES;
    392 
    393   if (last-first>TRUNC_words) {
    394     char *ffirst=first, *llast=last;
    395     while (1) {
    396 #ifdef DEBUG_QSORT
    397 fprintf(stderr,"Doing %d:%d: ",
    398         (first-(char*)base)/WORD_BYTES,
    399         (last-(char*)base)/WORD_BYTES);
    400 #endif
    401       /* Select pivot */
    402       { char * mid=first+WORD_BYTES*((last-first) / (2*WORD_BYTES));
    403         Pivot(SWAP_words,WORD_BYTES);
    404         *(int*)pivot=*(int*)mid;
    405       }
    406 #ifdef DEBUG_QSORT
    407 fprintf(stderr,"pivot=%d\n",*(int*)pivot);
    408 #endif
    409       /* Partition. */
    410       Partition(SWAP_words,WORD_BYTES);
    411       /* Prepare to recurse/iterate. */
    412       Recurse(TRUNC_words)
    413     }
    414   }
    415   PreInsertion(SWAP_words,(TRUNC_words/WORD_BYTES),WORD_BYTES);
    416   /* Now do insertion sort. */
    417   last=((char*)base)+nmemb*WORD_BYTES;
    418   for (first=((char*)base)+WORD_BYTES;first!=last;first+=WORD_BYTES) {
    419     /* Find the right place for |first|. My apologies for var reuse */
    420     int *pl=(int*)(first-WORD_BYTES),*pr=(int*)first;
    421     *(int*)pivot=*(int*)first;
    422     for (;compare(pl,pivot)>0;pr=pl,--pl) {
    423       *pr=*pl; }
    424     if (pr!=(int*)first) *pr=*(int*)pivot;
    425   }
    426   free(pivot);
    427 }
    428 
    429 /* ---------------------------------------------------------------------- */
    430 
    431 void qsort(void *base, size_t nmemb, size_t size,
    432            int (*compare)(const void *, const void *)) {
    433 
    434   if (nmemb<=1) return;
    435   if (((uintptr_t)base|size)&(WORD_BYTES-1))
    436     qsort_nonaligned(base,nmemb,size,compare);
    437   else if (size!=WORD_BYTES)
    438     qsort_aligned(base,nmemb,size,compare);
    439   else
    440     qsort_words(base,nmemb,compare);
    441 }
    442 
    443 #endif /* !HAVE_QSORT */
    444