Home | History | Annotate | Download | only in posix
      1 /* sort.c - put input lines into order
      2  *
      3  * Copyright 2004, 2008 Rob Landley <rob (at) landley.net>
      4  *
      5  * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html
      6  *
      7  * Deviations from POSIX: Lots.
      8  * We invented -x
      9 
     10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN))
     11 
     12 config SORT
     13   bool "sort"
     14   default y
     15   help
     16     usage: sort [-run] [FILE...]
     17 
     18     Sort all lines of text from input files (or stdin) to stdout.
     19 
     20     -r	reverse
     21     -u	unique lines only
     22     -n	numeric order (instead of alphabetical)
     23 
     24 config SORT_BIG
     25   bool "SuSv3 options (Support -ktcsbdfiozM)"
     26   default y
     27   depends on SORT
     28   help
     29     usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE]
     30 
     31     -b	ignore leading blanks (or trailing blanks in second part of key)
     32     -c	check whether input is sorted
     33     -d	dictionary order (use alphanumeric and whitespace chars only)
     34     -f	force uppercase (case insensitive sort)
     35     -i	ignore nonprinting characters
     36     -M	month sort (jan, feb, etc).
     37     -x	Hexadecimal numerical sort
     38     -s	skip fallback sort (only sort with keys)
     39     -z	zero (null) terminated lines
     40     -k	sort by "key" (see below)
     41     -t	use a key separator other than whitespace
     42     -o	output to FILE instead of stdout
     43 
     44     Sorting by key looks at a subset of the words on each line.  -k2
     45     uses the second word to the end of the line, -k2,2 looks at only
     46     the second word, -k2,4 looks from the start of the second to the end
     47     of the fourth word.  Specifying multiple keys uses the later keys as
     48     tie breakers, in order.  A type specifier appended to a sort key
     49     (such as -2,2n) applies only to sorting that key.
     50 
     51 config SORT_FLOAT
     52   bool
     53   default y
     54   depends on SORT_BIG && TOYBOX_FLOAT
     55   help
     56     usage: sort [-g]
     57 
     58     -g	general numeric sort (double precision with nan and inf)
     59 */
     60 
     61 #define FOR_sort
     62 #include "toys.h"
     63 
     64 GLOBALS(
     65   char *key_separator;
     66   struct arg_list *raw_keys;
     67   char *outfile;
     68   char *ignore1, ignore2;   // GNU compatability NOPs for -S and -T.
     69 
     70   void *key_list;
     71   int linecount;
     72   char **lines;
     73 )
     74 
     75 // The sort types are n, g, and M.
     76 // u, c, s, and z apply to top level only, not to keys.
     77 // b at top level implies bb.
     78 // The remaining options can be applied to search keys.
     79 
     80 #define FLAG_bb (1<<31)  // Ignore trailing blanks
     81 
     82 struct sort_key
     83 {
     84   struct sort_key *next_key;  // linked list
     85   unsigned range[4];          // start word, start char, end word, end char
     86   int flags;
     87 };
     88 
     89 // Copy of the part of this string corresponding to a key/flags.
     90 
     91 static char *get_key_data(char *str, struct sort_key *key, int flags)
     92 {
     93   int start=0, end, len, i, j;
     94 
     95   // Special case whole string, so we don't have to make a copy
     96 
     97   if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3]
     98     && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str;
     99 
    100   // Find start of key on first pass, end on second pass
    101 
    102   len = strlen(str);
    103   for (j=0; j<2; j++) {
    104     if (!key->range[2*j]) end=len;
    105 
    106     // Loop through fields
    107     else {
    108       end=0;
    109       for (i=1; i < key->range[2*j]+j; i++) {
    110 
    111         // Skip leading blanks
    112         if (str[end] && !TT.key_separator)
    113           while (isspace(str[end])) end++;
    114 
    115         // Skip body of key
    116         for (; str[end]; end++) {
    117           if (TT.key_separator) {
    118             if (str[end]==*TT.key_separator) {
    119               end++;
    120               break;
    121             }
    122           } else if (isspace(str[end])) break;
    123         }
    124       }
    125     }
    126     if (!j) start=end;
    127   }
    128 
    129   // Key with explicit separator starts after the separator
    130   if (TT.key_separator && str[start]==*TT.key_separator) start++;
    131 
    132   // Strip leading and trailing whitespace if necessary
    133   if (flags&FLAG_b) while (isspace(str[start])) start++;
    134   if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--;
    135 
    136   // Handle offsets on start and end
    137   if (key->range[3]) {
    138     end += key->range[3]-1;
    139     if (end>len) end=len;
    140   }
    141   if (key->range[1]) {
    142     start += key->range[1]-1;
    143     if (start>len) start=len;
    144   }
    145 
    146   // Make the copy
    147   if (end<start) end=start;
    148   str = xstrndup(str+start, end-start);
    149 
    150   // Handle -d
    151   if (flags&FLAG_d) {
    152     for (start = end = 0; str[end]; end++)
    153       if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end];
    154     str[start] = 0;
    155   }
    156 
    157   // Handle -i
    158   if (flags&FLAG_i) {
    159     for (start = end = 0; str[end]; end++)
    160       if (isprint(str[end])) str[start++] = str[end];
    161     str[start] = 0;
    162   }
    163 
    164   return str;
    165 }
    166 
    167 // append a sort_key to key_list.
    168 
    169 static struct sort_key *add_key(void)
    170 {
    171   void **stupid_compiler = &TT.key_list;
    172   struct sort_key **pkey = (struct sort_key **)stupid_compiler;
    173 
    174   while (*pkey) pkey = &((*pkey)->next_key);
    175   return *pkey = xzalloc(sizeof(struct sort_key));
    176 }
    177 
    178 // Perform actual comparison
    179 static int compare_values(int flags, char *x, char *y)
    180 {
    181   int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x);
    182 
    183   // Ascii sort
    184   if (!ff) return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y);
    185 
    186   if (CFG_SORT_FLOAT && ff == FLAG_g) {
    187     char *xx,*yy;
    188     double dx = strtod(x,&xx), dy = strtod(y,&yy);
    189     int xinf, yinf;
    190 
    191     // not numbers < NaN < -infinity < numbers < +infinity
    192 
    193     if (x==xx) return y==yy ? 0 : -1;
    194     if (y==yy) return 1;
    195 
    196     // Check for isnan
    197     if (dx!=dx) return (dy!=dy) ? 0 : -1;
    198     if (dy!=dy) return 1;
    199 
    200     // Check for infinity.  (Could underflow, but avoids needing libm.)
    201     xinf = (1.0/dx == 0.0);
    202     yinf = (1.0/dy == 0.0);
    203     if (xinf) {
    204       if(dx<0) return (yinf && dy<0) ? 0 : -1;
    205       return (yinf && dy>0) ? 0 : 1;
    206     }
    207     if (yinf) return dy<0 ? 1 : -1;
    208 
    209     return dx>dy ? 1 : (dx<dy ? -1 : 0);
    210   } else if (CFG_SORT_BIG && ff == FLAG_M) {
    211     struct tm thyme;
    212     int dx;
    213     char *xx,*yy;
    214 
    215     xx = strptime(x,"%b",&thyme);
    216     dx = thyme.tm_mon;
    217     yy = strptime(y,"%b",&thyme);
    218     if (!xx) return !yy ? 0 : -1;
    219     else if (!yy) return 1;
    220     else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon;
    221 
    222   } else if (CFG_SORT_BIG && ff == FLAG_x) {
    223     return strtol(x, NULL, 16)-strtol(y, NULL, 16);
    224   // This has to be ff == FLAG_n
    225   } else {
    226     // Full floating point version of -n
    227     if (CFG_SORT_FLOAT) {
    228       double dx = atof(x), dy = atof(y);
    229 
    230       return dx>dy ? 1 : (dx<dy ? -1 : 0);
    231     // Integer version of -n for tiny systems
    232     } else return atoi(x)-atoi(y);
    233   }
    234 }
    235 
    236 // Callback from qsort(): Iterate through key_list and perform comparisons.
    237 static int compare_keys(const void *xarg, const void *yarg)
    238 {
    239   int flags = toys.optflags, retval = 0;
    240   char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg;
    241   struct sort_key *key;
    242 
    243   if (CFG_SORT_BIG) {
    244     for (key=(struct sort_key *)TT.key_list; !retval && key;
    245        key = key->next_key)
    246     {
    247       flags = key->flags ? key->flags : toys.optflags;
    248 
    249       // Chop out and modify key chunks, handling -dfib
    250 
    251       x = get_key_data(xx, key, flags);
    252       y = get_key_data(yy, key, flags);
    253 
    254       retval = compare_values(flags, x, y);
    255 
    256       // Free the copies get_key_data() made.
    257 
    258       if (x != xx) free(x);
    259       if (y != yy) free(y);
    260 
    261       if (retval) break;
    262     }
    263   } else retval = compare_values(flags, xx, yy);
    264 
    265   // Perform fallback sort if necessary (always case insensitive, no -f,
    266   // the point is to get a stable order even for -f sorts)
    267   if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) {
    268     flags = toys.optflags;
    269     retval = strcmp(xx, yy);
    270   }
    271 
    272   return retval * ((flags&FLAG_r) ? -1 : 1);
    273 }
    274 
    275 // Callback from loopfiles to handle input files.
    276 static void sort_read(int fd, char *name)
    277 {
    278   // Read each line from file, appending to a big array.
    279 
    280   for (;;) {
    281     char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z))
    282              ? get_rawline(fd, NULL, 0) : get_line(fd);
    283 
    284     if (!line) break;
    285 
    286     // handle -c here so we don't allocate more memory than necessary.
    287     if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) {
    288       int j = (toys.optflags&FLAG_u) ? -1 : 0;
    289 
    290       if (TT.lines && compare_keys((void *)&TT.lines, &line)>j)
    291         error_exit("%s: Check line %d\n", name, TT.linecount);
    292       free(TT.lines);
    293       TT.lines = (char **)line;
    294     } else {
    295       if (!(TT.linecount&63))
    296         TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64));
    297       TT.lines[TT.linecount] = line;
    298     }
    299     TT.linecount++;
    300   }
    301 }
    302 
    303 void sort_main(void)
    304 {
    305   int idx, fd = 1;
    306 
    307   // Open output file if necessary.
    308   if (CFG_SORT_BIG && TT.outfile)
    309     fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666);
    310 
    311   // Parse -k sort keys.
    312   if (CFG_SORT_BIG && TT.raw_keys) {
    313     struct arg_list *arg;
    314 
    315     for (arg = TT.raw_keys; arg; arg = arg->next) {
    316       struct sort_key *key = add_key();
    317       char *temp;
    318       int flag;
    319 
    320       idx = 0;
    321       temp = arg->arg;
    322       while (*temp) {
    323         // Start of range
    324         key->range[2*idx] = (unsigned)strtol(temp, &temp, 10);
    325         if (*temp=='.')
    326           key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10);
    327 
    328         // Handle flags appended to a key type.
    329         for (;*temp;temp++) {
    330           char *temp2, *optlist;
    331 
    332           // Note that a second comma becomes an "Unknown key" error.
    333 
    334           if (*temp==',' && !idx++) {
    335             temp++;
    336             break;
    337           }
    338 
    339           // Which flag is this?
    340 
    341           optlist = toys.which->options;
    342           temp2 = strchr(optlist, *temp);
    343           flag = (1<<(optlist-temp2+strlen(optlist)-1));
    344 
    345           // Was it a flag that can apply to a key?
    346 
    347           if (!temp2 || flag>FLAG_b
    348             || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z)))
    349           {
    350             error_exit("Unknown key option.");
    351           }
    352           // b after , means strip _trailing_ space, not leading.
    353           if (idx && flag==FLAG_b) flag = FLAG_bb;
    354           key->flags |= flag;
    355         }
    356       }
    357     }
    358   }
    359 
    360   // global b flag strips both leading and trailing spaces
    361   if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb;
    362 
    363   // If no keys, perform alphabetic sort over the whole line.
    364   if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1;
    365 
    366   // Open input files and read data, populating TT.lines[TT.linecount]
    367   loopfiles(toys.optargs, sort_read);
    368 
    369   // The compare (-c) logic was handled in sort_read(),
    370   // so if we got here, we're done.
    371   if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now;
    372 
    373   // Perform the actual sort
    374   qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys);
    375 
    376   // handle unique (-u)
    377   if (toys.optflags&FLAG_u) {
    378     int jdx;
    379 
    380     for (jdx=0, idx=1; idx<TT.linecount; idx++) {
    381       if (!compare_keys(&TT.lines[jdx], &TT.lines[idx]))
    382         free(TT.lines[idx]);
    383       else TT.lines[++jdx] = TT.lines[idx];
    384     }
    385     if (TT.linecount) TT.linecount = jdx+1;
    386   }
    387 
    388   // Output result
    389   for (idx = 0; idx<TT.linecount; idx++) {
    390     char *s = TT.lines[idx];
    391     unsigned i = strlen(s);
    392 
    393     if (!(toys.optflags&FLAG_z)) s[i] = '\n';
    394     xwrite(fd, s, i+1);
    395     if (CFG_TOYBOX_FREE) free(s);
    396   }
    397 
    398 exit_now:
    399   if (CFG_TOYBOX_FREE) {
    400     if (fd != 1) close(fd);
    401     free(TT.lines);
    402   }
    403 }
    404