1 /* sort.c - put input lines into order 2 * 3 * Copyright 2004, 2008 Rob Landley <rob (at) landley.net> 4 * 5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html 6 * 7 * Deviations from POSIX: Lots. 8 * We invented -x 9 10 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN)) 11 12 config SORT 13 bool "sort" 14 default y 15 help 16 usage: sort [-run] [FILE...] 17 18 Sort all lines of text from input files (or stdin) to stdout. 19 20 -r reverse 21 -u unique lines only 22 -n numeric order (instead of alphabetical) 23 24 config SORT_BIG 25 bool "SuSv3 options (Support -ktcsbdfiozM)" 26 default y 27 depends on SORT 28 help 29 usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE] 30 31 -b ignore leading blanks (or trailing blanks in second part of key) 32 -c check whether input is sorted 33 -d dictionary order (use alphanumeric and whitespace chars only) 34 -f force uppercase (case insensitive sort) 35 -i ignore nonprinting characters 36 -M month sort (jan, feb, etc). 37 -x Hexadecimal numerical sort 38 -s skip fallback sort (only sort with keys) 39 -z zero (null) terminated lines 40 -k sort by "key" (see below) 41 -t use a key separator other than whitespace 42 -o output to FILE instead of stdout 43 44 Sorting by key looks at a subset of the words on each line. -k2 45 uses the second word to the end of the line, -k2,2 looks at only 46 the second word, -k2,4 looks from the start of the second to the end 47 of the fourth word. Specifying multiple keys uses the later keys as 48 tie breakers, in order. A type specifier appended to a sort key 49 (such as -2,2n) applies only to sorting that key. 50 51 config SORT_FLOAT 52 bool 53 default y 54 depends on SORT_BIG && TOYBOX_FLOAT 55 help 56 usage: sort [-g] 57 58 -g general numeric sort (double precision with nan and inf) 59 */ 60 61 #define FOR_sort 62 #include "toys.h" 63 64 GLOBALS( 65 char *key_separator; 66 struct arg_list *raw_keys; 67 char *outfile; 68 char *ignore1, ignore2; // GNU compatability NOPs for -S and -T. 69 70 void *key_list; 71 int linecount; 72 char **lines; 73 ) 74 75 // The sort types are n, g, and M. 76 // u, c, s, and z apply to top level only, not to keys. 77 // b at top level implies bb. 78 // The remaining options can be applied to search keys. 79 80 #define FLAG_bb (1<<31) // Ignore trailing blanks 81 82 struct sort_key 83 { 84 struct sort_key *next_key; // linked list 85 unsigned range[4]; // start word, start char, end word, end char 86 int flags; 87 }; 88 89 // Copy of the part of this string corresponding to a key/flags. 90 91 static char *get_key_data(char *str, struct sort_key *key, int flags) 92 { 93 int start=0, end, len, i, j; 94 95 // Special case whole string, so we don't have to make a copy 96 97 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] 98 && !(flags&(FLAG_b|FLAG_d|FLAG_i|FLAG_bb))) return str; 99 100 // Find start of key on first pass, end on second pass 101 102 len = strlen(str); 103 for (j=0; j<2; j++) { 104 if (!key->range[2*j]) end=len; 105 106 // Loop through fields 107 else { 108 end=0; 109 for (i=1; i < key->range[2*j]+j; i++) { 110 111 // Skip leading blanks 112 if (str[end] && !TT.key_separator) 113 while (isspace(str[end])) end++; 114 115 // Skip body of key 116 for (; str[end]; end++) { 117 if (TT.key_separator) { 118 if (str[end]==*TT.key_separator) { 119 end++; 120 break; 121 } 122 } else if (isspace(str[end])) break; 123 } 124 } 125 } 126 if (!j) start=end; 127 } 128 129 // Key with explicit separator starts after the separator 130 if (TT.key_separator && str[start]==*TT.key_separator) start++; 131 132 // Strip leading and trailing whitespace if necessary 133 if (flags&FLAG_b) while (isspace(str[start])) start++; 134 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; 135 136 // Handle offsets on start and end 137 if (key->range[3]) { 138 end += key->range[3]-1; 139 if (end>len) end=len; 140 } 141 if (key->range[1]) { 142 start += key->range[1]-1; 143 if (start>len) start=len; 144 } 145 146 // Make the copy 147 if (end<start) end=start; 148 str = xstrndup(str+start, end-start); 149 150 // Handle -d 151 if (flags&FLAG_d) { 152 for (start = end = 0; str[end]; end++) 153 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; 154 str[start] = 0; 155 } 156 157 // Handle -i 158 if (flags&FLAG_i) { 159 for (start = end = 0; str[end]; end++) 160 if (isprint(str[end])) str[start++] = str[end]; 161 str[start] = 0; 162 } 163 164 return str; 165 } 166 167 // append a sort_key to key_list. 168 169 static struct sort_key *add_key(void) 170 { 171 void **stupid_compiler = &TT.key_list; 172 struct sort_key **pkey = (struct sort_key **)stupid_compiler; 173 174 while (*pkey) pkey = &((*pkey)->next_key); 175 return *pkey = xzalloc(sizeof(struct sort_key)); 176 } 177 178 // Perform actual comparison 179 static int compare_values(int flags, char *x, char *y) 180 { 181 int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x); 182 183 // Ascii sort 184 if (!ff) return ((flags&FLAG_f) ? strcasecmp : strcmp)(x, y); 185 186 if (CFG_SORT_FLOAT && ff == FLAG_g) { 187 char *xx,*yy; 188 double dx = strtod(x,&xx), dy = strtod(y,&yy); 189 int xinf, yinf; 190 191 // not numbers < NaN < -infinity < numbers < +infinity 192 193 if (x==xx) return y==yy ? 0 : -1; 194 if (y==yy) return 1; 195 196 // Check for isnan 197 if (dx!=dx) return (dy!=dy) ? 0 : -1; 198 if (dy!=dy) return 1; 199 200 // Check for infinity. (Could underflow, but avoids needing libm.) 201 xinf = (1.0/dx == 0.0); 202 yinf = (1.0/dy == 0.0); 203 if (xinf) { 204 if(dx<0) return (yinf && dy<0) ? 0 : -1; 205 return (yinf && dy>0) ? 0 : 1; 206 } 207 if (yinf) return dy<0 ? 1 : -1; 208 209 return dx>dy ? 1 : (dx<dy ? -1 : 0); 210 } else if (CFG_SORT_BIG && ff == FLAG_M) { 211 struct tm thyme; 212 int dx; 213 char *xx,*yy; 214 215 xx = strptime(x,"%b",&thyme); 216 dx = thyme.tm_mon; 217 yy = strptime(y,"%b",&thyme); 218 if (!xx) return !yy ? 0 : -1; 219 else if (!yy) return 1; 220 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; 221 222 } else if (CFG_SORT_BIG && ff == FLAG_x) { 223 return strtol(x, NULL, 16)-strtol(y, NULL, 16); 224 // This has to be ff == FLAG_n 225 } else { 226 // Full floating point version of -n 227 if (CFG_SORT_FLOAT) { 228 double dx = atof(x), dy = atof(y); 229 230 return dx>dy ? 1 : (dx<dy ? -1 : 0); 231 // Integer version of -n for tiny systems 232 } else return atoi(x)-atoi(y); 233 } 234 } 235 236 // Callback from qsort(): Iterate through key_list and perform comparisons. 237 static int compare_keys(const void *xarg, const void *yarg) 238 { 239 int flags = toys.optflags, retval = 0; 240 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; 241 struct sort_key *key; 242 243 if (CFG_SORT_BIG) { 244 for (key=(struct sort_key *)TT.key_list; !retval && key; 245 key = key->next_key) 246 { 247 flags = key->flags ? key->flags : toys.optflags; 248 249 // Chop out and modify key chunks, handling -dfib 250 251 x = get_key_data(xx, key, flags); 252 y = get_key_data(yy, key, flags); 253 254 retval = compare_values(flags, x, y); 255 256 // Free the copies get_key_data() made. 257 258 if (x != xx) free(x); 259 if (y != yy) free(y); 260 261 if (retval) break; 262 } 263 } else retval = compare_values(flags, xx, yy); 264 265 // Perform fallback sort if necessary (always case insensitive, no -f, 266 // the point is to get a stable order even for -f sorts) 267 if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) { 268 flags = toys.optflags; 269 retval = strcmp(xx, yy); 270 } 271 272 return retval * ((flags&FLAG_r) ? -1 : 1); 273 } 274 275 // Callback from loopfiles to handle input files. 276 static void sort_read(int fd, char *name) 277 { 278 // Read each line from file, appending to a big array. 279 280 for (;;) { 281 char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z)) 282 ? get_rawline(fd, NULL, 0) : get_line(fd); 283 284 if (!line) break; 285 286 // handle -c here so we don't allocate more memory than necessary. 287 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) { 288 int j = (toys.optflags&FLAG_u) ? -1 : 0; 289 290 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) 291 error_exit("%s: Check line %d\n", name, TT.linecount); 292 free(TT.lines); 293 TT.lines = (char **)line; 294 } else { 295 if (!(TT.linecount&63)) 296 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); 297 TT.lines[TT.linecount] = line; 298 } 299 TT.linecount++; 300 } 301 } 302 303 void sort_main(void) 304 { 305 int idx, fd = 1; 306 307 // Open output file if necessary. 308 if (CFG_SORT_BIG && TT.outfile) 309 fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666); 310 311 // Parse -k sort keys. 312 if (CFG_SORT_BIG && TT.raw_keys) { 313 struct arg_list *arg; 314 315 for (arg = TT.raw_keys; arg; arg = arg->next) { 316 struct sort_key *key = add_key(); 317 char *temp; 318 int flag; 319 320 idx = 0; 321 temp = arg->arg; 322 while (*temp) { 323 // Start of range 324 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); 325 if (*temp=='.') 326 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); 327 328 // Handle flags appended to a key type. 329 for (;*temp;temp++) { 330 char *temp2, *optlist; 331 332 // Note that a second comma becomes an "Unknown key" error. 333 334 if (*temp==',' && !idx++) { 335 temp++; 336 break; 337 } 338 339 // Which flag is this? 340 341 optlist = toys.which->options; 342 temp2 = strchr(optlist, *temp); 343 flag = (1<<(optlist-temp2+strlen(optlist)-1)); 344 345 // Was it a flag that can apply to a key? 346 347 if (!temp2 || flag>FLAG_b 348 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) 349 { 350 error_exit("Unknown key option."); 351 } 352 // b after , means strip _trailing_ space, not leading. 353 if (idx && flag==FLAG_b) flag = FLAG_bb; 354 key->flags |= flag; 355 } 356 } 357 } 358 } 359 360 // global b flag strips both leading and trailing spaces 361 if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb; 362 363 // If no keys, perform alphabetic sort over the whole line. 364 if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1; 365 366 // Open input files and read data, populating TT.lines[TT.linecount] 367 loopfiles(toys.optargs, sort_read); 368 369 // The compare (-c) logic was handled in sort_read(), 370 // so if we got here, we're done. 371 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now; 372 373 // Perform the actual sort 374 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); 375 376 // handle unique (-u) 377 if (toys.optflags&FLAG_u) { 378 int jdx; 379 380 for (jdx=0, idx=1; idx<TT.linecount; idx++) { 381 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) 382 free(TT.lines[idx]); 383 else TT.lines[++jdx] = TT.lines[idx]; 384 } 385 if (TT.linecount) TT.linecount = jdx+1; 386 } 387 388 // Output result 389 for (idx = 0; idx<TT.linecount; idx++) { 390 char *s = TT.lines[idx]; 391 unsigned i = strlen(s); 392 393 if (!(toys.optflags&FLAG_z)) s[i] = '\n'; 394 xwrite(fd, s, i+1); 395 if (CFG_TOYBOX_FREE) free(s); 396 } 397 398 exit_now: 399 if (CFG_TOYBOX_FREE) { 400 if (fd != 1) close(fd); 401 free(TT.lines); 402 } 403 } 404