1 /* sort.c - put input lines into order 2 * 3 * Copyright 2004, 2008 Rob Landley <rob (at) landley.net> 4 * 5 * See http://opengroup.org/onlinepubs/007904975/utilities/sort.html 6 7 USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN)) 8 9 config SORT 10 bool "sort" 11 default y 12 help 13 usage: sort [-run] [FILE...] 14 15 Sort all lines of text from input files (or stdin) to stdout. 16 17 -r reverse 18 -u unique lines only 19 -n numeric order (instead of alphabetical) 20 21 config SORT_BIG 22 bool "SuSv3 options (Support -ktcsbdfiozM)" 23 default y 24 depends on SORT 25 help 26 usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE] 27 28 -b ignore leading blanks (or trailing blanks in second part of key) 29 -c check whether input is sorted 30 -d dictionary order (use alphanumeric and whitespace chars only) 31 -f force uppercase (case insensitive sort) 32 -i ignore nonprinting characters 33 -M month sort (jan, feb, etc). 34 -x Hexadecimal numerical sort 35 -s skip fallback sort (only sort with keys) 36 -z zero (null) terminated input 37 -k sort by "key" (see below) 38 -t use a key separator other than whitespace 39 -o output to FILE instead of stdout 40 41 Sorting by key looks at a subset of the words on each line. -k2 42 uses the second word to the end of the line, -k2,2 looks at only 43 the second word, -k2,4 looks from the start of the second to the end 44 of the fourth word. Specifying multiple keys uses the later keys as 45 tie breakers, in order. A type specifier appended to a sort key 46 (such as -2,2n) applies only to sorting that key. 47 48 config SORT_FLOAT 49 bool 50 default y 51 depends on SORT_BIG && TOYBOX_FLOAT 52 help 53 usage: sort [-g] 54 55 -g general numeric sort (double precision with nan and inf) 56 */ 57 58 #define FOR_sort 59 #include "toys.h" 60 61 GLOBALS( 62 char *key_separator; 63 struct arg_list *raw_keys; 64 char *outfile; 65 char *ignore1, ignore2; // GNU compatability NOPs for -S and -T. 66 67 void *key_list; 68 int linecount; 69 char **lines; 70 ) 71 72 // The sort types are n, g, and M. 73 // u, c, s, and z apply to top level only, not to keys. 74 // b at top level implies bb. 75 // The remaining options can be applied to search keys. 76 77 #define FLAG_bb (1<<31) // Ignore trailing blanks 78 79 struct sort_key 80 { 81 struct sort_key *next_key; // linked list 82 unsigned range[4]; // start word, start char, end word, end char 83 int flags; 84 }; 85 86 // Copy of the part of this string corresponding to a key/flags. 87 88 static char *get_key_data(char *str, struct sort_key *key, int flags) 89 { 90 int start=0, end, len, i, j; 91 92 // Special case whole string, so we don't have to make a copy 93 94 if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] 95 && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; 96 97 // Find start of key on first pass, end on second pass 98 99 len = strlen(str); 100 for (j=0; j<2; j++) { 101 if (!key->range[2*j]) end=len; 102 103 // Loop through fields 104 else { 105 end=0; 106 for (i=1; i < key->range[2*j]+j; i++) { 107 108 // Skip leading blanks 109 if (str[end] && !TT.key_separator) 110 while (isspace(str[end])) end++; 111 112 // Skip body of key 113 for (; str[end]; end++) { 114 if (TT.key_separator) { 115 if (str[end]==*TT.key_separator) break; 116 } else if (isspace(str[end])) break; 117 } 118 } 119 } 120 if (!j) start=end; 121 } 122 123 // Key with explicit separator starts after the separator 124 if (TT.key_separator && str[start]==*TT.key_separator) start++; 125 126 // Strip leading and trailing whitespace if necessary 127 if (flags&FLAG_b) while (isspace(str[start])) start++; 128 if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; 129 130 // Handle offsets on start and end 131 if (key->range[3]) { 132 end += key->range[3]-1; 133 if (end>len) end=len; 134 } 135 if (key->range[1]) { 136 start += key->range[1]-1; 137 if (start>len) start=len; 138 } 139 140 // Make the copy 141 if (end<start) end=start; 142 str = xstrndup(str+start, end-start); 143 144 // Handle -d 145 if (flags&FLAG_d) { 146 for (start = end = 0; str[end]; end++) 147 if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; 148 str[start] = 0; 149 } 150 151 // Handle -i 152 if (flags&FLAG_i) { 153 for (start = end = 0; str[end]; end++) 154 if (isprint(str[end])) str[start++] = str[end]; 155 str[start] = 0; 156 } 157 158 // Handle -f 159 if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]); 160 161 return str; 162 } 163 164 // append a sort_key to key_list. 165 166 static struct sort_key *add_key(void) 167 { 168 void **stupid_compiler = &TT.key_list; 169 struct sort_key **pkey = (struct sort_key **)stupid_compiler; 170 171 while (*pkey) pkey = &((*pkey)->next_key); 172 return *pkey = xzalloc(sizeof(struct sort_key)); 173 } 174 175 // Perform actual comparison 176 static int compare_values(int flags, char *x, char *y) 177 { 178 int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x); 179 180 // Ascii sort 181 if (!ff) return strcmp(x, y); 182 183 if (CFG_SORT_FLOAT && ff == FLAG_g) { 184 char *xx,*yy; 185 double dx = strtod(x,&xx), dy = strtod(y,&yy); 186 int xinf, yinf; 187 188 // not numbers < NaN < -infinity < numbers < +infinity 189 190 if (x==xx) return y==yy ? 0 : -1; 191 if (y==yy) return 1; 192 193 // Check for isnan 194 if (dx!=dx) return (dy!=dy) ? 0 : -1; 195 if (dy!=dy) return 1; 196 197 // Check for infinity. (Could underflow, but avoids needing libm.) 198 xinf = (1.0/dx == 0.0); 199 yinf = (1.0/dy == 0.0); 200 if (xinf) { 201 if(dx<0) return (yinf && dy<0) ? 0 : -1; 202 return (yinf && dy>0) ? 0 : 1; 203 } 204 if (yinf) return dy<0 ? 1 : -1; 205 206 return dx>dy ? 1 : (dx<dy ? -1 : 0); 207 } else if (CFG_SORT_BIG && ff == FLAG_M) { 208 struct tm thyme; 209 int dx; 210 char *xx,*yy; 211 212 xx = strptime(x,"%b",&thyme); 213 dx = thyme.tm_mon; 214 yy = strptime(y,"%b",&thyme); 215 if (!xx) return !yy ? 0 : -1; 216 else if (!yy) return 1; 217 else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; 218 219 } else if (CFG_SORT_BIG && ff == FLAG_x) { 220 return strtol(x, NULL, 16)-strtol(y, NULL, 16); 221 // This has to be ff == FLAG_n 222 } else { 223 // Full floating point version of -n 224 if (CFG_SORT_FLOAT) { 225 double dx = atof(x), dy = atof(y); 226 227 return dx>dy ? 1 : (dx<dy ? -1 : 0); 228 // Integer version of -n for tiny systems 229 } else return atoi(x)-atoi(y); 230 } 231 } 232 233 // Callback from qsort(): Iterate through key_list and perform comparisons. 234 static int compare_keys(const void *xarg, const void *yarg) 235 { 236 int flags = toys.optflags, retval = 0; 237 char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; 238 struct sort_key *key; 239 240 if (CFG_SORT_BIG) { 241 for (key=(struct sort_key *)TT.key_list; !retval && key; 242 key = key->next_key) 243 { 244 flags = key->flags ? key->flags : toys.optflags; 245 246 // Chop out and modify key chunks, handling -dfib 247 248 x = get_key_data(xx, key, flags); 249 y = get_key_data(yy, key, flags); 250 251 retval = compare_values(flags, x, y); 252 253 // Free the copies get_key_data() made. 254 255 if (x != xx) free(x); 256 if (y != yy) free(y); 257 258 if (retval) break; 259 } 260 } else retval = compare_values(flags, xx, yy); 261 262 // Perform fallback sort if necessary 263 if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) { 264 retval = strcmp(xx, yy); 265 flags = toys.optflags; 266 } 267 268 return retval * ((flags&FLAG_r) ? -1 : 1); 269 } 270 271 // Callback from loopfiles to handle input files. 272 static void sort_read(int fd, char *name) 273 { 274 // Read each line from file, appending to a big array. 275 276 for (;;) { 277 char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z)) 278 ? get_rawline(fd, NULL, 0) : get_line(fd); 279 280 if (!line) break; 281 282 // handle -c here so we don't allocate more memory than necessary. 283 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) { 284 int j = (toys.optflags&FLAG_u) ? -1 : 0; 285 286 if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) 287 error_exit("%s: Check line %d\n", name, TT.linecount); 288 free(TT.lines); 289 TT.lines = (char **)line; 290 } else { 291 if (!(TT.linecount&63)) 292 TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); 293 TT.lines[TT.linecount] = line; 294 } 295 TT.linecount++; 296 } 297 } 298 299 void sort_main(void) 300 { 301 int idx, fd = 1; 302 303 // Open output file if necessary. 304 if (CFG_SORT_BIG && TT.outfile) 305 fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666); 306 307 // Parse -k sort keys. 308 if (CFG_SORT_BIG && TT.raw_keys) { 309 struct arg_list *arg; 310 311 for (arg = TT.raw_keys; arg; arg = arg->next) { 312 struct sort_key *key = add_key(); 313 char *temp; 314 int flag; 315 316 idx = 0; 317 temp = arg->arg; 318 while (*temp) { 319 // Start of range 320 key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); 321 if (*temp=='.') 322 key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); 323 324 // Handle flags appended to a key type. 325 for (;*temp;temp++) { 326 char *temp2, *optlist; 327 328 // Note that a second comma becomes an "Unknown key" error. 329 330 if (*temp==',' && !idx++) { 331 temp++; 332 break; 333 } 334 335 // Which flag is this? 336 337 optlist = toys.which->options; 338 temp2 = strchr(optlist, *temp); 339 flag = (1<<(optlist-temp2+strlen(optlist)-1)); 340 341 // Was it a flag that can apply to a key? 342 343 if (!temp2 || flag>FLAG_b 344 || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) 345 { 346 error_exit("Unknown key option."); 347 } 348 // b after , means strip _trailing_ space, not leading. 349 if (idx && flag==FLAG_b) flag = FLAG_bb; 350 key->flags |= flag; 351 } 352 } 353 } 354 } 355 356 // global b flag strips both leading and trailing spaces 357 if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb; 358 359 // If no keys, perform alphabetic sort over the whole line. 360 if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1; 361 362 // Open input files and read data, populating TT.lines[TT.linecount] 363 loopfiles(toys.optargs, sort_read); 364 365 // The compare (-c) logic was handled in sort_read(), 366 // so if we got here, we're done. 367 if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now; 368 369 // Perform the actual sort 370 qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); 371 372 // handle unique (-u) 373 if (toys.optflags&FLAG_u) { 374 int jdx; 375 376 for (jdx=0, idx=1; idx<TT.linecount; idx++) { 377 if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) 378 free(TT.lines[idx]); 379 else TT.lines[++jdx] = TT.lines[idx]; 380 } 381 if (TT.linecount) TT.linecount = jdx+1; 382 } 383 384 // Output result 385 for (idx = 0; idx<TT.linecount; idx++) { 386 char *s = TT.lines[idx]; 387 xwrite(fd, s, strlen(s)); 388 if (CFG_TOYBOX_FREE) free(s); 389 xwrite(fd, "\n", 1); 390 } 391 392 exit_now: 393 if (CFG_TOYBOX_FREE) { 394 if (fd != 1) close(fd); 395 free(TT.lines); 396 } 397 } 398