Home | History | Annotate | Download | only in posix
      1 /* uniq.c - report or filter out repeated lines in a file
      2  *
      3  * Copyright 2012 Georgi Chorbadzhiyski <georgi (at) unixsol.org>
      4  *
      5  * See http://opengroup.org/onlinepubs/9699919799/utilities/uniq.html
      6 
      7 USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_USR|TOYFLAG_BIN))
      8 
      9 config UNIQ
     10   bool "uniq"
     11   default y
     12   help
     13     usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
     14 
     15     Report or filter out repeated lines in a file
     16 
     17     -c	show counts before each line
     18     -d	show only lines that are repeated
     19     -u	show only lines that are unique
     20     -i	ignore case when comparing lines
     21     -z	lines end with \0 not \n
     22     -w	compare maximum X chars per line
     23     -f	ignore first X fields
     24     -s	ignore first X chars
     25 */
     26 
     27 #define FOR_uniq
     28 #include "toys.h"
     29 
     // State for uniq; the functions below read and write these fields through TT.
     30 GLOBALS(
     31   long maxchars; // when nonzero, compare at most this many chars (presumably set by -w; binding not visible here)
     32   long nchars; // chars skip() steps over before comparison starts (presumably set by -s)
     33   long nfields; // fields skip() steps over before comparison starts (presumably set by -f)
     34   long repeats; // how many following lines matched the line currently held as prevline
     35 )
     36 
     37 static char *skip(char *str)
     38 {
     39   long nchars = TT.nchars, nfields;
     40 
     41   // Skip fields first
     42   for (nfields = TT.nfields; nfields; str++) {
     43     while (*str && isspace(*str)) str++;
     44     while (*str && !isspace(*str)) str++;
     45     nfields--;
     46   }
     47   // Skip chars
     48   while (*str && nchars--) str++;
     49 
     50   return str;
     51 }
     52 
     53 static void print_line(FILE *f, char *line)
     54 {
     55   if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
     56   if (toys.optflags & FLAG_c) fprintf(f, "%7lu ", TT.repeats + 1);
     57   fputs(line, f);
     58   if (toys.optflags & FLAG_z) fputc(0, f);
     59 }
     60 
     61 void uniq_main(void)
     62 {
     63   FILE *infile = stdin, *outfile = stdout;
     64   char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
     65   size_t thissize, prevsize = 0, tmpsize;
     66 
     67   if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
     68   if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");
     69 
     70   if (toys.optflags & FLAG_z) eol = 0;
     71 
     72   // If first line can't be read
     73   if (getdelim(&prevline, &prevsize, eol, infile) < 0)
     74     return;
     75 
     76   while (getdelim(&thisline, &thissize, eol, infile) > 0) {
     77     int diff;
     78     char *t1, *t2;
     79 
     80     // If requested get the chosen fields + character offsets.
     81     if (TT.nfields || TT.nchars) {
     82       t1 = skip(thisline);
     83       t2 = skip(prevline);
     84     } else {
     85       t1 = thisline;
     86       t2 = prevline;
     87     }
     88 
     89     if (TT.maxchars == 0) {
     90       diff = !(toys.optflags & FLAG_i) ? strcmp(t1, t2) : strcasecmp(t1, t2);
     91     } else {
     92       diff = !(toys.optflags & FLAG_i) ? strncmp(t1, t2, TT.maxchars)
     93               : strncasecmp(t1, t2, TT.maxchars);
     94     }
     95 
     96     if (diff == 0) { // same
     97       TT.repeats++;
     98     } else {
     99       print_line(outfile, prevline);
    100 
    101       TT.repeats = 0;
    102 
    103       tmpline = prevline;
    104       prevline = thisline;
    105       thisline = tmpline;
    106 
    107       tmpsize = prevsize;
    108       prevsize = thissize;
    109       thissize = tmpsize;
    110     }
    111   }
    112 
    113   print_line(outfile, prevline);
    114 
    115   if (CFG_TOYBOX_FREE) {
    116     if (outfile != stdout) fclose(outfile);
    117     if (infile != stdin) fclose(infile);
    118     free(prevline);
    119     free(thisline);
    120   }
    121 }
    122