Home | History | Annotate | Download | only in posix
      1 /* cut.c - Cut from a file.
      2  *
      3  * Copyright 2012 Ranjan Kumar <ranjankumar.bth (at) gmail.com>
      4  * Copyright 2012 Kyungwan Han <asura321 (at) gmail.com>
      5  *
      6  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/cut.html
      7  *
      8  * TODO: cleanup
      9 
     10 USE_CUT(NEWTOY(cut, "b:|c:|f:|d:sn[!cbf]", TOYFLAG_USR|TOYFLAG_BIN))
     11 
     12 config CUT
     13   bool "cut"
     14   default y
     15   help
     16     usage: cut OPTION... [FILE]...
     17 
     18     Print selected parts of lines from each FILE to standard output.
     19 
     20     -b LIST	select only these bytes from LIST.
     21     -c LIST	select only these characters from LIST.
     22     -f LIST	select only these fields.
     23     -d DELIM	use DELIM instead of TAB for field delimiter.
     24     -s	do not print lines not containing delimiters.
     25     -n	don't split multibyte characters (Ignored).
     26 */
     27 #define FOR_cut
     28 #include "toys.h"
     29 
     30 GLOBALS(
     31   char *delim;
     32   char *flist;
     33   char *clist;
     34   char *blist;
     35 
     36   void *slist_head;
     37   unsigned nelem;
     38   void (*do_cut)(int fd);
     39 )
     40 
     41 struct slist {
     42   struct slist *next;
     43   int start, end;
     44 };
     45 
     46 static void add_to_list(int start, int end)
     47 {
     48   struct slist *current, *head_ref, *temp1_node;
     49 
     50   head_ref = TT.slist_head;
     51   temp1_node = xzalloc(sizeof(struct slist));
     52   temp1_node->start = start;
     53   temp1_node->end = end;
     54 
     55   /* Special case for the head end */
     56   if (!head_ref || head_ref->start >= start) {
     57       temp1_node->next = head_ref;
     58       head_ref = temp1_node;
     59   } else {
     60     /* Locate the node before the point of insertion */
     61     current = head_ref;
     62     while (current->next && current->next->start < temp1_node->start)
     63         current = current->next;
     64     temp1_node->next = current->next;
     65     current->next = temp1_node;
     66   }
     67   TT.slist_head = head_ref;
     68 }
     69 
     70 // parse list and add to slist.
     71 static void parse_list(char *list)
     72 {
     73   for (;;) {
     74     char *ctoken = strsep(&list, ","), *dtoken;
     75     int start = 0, end = INT_MAX;
     76 
     77     if (!ctoken) break;
     78     if (!*ctoken) continue;
     79 
     80     // Get start position.
     81     if (*(dtoken = strsep(&ctoken, "-"))) {
     82       start = atolx_range(dtoken, 0, INT_MAX);
     83       start = (start?(start-1):start);
     84     }
     85 
     86     // Get end position.
     87     if (!ctoken) end = -1; //case e.g. 1,2,3
     88     else if (*ctoken) {//case e.g. N-M
     89       end = atolx_range(ctoken, 0, INT_MAX);
     90       if (!end) end = INT_MAX;
     91       end--;
     92       if(end == start) end = -1;
     93     }
     94     add_to_list(start, end);
     95     TT.nelem++;
     96   }
     97   // if list is missing in command line.
     98   if (!TT.nelem) error_exit("missing positions list");
     99 }
    100 
    101 /*
    102  * retrive data from the file/s.
    103  */
    104 static void get_data(void)
    105 {
    106   char **argv = toys.optargs; //file name.
    107   toys.exitval = EXIT_SUCCESS;
    108 
    109   if(!*argv) TT.do_cut(0); //for stdin
    110   else {
    111     for(; *argv; ++argv) {
    112       if(strcmp(*argv, "-") == 0) TT.do_cut(0); //for stdin
    113       else {
    114         int fd = open(*argv, O_RDONLY, 0);
    115         if (fd < 0) {//if file not present then continue with other files.
    116           perror_msg_raw(*argv);
    117           continue;
    118         }
    119         TT.do_cut(fd);
    120         xclose(fd);
    121       }
    122     }
    123   }
    124 }
    125 
    126 // perform cut operation on the given delimiter.
    127 static void do_fcut(int fd)
    128 {
    129   char *buff, *pfield = 0, *delimiter = TT.delim;
    130 
    131   for (;;) {
    132     unsigned cpos = 0;
    133     int start, ndelimiters = -1;
    134     int  nprinted_fields = 0;
    135     struct slist *temp_node = TT.slist_head;
    136 
    137     free(pfield);
    138     pfield = 0;
    139 
    140     if (!(buff = get_line(fd))) break;
    141 
    142     //does line have any delimiter?.
    143     if (strrchr(buff, (int)delimiter[0]) == NULL) {
    144       //if not then print whole line and move to next line.
    145       if (!(toys.optflags & FLAG_s)) xputs(buff);
    146       continue;
    147     }
    148 
    149     pfield = xzalloc(strlen(buff) + 1);
    150 
    151     if (temp_node) {
    152       //process list on each line.
    153       while (cpos < TT.nelem && buff) {
    154         if (!temp_node) break;
    155         start = temp_node->start;
    156         do {
    157           char *field = 0;
    158 
    159           //count number of delimeters per line.
    160           while (buff) {
    161             if (ndelimiters < start) {
    162               ndelimiters++;
    163               field = strsep(&buff, delimiter);
    164             } else break;
    165           }
    166           //print field (if not yet printed).
    167           if (!pfield[ndelimiters]) {
    168             if (ndelimiters == start) {
    169               //put delimiter.
    170               if (nprinted_fields++ > 0) xputc(delimiter[0]);
    171               if (field) fputs(field, stdout);
    172               //make sure this field won't print again.
    173               pfield[ndelimiters] = (char) 0x23; //put some char at this position.
    174             }
    175           }
    176           start++;
    177           if ((temp_node->end < 0) || !buff) break;
    178         } while(start <= temp_node->end);
    179         temp_node = temp_node->next;
    180         cpos++;
    181       }
    182     }
    183     xputc('\n');
    184   }
    185 }
    186 
    187 // perform cut operation char or byte.
    188 static void do_bccut(int fd)
    189 {
    190   char *buff;
    191 
    192   while ((buff = get_line(fd)) != NULL) {
    193     unsigned cpos = 0;
    194     int buffln = strlen(buff);
    195     char *pfield = xzalloc(buffln + 1);
    196     struct slist *temp_node = TT.slist_head;
    197 
    198     if (temp_node != NULL) {
    199       while (cpos < TT.nelem) {
    200         int start;
    201 
    202         if (!temp_node) break;
    203         start = temp_node->start;
    204         while (start < buffln) {
    205           //to avoid duplicate field printing.
    206           if (pfield[start]) {
    207               if (++start <= temp_node->end) continue;
    208               temp_node = temp_node->next;
    209               break;
    210           } else {
    211             //make sure this field won't print again.
    212             pfield[start] = (char) 0x23; //put some char at this position.
    213             xputc(buff[start]);
    214           }
    215           if (++start > temp_node->end) {
    216             temp_node = temp_node->next;
    217             break;
    218           }
    219         }
    220         cpos++;
    221       }
    222       xputc('\n');
    223     }
    224     free(pfield);
    225     pfield = NULL;
    226   }
    227 }
    228 
    229 void cut_main(void)
    230 {
    231   char delimiter = '\t'; //default delimiter.
    232   char *list;
    233 
    234   TT.nelem = 0;
    235   TT.slist_head = NULL;
    236 
    237   //Get list and assign the function.
    238   if (toys.optflags & FLAG_f) {
    239     list = TT.flist;
    240     TT.do_cut = do_fcut;
    241   } else if (toys.optflags & FLAG_c) {
    242     list = TT.clist;
    243     TT.do_cut = do_bccut;
    244   } else {
    245     list = TT.blist;
    246     TT.do_cut = do_bccut;
    247   }
    248 
    249   if (toys.optflags & FLAG_d) {
    250     //delimiter must be 1 char.
    251     if(TT.delim[0] && TT.delim[1])
    252       perror_exit("the delimiter must be a single character");
    253     delimiter = TT.delim[0];
    254   }
    255 
    256   if(!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
    257     TT.delim = xzalloc(2);
    258     TT.delim[0] = delimiter;
    259   }
    260 
    261   //when field is not specified, cutting has some special handling.
    262   if (!(toys.optflags & FLAG_f)) {
    263     if (toys.optflags & FLAG_s)
    264       perror_exit("suppressing non-delimited lines operating on fields");
    265     if (delimiter != '\t')
    266       perror_exit("an input delimiter may be specified only when operating on fields");
    267   }
    268 
    269   parse_list(list);
    270   get_data();
    271   if (!(toys.optflags & FLAG_d) && (toys.optflags & FLAG_f)) {
    272     free(TT.delim);
    273     TT.delim = NULL;
    274   }
    275   llist_traverse(TT.slist_head, free);
    276 }
    277