Home | History | Annotate | Download | only in pending
      1 /* tr.c - translate or delete characters
      2  *
      3  * Copyright 2014 Sandeep Sharma <sandeep.jack2756 (at) gmail.com>
      4  *
      5  * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/tr.html
      6 
      7 USE_TR(NEWTOY(tr, "^>2<1Ccsd[+cC]", TOYFLAG_USR|TOYFLAG_BIN))
      8 
      9 config TR
     10   bool "tr"
     11   default n
     12   help
     13     usage: tr [-cds] SET1 [SET2]
     14 
     15     Translate, squeeze, or delete characters from stdin, writing to stdout
     16 
     17     -c/-C  Take complement of SET1
     18     -d     Delete input characters coded SET1
     19     -s     Squeeze multiple output characters of SET2 into one character
     20 */
     21 
     22 #define FOR_tr
     23 #include "toys.h"
     24 
     25 GLOBALS(
     26   short map[256]; //map of chars
     27   int len1, len2;
     28 )
     29 
     30 enum {
     31   class_alpha, class_alnum, class_digit,
     32   class_lower,class_upper,class_space,class_blank,
     33   class_punct,class_cntrl,class_xdigit,class_invalid
     34 };
     35 
     36 static void map_translation(char *set1 , char *set2)
     37 {
     38   int i = TT.len1, k = 0;
     39 
     40   if (toys.optflags & FLAG_d)
     41     for (; i; i--, k++) TT.map[set1[k]] = set1[k]|0x100; //set delete bit
     42 
     43   if (toys.optflags & FLAG_s) {
     44     for (i = TT.len1, k = 0; i; i--, k++)
     45       TT.map[set1[k]] = TT.map[set1[k]]|0x200;
     46     for (i = TT.len2, k = 0; i; i--, k++)
     47       TT.map[set2[k]] = TT.map[set2[k]]|0x200;
     48   }
     49   i = k = 0;
     50   while (!(toys.optflags & FLAG_d) && set2 && TT.len1--) { //ignore set2 if -d present
     51     TT.map[set1[i]] = ((TT.map[set1[i]] & 0xFF00) | set2[k]);
     52     if (set2[k + 1]) k++;
     53     i++;
     54   }
     55 }
     56 
     57 static int handle_escape_char(char **esc_val) //taken from printf
     58 {
     59   char *ptr = *esc_val;
     60   int esc_length = 0;
     61   unsigned  base = 0, num = 0, result = 0, count = 0;
     62 
     63   if (*ptr == 'x') {
     64     ptr++;
     65     esc_length++;
     66     base = 16;
     67   } else if (isdigit(*ptr)) base = 8;
     68 
     69   while (esc_length < 3 && base) {
     70     num = tolower(*ptr) - '0';
     71     if (num > 10) num += ('0' - 'a' + 10);
     72     if (num >= base) {
     73       if (base == 16) {
     74         esc_length--;
     75         if (!esc_length) {// Invalid hex value eg. /xvd, print as it is /xvd
     76           result = '\\';
     77           ptr--;
     78         }
     79       }
     80       break;
     81     }
     82     esc_length++;
     83     count = result = (count * base) + num;
     84     ptr++;
     85   }
     86   if (base) {
     87     ptr--;
     88     *esc_val = ptr;
     89     return (char)result;
     90   } else {
     91     switch (*ptr) {
     92       case 'n':  result = '\n'; break;
     93       case 't':  result = '\t'; break;
     94       case 'e':  result = (char)27; break;
     95       case 'b':  result = '\b'; break;
     96       case 'a':  result = '\a'; break;
     97       case 'f':  result = '\f'; break;
     98       case 'v':  result = '\v'; break;
     99       case 'r':  result = '\r'; break;
    100       case '\\': result = '\\'; break;
    101       default :
    102         result = '\\';
    103         ptr--; // Let pointer pointing to / we will increment after returning.
    104         break;
    105     }
    106   }
    107   *esc_val = ptr;
    108   return (char)result;
    109 }
    110 
    111 static int find_class(char *class_name)
    112 {
    113   int i;
    114   static char *class[] = {
    115     "[:alpha:]","[:alnum:]","[:digit:]",
    116     "[:lower:]","[:upper:]","[:space:]",
    117     "[:blank:]","[:punct:]","[:cntrl:]",
    118     "[:xdigit:]","NULL"
    119   };
    120 
    121   for (i = 0; i != class_invalid; i++) {
    122     if (!memcmp(class_name, class[i], (class_name[0] == 'x')?10:9)) break;
    123   }
    124   return i;
    125 }
    126 
    127 static char *expand_set(char *arg, int *len)
    128 {
    129   int i = 0, j, k, size = 256;
    130   char *set = xzalloc(size*sizeof(char));
    131 
    132   while (*arg) {
    133 
    134     if (i >= size) {
    135       size += 256;
    136       set = xrealloc(set, size);
    137     }
    138     if (*arg == '\\') {
    139       arg++;
    140       set[i++] = (int)handle_escape_char(&arg);
    141       arg++;
    142       continue;
    143     }
    144     if (arg[1] == '-') {
    145       if (arg[2] == '\0') goto save;
    146       j = arg[0];
    147       k = arg[2];
    148       if (j > k) perror_exit("reverse colating order");
    149       while (j <= k) set[i++] = j++;
    150       arg += 3;
    151       continue;
    152     }
    153     if (arg[0] == '[' && arg[1] == ':') {
    154 
    155       if ((j = find_class(arg)) == class_invalid) goto save;
    156 
    157       if ((j == class_alpha) || (j == class_upper) || (j == class_alnum)) {
    158       for (k = 'A'; k <= 'Z'; k++) set[i++] = k;
    159       }
    160       if ((j == class_alpha) || (j == class_lower) || (j == class_alnum)) {
    161         for (k = 'a'; k <= 'z'; k++) set[i++] = k;
    162       }
    163       if ((j == class_alnum) || (j == class_digit) || (j == class_xdigit)) {
    164         for (k = '0'; k <= '9'; k++) set[i++] = k;
    165       }
    166       if (j == class_space || j == class_blank) {
    167         set[i++] = '\t';
    168         if (j == class_space) {
    169           set[i++] = '\n';
    170           set[i++] = '\f';
    171           set[i++] = '\r';
    172           set[i++] = '\v';
    173         }
    174         set[i++] = ' ';
    175       }
    176       if (j == class_punct) {
    177         for (k = 0; k <= 255; k++)
    178           if (ispunct(k)) set[i++] = k;
    179       }
    180       if (j == class_cntrl) {
    181         for (k = 0; k <= 255; k++)
    182           if (iscntrl(k)) set[i++] = k;
    183       }
    184       if (j == class_xdigit) {
    185         for (k = 'A'; k <= 'F'; k++) {
    186           set[i + 6] = k | 0x20;
    187           set[i++] = k;
    188         }
    189         i += 6;
    190         arg += 10;
    191         continue;
    192       }
    193 
    194       arg += 9; //never here for class_xdigit.
    195       continue;
    196     }
    197     if (arg[0] == '[' && arg[1] == '=') { //[=char=] only
    198       arg += 2;
    199       if (*arg) set[i++] = *arg;
    200       if (!arg[1] || arg[1] != '=' || arg[2] != ']')
    201         error_exit("bad equiv class");
    202       continue;
    203     }
    204 save:
    205     set[i++] = *arg++;
    206   }
    207   *len = i;
    208   return set;
    209 }
    210 
    211 static void print_map(char *set1, char *set2)
    212 {
    213   int r = 0, i, prev_char = -1;
    214 
    215   while (1)
    216   {
    217     i = 0;
    218     r = read(STDIN_FILENO, (toybuf), sizeof(toybuf));
    219     if (!r) break;
    220     for (;r > i;i++) {
    221 
    222       if ((toys.optflags & FLAG_d) && (TT.map[(int)toybuf[i]] & 0x100)) continue;
    223       if (toys.optflags & FLAG_s) {
    224         if ((TT.map[(int)toybuf[i]] & 0x200) &&
    225             (prev_char == TT.map[(int)toybuf[i]])) {
    226           continue;
    227         }
    228       }
    229       xputc(TT.map[(int)toybuf[i]] & 0xFF);
    230       prev_char = TT.map[(int)toybuf[i]];
    231       fflush(stdout);
    232     }
    233   }
    234 }
    235 
    236 static void do_complement(char **set)
    237 {
    238   int i, j;
    239   char *comp = xmalloc(256);
    240 
    241   for (i = 0, j = 0;i < 256; i++) {
    242     if (memchr(*set, i, TT.len1)) continue;
    243     else comp[j++] = (char)i;
    244   }
    245   free(*set);
    246   TT.len1 = j;
    247   *set = comp;
    248 }
    249 
    250 void tr_main(void)
    251 {
    252   char *set1, *set2 = NULL;
    253   int i;
    254 
    255   for (i = 0; i < 256; i++) TT.map[i] = i; //init map
    256 
    257   set1 = expand_set(toys.optargs[0], &TT.len1);
    258   if (toys.optflags & FLAG_c) do_complement(&set1);
    259   if (toys.optargs[1]) {
    260     if (toys.optargs[1][0] == '\0') error_exit("set2 can't be empty string");
    261     set2 = expand_set(toys.optargs[1], &TT.len2);
    262   }
    263   map_translation(set1, set2);
    264 
    265   print_map(set1, set2);
    266   free(set1);
    267   free(set2);
    268 }
    269