Home | History | Annotate | Download | only in lib
      1 /* args.c - Command line argument parsing.
      2  *
      3  * Copyright 2006 Rob Landley <rob (at) landley.net>
      4  */
      5 
      6 // NOTE: If option parsing segfaults, switch on TOYBOX_DEBUG in menuconfig.
      7 
      8 // Enabling TOYBOX_DEBUG in .config adds syntax checks to option string parsing
      9 // which aren't needed in the final code (your option string is hardwired and
     10 // should be correct when you ship), but are useful for development.
     11 
     12 #include "toys.h"
     13 
     14 // Design goals:
     15 //   Don't use getopt() out of libc.
     16 //   Don't permute original arguments (screwing up ps/top output).
     17 //   Integrated --long options "(noshort)a(along)b(blong1)(blong2)"
     18 
     19 /* This uses a getopt-like option string, but not getopt() itself. We call
     20  * it the get_opt string.
     21  *
     22  * Each option in the get_opt string corresponds to a bit position in the
     23  * return value. The rightmost argument is (1<<0), the next to last is (1<<1)
     24  * and so on. If the option isn't seen in argv[], its bit remains 0.
     25  *
     26  * Options which have an argument fill in the corresponding slot in the global
     27  * union "this" (see generated/globals.h), which it treats as an array of longs
     28  * (note that sizeof(long)==sizeof(pointer) is guaranteed by LP64).
     29  *
     30  * You don't have to free the option strings, which point into the environment
     31  * space. List objects should be freed by main() when command_main() returns.
     32  *
     33  * Example:
     34  *   Calling get_optflags() when toys.which->options="ab:c:d" and
     35  *   argv = ["command", "-b", "fruit", "-d", "walrus"] results in:
     36  *
     37  *     Changes to struct toys:
     38  *       toys.optflags = 5 (I.E. 0101 so -b = 4 | -d = 1)
     39  *       toys.optargs[0] = "walrus" (leftover argument)
     40  *       toys.optargs[1] = NULL (end of list)
     41  *       toys.optc = 1 (there was 1 leftover argument)
     42  *
     43  *     Changes to union this:
     44  *       this[0]=NULL (because -c didn't get an argument this time)
     45  *       this[1]="fruit" (argument to -b)
     46  */
     47 
     48 // What you can put in a get_opt string:
     49 //   Any otherwise unused character (all letters, unprefixed numbers) specify
     50 //   an option that sets a flag. The bit value is the same as the binary digit
     51 //   if you string the option characters together in order.
     52 //   So in "abcdefgh" a = 128, h = 1
     53 //
     54 //   Suffixes specify that this option takes an argument (stored in GLOBALS):
     55 //       Note that pointer and long are always the same size, even on 64 bit.
     56 //     : plus a string argument, keep most recent if more than one
     57 //     * plus a string argument, appended to a list
     58 //     # plus a signed long argument
     59 //       <LOW     - die if less than LOW
     60 //       >HIGH    - die if greater than HIGH
     61 //       =DEFAULT - value if not specified
     62 //     - plus a signed long argument defaulting to negative (say + for positive)
     63 //     . plus a double precision floating point argument (with CFG_TOYBOX_FLOAT)
     64 //       Chop this option out with USE_TOYBOX_FLOAT() in option string
     65 //       Same <LOW>HIGH=DEFAULT as #
     66 //     @ plus an occurrence counter (which is a long)
     67 //     (longopt)
     68 //     | this is required. If more than one marked, only one required.
     69 //     ; long option's argument is optional (can only be supplied with --opt=)
     70 //     ^ Stop parsing after encountering this argument
     71 //    " " (space char) the "plus an argument" must be separate
     72 //        I.E. "-j 3" not "-j3". So "kill -stop" != "kill -s top"
     73 //
     74 //   At the beginning of the get_opt string (before any options):
     75 //     ^ stop at first nonoption argument
     76 //     <0 die if less than # leftover arguments (default 0)
     77 //     >9 die if > # leftover arguments (default MAX_INT)
     78 //     ? Allow unknown arguments (pass them through to command).
     79 //     & first argument has imaginary dash (ala tar/ps)
     80 //       If given twice, all arguments have imaginary dash
     81 //
     82 //   At the end: [groups] of previously seen options
     83 //     - Only one in group (switch off)    [-abc] means -ab=-b, -ba=-a, -abc=-c
     84 //     + Synonyms (switch on all)          [+abc] means -ab=-abc, -c=-abc
     85 //     ! More than one in group is error   [!abc] means -ab calls error_exit()
     86 //       primarily useful if you can switch things back off again.
     87 
     88 // Notes from getopt man page
     89 //   - and -- cannot be arguments.
     90 //     -- force end of arguments
     91 //     - is a synonym for stdin in file arguments
     92 //   -abcd means -a -b -c -d (but if -b takes an argument, then it's -a -b cd)
     93 
     94 // Linked list of all known options (option string parsed into this).
     95 // Hangs off getoptflagstate, freed at end of option parsing.
     96 struct opts {
     97   struct opts *next;
     98   long *arg;         // Pointer into union "this" to store arguments at.
     99   int c;             // Argument character to match
    100   int flags;         // |=1, ^=2, " "=4, ;=8
    101   unsigned long long dex[3]; // bits to disable/enable/exclude in toys.optflags
    102   char type;         // Type of arguments to store union "this"
    103   union {
    104     long l;
    105     FLOAT f;
    106   } val[3];          // low, high, default - range of allowed values
    107 };
    108 
    109 // linked list of long options. (Hangs off getoptflagstate, free at end of
    110 // option parsing, details about flag to set and global slot to fill out
    111 // stored in related short option struct, but if opt->c = -1 the long option
    112 // is "bare" (has no corresponding short option).
    113 struct longopts {
    114   struct longopts *next;
    115   struct opts *opt;
    116   char *str;
    117   int len;
    118 };
    119 
    120 // State during argument parsing.
    121 struct getoptflagstate
    122 {
    123   int argc, minargs, maxargs, nodash;
    124   char *arg;
    125   struct opts *opts;
    126   struct longopts *longopts;
    127   int noerror, nodash_now, stopearly;
    128   unsigned excludes, requires;
    129 };
    130 
    131 // Use getoptflagstate to parse parse one command line option from argv
    132 static int gotflag(struct getoptflagstate *gof, struct opts *opt)
    133 {
    134   int type;
    135 
    136   // Did we recognize this option?
    137   if (!opt) {
    138     if (gof->noerror) return 1;
    139     error_exit("Unknown option %s", gof->arg);
    140   }
    141 
    142   // Might enabling this switch off something else?
    143   if (toys.optflags & opt->dex[0]) {
    144     struct opts *clr;
    145     unsigned long long i = 1;
    146 
    147     // Forget saved argument for flag we switch back off
    148     for (clr=gof->opts, i=1; clr; clr = clr->next, i<<=1)
    149       if (clr->arg && (i & toys.optflags & opt->dex[0])) *clr->arg = 0;
    150     toys.optflags &= ~opt->dex[0];
    151   }
    152 
    153   // Set flags
    154   toys.optflags |= opt->dex[1];
    155   gof->excludes |= opt->dex[2];
    156   if (opt->flags&2) gof->stopearly=2;
    157 
    158   if (toys.optflags & gof->excludes) {
    159     struct opts *bad;
    160     unsigned i = 1;
    161 
    162     for (bad=gof->opts, i=1; bad ;bad = bad->next, i<<=1) {
    163       if (opt == bad || !(i & toys.optflags)) continue;
    164       if (toys.optflags & bad->dex[2]) break;
    165     }
    166     if (bad) error_exit("No '%c' with '%c'", opt->c, bad->c);
    167   }
    168 
    169   // Does this option take an argument?
    170   if (!gof->arg) {
    171     if (opt->flags & 8) return 0;
    172     gof->arg = "";
    173   } else gof->arg++;
    174   type = opt->type;
    175 
    176   if (type == '@') ++*(opt->arg);
    177   else if (type) {
    178     char *arg = gof->arg;
    179 
    180     // Handle "-xblah" and "-x blah", but also a third case: "abxc blah"
    181     // to make "tar xCjfv blah1 blah2 thingy" work like
    182     // "tar -x -C blah1 -j -f blah2 -v thingy"
    183 
    184     if (gof->nodash_now || (!arg[0] && !(opt->flags & 8)))
    185       arg = toys.argv[++gof->argc];
    186     if (!arg) {
    187       char *s = "Missing argument to ";
    188       struct longopts *lo;
    189 
    190       if (opt->c != -1) error_exit("%s-%c", s, opt->c);
    191 
    192       for (lo = gof->longopts; lo->opt != opt; lo = lo->next);
    193       error_exit("%s--%.*s", s, lo->len, lo->str);
    194     }
    195 
    196     if (type == ':') *(opt->arg) = (long)arg;
    197     else if (type == '*') {
    198       struct arg_list **list;
    199 
    200       list = (struct arg_list **)opt->arg;
    201       while (*list) list=&((*list)->next);
    202       *list = xzalloc(sizeof(struct arg_list));
    203       (*list)->arg = arg;
    204     } else if (type == '#' || type == '-') {
    205       long l = atolx(arg);
    206       if (type == '-' && !ispunct(*arg)) l*=-1;
    207       if (l < opt->val[0].l) error_exit("-%c < %ld", opt->c, opt->val[0].l);
    208       if (l > opt->val[1].l) error_exit("-%c > %ld", opt->c, opt->val[1].l);
    209 
    210       *(opt->arg) = l;
    211     } else if (CFG_TOYBOX_FLOAT && type == '.') {
    212       FLOAT *f = (FLOAT *)(opt->arg);
    213 
    214       *f = strtod(arg, &arg);
    215       if (opt->val[0].l != LONG_MIN && *f < opt->val[0].f)
    216         error_exit("-%c < %lf", opt->c, (double)opt->val[0].f);
    217       if (opt->val[1].l != LONG_MAX && *f > opt->val[1].f)
    218         error_exit("-%c > %lf", opt->c, (double)opt->val[1].f);
    219     }
    220 
    221     if (!gof->nodash_now) gof->arg = "";
    222   }
    223 
    224   return 0;
    225 }
    226 
    227 // Parse this command's options string into struct getoptflagstate, which
    228 // includes a struct opts linked list in reverse order (I.E. right-to-left)
    229 void parse_optflaglist(struct getoptflagstate *gof)
    230 {
    231   char *options = toys.which->options;
    232   long *nextarg = (long *)&this;
    233   struct opts *new = 0;
    234   int idx;
    235 
    236   // Parse option format string
    237   memset(gof, 0, sizeof(struct getoptflagstate));
    238   gof->maxargs = INT_MAX;
    239   if (!options) return;
    240 
    241   // Parse leading special behavior indicators
    242   for (;;) {
    243     if (*options == '^') gof->stopearly++;
    244     else if (*options == '<') gof->minargs=*(++options)-'0';
    245     else if (*options == '>') gof->maxargs=*(++options)-'0';
    246     else if (*options == '?') gof->noerror++;
    247     else if (*options == '&') gof->nodash++;
    248     else break;
    249     options++;
    250   }
    251 
    252   // Parse option string into a linked list of options with attributes.
    253 
    254   if (!*options) gof->stopearly++;
    255   while (*options) {
    256     char *temp;
    257 
    258     // Option groups come after all options are defined
    259     if (*options == '[') break;
    260 
    261     // Allocate a new list entry when necessary
    262     if (!new) {
    263       new = xzalloc(sizeof(struct opts));
    264       new->next = gof->opts;
    265       gof->opts = new;
    266       new->val[0].l = LONG_MIN;
    267       new->val[1].l = LONG_MAX;
    268     }
    269     // Each option must start with "(" or an option character.  (Bare
    270     // longopts only come at the start of the string.)
    271     if (*options == '(' && new->c != -1) {
    272       char *end;
    273       struct longopts *lo;
    274 
    275       // Find the end of the longopt
    276       for (end = ++options; *end && *end != ')'; end++);
    277       if (CFG_TOYBOX_DEBUG && !*end) error_exit("(longopt) didn't end");
    278 
    279       // init a new struct longopts
    280       lo = xmalloc(sizeof(struct longopts));
    281       lo->next = gof->longopts;
    282       lo->opt = new;
    283       lo->str = options;
    284       lo->len = end-options;
    285       gof->longopts = lo;
    286       options = ++end;
    287 
    288       // Mark this struct opt as used, even when no short opt.
    289       if (!new->c) new->c = -1;
    290 
    291       continue;
    292 
    293     // If this is the start of a new option that wasn't a longopt,
    294 
    295     } else if (strchr(":*#@.-", *options)) {
    296       if (CFG_TOYBOX_DEBUG && new->type)
    297         error_exit("multiple types %c:%c%c", new->c, new->type, *options);
    298       new->type = *options;
    299     } else if (-1 != (idx = stridx("|^ ;", *options))) new->flags |= 1<<idx;
    300     // bounds checking
    301     else if (-1 != (idx = stridx("<>=", *options))) {
    302       if (new->type == '#') {
    303         long l = strtol(++options, &temp, 10);
    304         if (temp != options) new->val[idx].l = l;
    305       } else if (CFG_TOYBOX_FLOAT && new->type == '.') {
    306         FLOAT f = strtod(++options, &temp);
    307         if (temp != options) new->val[idx].f = f;
    308       } else if (CFG_TOYBOX_DEBUG) error_exit("<>= only after .#");
    309       options = --temp;
    310 
    311     // At this point, we've hit the end of the previous option.  The
    312     // current character is the start of a new option.  If we've already
    313     // assigned an option to this struct, loop to allocate a new one.
    314     // (It'll get back here afterwards and fall through to next else.)
    315     } else if (new->c) {
    316       new = 0;
    317       continue;
    318 
    319     // Claim this option, loop to see what's after it.
    320     } else new->c = *options;
    321 
    322     options++;
    323   }
    324 
    325   // Initialize enable/disable/exclude masks and pointers to store arguments.
    326   // (This goes right to left so we need the whole list before we can start.)
    327   idx = 0;
    328   for (new = gof->opts; new; new = new->next) {
    329     unsigned long long u = 1L<<idx++;
    330 
    331     if (new->c == 1) new->c = 0;
    332     new->dex[1] = u;
    333     if (new->flags & 1) gof->requires |= u;
    334     if (new->type) {
    335       new->arg = (void *)nextarg;
    336       *(nextarg++) = new->val[2].l;
    337     }
    338   }
    339 
    340   // Parse trailing group indicators
    341   while (*options) {
    342     unsigned bits = 0;
    343 
    344     if (CFG_TOYBOX_DEBUG && *options != '[') error_exit("trailing %s", options);
    345 
    346     idx = stridx("-+!", *++options);
    347     if (CFG_TOYBOX_DEBUG && idx == -1) error_exit("[ needs +-!");
    348     if (CFG_TOYBOX_DEBUG && (options[1] == ']' || !options[1]))
    349       error_exit("empty []");
    350 
    351     // Don't advance past ] but do process it once in loop.
    352     while (*options++ != ']') {
    353       struct opts *opt;
    354       int i;
    355 
    356       if (CFG_TOYBOX_DEBUG && !*options) error_exit("[ without ]");
    357       // Find this option flag (in previously parsed struct opt)
    358       for (i=0, opt = gof->opts; ; i++, opt = opt->next) {
    359         if (*options == ']') {
    360           if (!opt) break;
    361           if (bits&(1<<i)) opt->dex[idx] |= bits&~(1<<i);
    362         } else {
    363           if (*options==1) break;
    364           if (CFG_TOYBOX_DEBUG && !opt)
    365             error_exit("[] unknown target %c", *options);
    366           if (opt->c == *options) {
    367             bits |= 1<<i;
    368             break;
    369           }
    370         }
    371       }
    372     }
    373   }
    374 }
    375 
    376 // Fill out toys.optflags, toys.optargs, and this[] from toys.argv
    377 
    378 void get_optflags(void)
    379 {
    380   struct getoptflagstate gof;
    381   struct opts *catch;
    382   unsigned long long saveflags;
    383   char *letters[]={"s",""};
    384 
    385   // Option parsing is a two stage process: parse the option string into
    386   // a struct opts list, then use that list to process argv[];
    387 
    388   // Allocate memory for optargs
    389   saveflags = 0;
    390   while (toys.argv[saveflags++]);
    391   toys.optargs = xzalloc(sizeof(char *)*saveflags);
    392 
    393   parse_optflaglist(&gof);
    394 
    395   // Iterate through command line arguments, skipping argv[0]
    396   for (gof.argc=1; toys.argv[gof.argc]; gof.argc++) {
    397     gof.arg = toys.argv[gof.argc];
    398     catch = NULL;
    399 
    400     // Parse this argument
    401     if (gof.stopearly>1) goto notflag;
    402 
    403     gof.nodash_now = 0;
    404 
    405     // Various things with dashes
    406     if (*gof.arg == '-') {
    407 
    408       // Handle -
    409       if (!gof.arg[1]) goto notflag;
    410       gof.arg++;
    411       if (*gof.arg=='-') {
    412         struct longopts *lo;
    413 
    414         gof.arg++;
    415         // Handle --
    416         if (!*gof.arg) {
    417           gof.stopearly += 2;
    418           continue;
    419         }
    420 
    421         // do we match a known --longopt?
    422         for (lo = gof.longopts; lo; lo = lo->next) {
    423           if (!strncmp(gof.arg, lo->str, lo->len)) {
    424             if (!gof.arg[lo->len]) gof.arg = 0;
    425             else if (gof.arg[lo->len] == '=' && lo->opt->type)
    426               gof.arg += lo->len;
    427             else continue;
    428             // It's a match.
    429             catch = lo->opt;
    430             break;
    431           }
    432         }
    433 
    434         // Should we handle this --longopt as a non-option argument?
    435         if (!lo && gof.noerror) {
    436           gof.arg -= 2;
    437           goto notflag;
    438         }
    439 
    440         // Long option parsed, handle option.
    441         gotflag(&gof, catch);
    442         continue;
    443       }
    444 
    445     // Handle things that don't start with a dash.
    446     } else {
    447       if (gof.nodash && (gof.nodash>1 || gof.argc == 1)) gof.nodash_now = 1;
    448       else goto notflag;
    449     }
    450 
    451     // At this point, we have the args part of -args.  Loop through
    452     // each entry (could be -abc meaning -a -b -c)
    453     saveflags = toys.optflags;
    454     while (*gof.arg) {
    455 
    456       // Identify next option char.
    457       for (catch = gof.opts; catch; catch = catch->next)
    458         if (*gof.arg == catch->c)
    459           if (!((catch->flags&4) && gof.arg[1])) break;
    460 
    461       // Handle option char (advancing past what was used)
    462       if (gotflag(&gof, catch) ) {
    463         toys.optflags = saveflags;
    464         gof.arg = toys.argv[gof.argc];
    465         goto notflag;
    466       }
    467     }
    468     continue;
    469 
    470     // Not a flag, save value in toys.optargs[]
    471 notflag:
    472     if (gof.stopearly) gof.stopearly++;
    473     toys.optargs[toys.optc++] = toys.argv[gof.argc];
    474   }
    475 
    476   // Sanity check
    477   if (toys.optc<gof.minargs)
    478     error_exit("Need%s %d argument%s", letters[!!(gof.minargs-1)],
    479       gof.minargs, letters[!(gof.minargs-1)]);
    480   if (toys.optc>gof.maxargs)
    481     error_exit("Max %d argument%s", gof.maxargs, letters[!(gof.maxargs-1)]);
    482   if (gof.requires && !(gof.requires & toys.optflags)) {
    483     struct opts *req;
    484     char needs[32], *s = needs;
    485 
    486     for (req = gof.opts; req; req = req->next)
    487       if (req->flags & 1) *(s++) = req->c;
    488     *s = 0;
    489 
    490     error_exit("Needs %s-%s", s[1] ? "one of " : "", needs);
    491   }
    492 
    493   if (CFG_TOYBOX_FREE) {
    494     llist_traverse(gof.opts, free);
    495     llist_traverse(gof.longopts, free);
    496   }
    497 }
    498