Home | History | Annotate | Download | only in dbus
      1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
      2 /* dbus-shell.c Shell command line utility functions.
      3  *
      4  * Copyright (C) 2002, 2003  Red Hat, Inc.
      5  * Copyright (C) 2003 CodeFactory AB
      6  *
      7  * Licensed under the Academic Free License version 2.1
      8  *
      9  * This program is free software; you can redistribute it and/or modify
     10  * it under the terms of the GNU General Public License as published by
     11  * the Free Software Foundation; either version 2 of the License, or
     12  * (at your option) any later version.
     13  *
     14  * This program is distributed in the hope that it will be useful,
     15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     17  * GNU General Public License for more details.
     18  *
     19  * You should have received a copy of the GNU General Public License
     20  * along with this program; if not, write to the Free Software
     21  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
     22  *
     23  */
     24 
     25 #include <config.h>
     26 #include <string.h>
     27 #include "dbus-internals.h"
     28 #include "dbus-list.h"
     29 #include "dbus-memory.h"
     30 #include "dbus-protocol.h"
     31 #include "dbus-shell.h"
     32 #include "dbus-string.h"
     33 
     34 /* Single quotes preserve the literal string exactly. escape
     35  * sequences are not allowed; not even \' - if you want a '
     36  * in the quoted text, you have to do something like 'foo'\''bar'
     37  *
     38  * Double quotes allow $ ` " \ and newline to be escaped with backslash.
     39  * Otherwise double quotes preserve things literally.
     40  */
     41 
     42 static dbus_bool_t
     43 unquote_string_inplace (char* str, char** end)
     44 {
     45   char* dest;
     46   char* s;
     47   char quote_char;
     48 
     49   dest = s = str;
     50 
     51   quote_char = *s;
     52 
     53   if (!(*s == '"' || *s == '\''))
     54     {
     55       *end = str;
     56       return FALSE;
     57     }
     58 
     59   /* Skip the initial quote mark */
     60   ++s;
     61 
     62   if (quote_char == '"')
     63     {
     64       while (*s)
     65         {
     66           _dbus_assert(s > dest); /* loop invariant */
     67 
     68           switch (*s)
     69             {
     70             case '"':
     71               /* End of the string, return now */
     72               *dest = '\0';
     73               ++s;
     74               *end = s;
     75               return TRUE;
     76 
     77             case '\\':
     78               /* Possible escaped quote or \ */
     79               ++s;
     80               switch (*s)
     81                 {
     82                 case '"':
     83                 case '\\':
     84                 case '`':
     85                 case '$':
     86                 case '\n':
     87                   *dest = *s;
     88                   ++s;
     89                   ++dest;
     90                   break;
     91 
     92                 default:
     93                   /* not an escaped char */
     94                   *dest = '\\';
     95                   ++dest;
     96                   /* ++s already done. */
     97                   break;
     98                 }
     99               break;
    100 
    101             default:
    102               *dest = *s;
    103               ++dest;
    104               ++s;
    105               break;
    106             }
    107 
    108           _dbus_assert(s > dest); /* loop invariant */
    109         }
    110     }
    111   else
    112     {
    113       while (*s)
    114         {
    115           _dbus_assert(s > dest); /* loop invariant */
    116 
    117           if (*s == '\'')
    118             {
    119               /* End of the string, return now */
    120               *dest = '\0';
    121               ++s;
    122               *end = s;
    123               return TRUE;
    124             }
    125           else
    126             {
    127               *dest = *s;
    128               ++dest;
    129               ++s;
    130             }
    131 
    132           _dbus_assert(s > dest); /* loop invariant */
    133         }
    134     }
    135 
    136   /* If we reach here this means the close quote was never encountered */
    137 
    138   *dest = '\0';
    139 
    140   *end = s;
    141   return FALSE;
    142 }
    143 
    144 /**
    145  * Unquotes a string as the shell (/bin/sh) would. Only handles
    146  * quotes; if a string contains file globs, arithmetic operators,
    147  * variables, backticks, redirections, or other special-to-the-shell
    148  * features, the result will be different from the result a real shell
    149  * would produce (the variables, backticks, etc. will be passed
    150  * through literally instead of being expanded). This function is
    151  * guaranteed to succeed if applied to the result of
    152  * _dbus_shell_quote(). If it fails, it returns %NULL.
    153  * The @quoted_string need not actually contain quoted or
    154  * escaped text; _dbus_shell_unquote() simply goes through the string and
    155  * unquotes/unescapes anything that the shell would. Both single and
    156  * double quotes are handled, as are escapes including escaped
    157  * newlines. The return value must be freed with dbus_free().
    158  *
    159  * Shell quoting rules are a bit strange. Single quotes preserve the
    160  * literal string exactly. escape sequences are not allowed; not even
    161  * \' - if you want a ' in the quoted text, you have to do something
    162  * like 'foo'\''bar'.  Double quotes allow $, `, ", \, and newline to
    163  * be escaped with backslash. Otherwise double quotes preserve things
    164  * literally.
    165  *
    166  * @quoted_string: shell-quoted string
    167  **/
    168 char*
    169 _dbus_shell_unquote (const char *quoted_string)
    170 {
    171   char *unquoted;
    172   char *end;
    173   char *start;
    174   char *ret;
    175   DBusString retval;
    176 
    177   unquoted = _dbus_strdup (quoted_string);
    178   if (unquoted == NULL)
    179     return NULL;
    180 
    181   start = unquoted;
    182   end = unquoted;
    183   if (!_dbus_string_init (&retval))
    184     {
    185       dbus_free (unquoted);
    186       return NULL;
    187     }
    188 
    189   /* The loop allows cases such as
    190    * "foo"blah blah'bar'woo foo"baz"la la la\'\''foo'
    191    */
    192   while (*start)
    193     {
    194       /* Append all non-quoted chars, honoring backslash escape
    195        */
    196 
    197       while (*start && !(*start == '"' || *start == '\''))
    198         {
    199           if (*start == '\\')
    200             {
    201               /* all characters can get escaped by backslash,
    202                * except newline, which is removed if it follows
    203                * a backslash outside of quotes
    204                */
    205 
    206               ++start;
    207               if (*start)
    208                 {
    209                   if (*start != '\n')
    210 		    {
    211 		      if (!_dbus_string_append_byte (&retval, *start))
    212 			goto error;
    213 		    }
    214                   ++start;
    215                 }
    216             }
    217           else
    218             {
    219               if (!_dbus_string_append_byte (&retval, *start))
    220 		goto error;
    221               ++start;
    222             }
    223         }
    224 
    225       if (*start)
    226         {
    227           if (!unquote_string_inplace (start, &end))
    228 	    goto error;
    229           else
    230             {
    231               if (!_dbus_string_append (&retval, start))
    232 		goto error;
    233               start = end;
    234             }
    235         }
    236     }
    237 
    238   ret = _dbus_strdup (_dbus_string_get_data (&retval));
    239   if (!ret)
    240     goto error;
    241 
    242   dbus_free (unquoted);
    243   _dbus_string_free (&retval);
    244 
    245   return ret;
    246 
    247  error:
    248   dbus_free (unquoted);
    249   _dbus_string_free (&retval);
    250   return NULL;
    251 }
    252 
    253 /* _dbus_shell_parse_argv() does a semi-arbitrary weird subset of the way
    254  * the shell parses a command line. We don't do variable expansion,
    255  * don't understand that operators are tokens, don't do tilde expansion,
    256  * don't do command substitution, no arithmetic expansion, IFS gets ignored,
    257  * don't do filename globs, don't remove redirection stuff, etc.
    258  *
    259  * READ THE UNIX98 SPEC on "Shell Command Language" before changing
    260  * the behavior of this code.
    261  *
    262  * Steps to parsing the argv string:
    263  *
    264  *  - tokenize the string (but since we ignore operators,
    265  *    our tokenization may diverge from what the shell would do)
    266  *    note that tokenization ignores the internals of a quoted
    267  *    word and it always splits on spaces, not on IFS even
    268  *    if we used IFS. We also ignore "end of input indicator"
    269  *    (I guess this is control-D?)
    270  *
    271  *    Tokenization steps, from UNIX98 with operator stuff removed,
    272  *    are:
    273  *
    274  *    1) "If the current character is backslash, single-quote or
    275  *        double-quote (\, ' or ") and it is not quoted, it will affect
    276  *        quoting for subsequent characters up to the end of the quoted
    277  *        text. The rules for quoting are as described in Quoting
    278  *        . During token recognition no substitutions will be actually
    279  *        performed, and the result token will contain exactly the
    280  *        characters that appear in the input (except for newline
    281  *        character joining), unmodified, including any embedded or
    282  *        enclosing quotes or substitution operators, between the quote
    283  *        mark and the end of the quoted text. The token will not be
    284  *        delimited by the end of the quoted field."
    285  *
    286  *    2) "If the current character is an unquoted newline character,
    287  *        the current token will be delimited."
    288  *
    289  *    3) "If the current character is an unquoted blank character, any
    290  *        token containing the previous character is delimited and the
    291  *        current character will be discarded."
    292  *
    293  *    4) "If the previous character was part of a word, the current
    294  *        character will be appended to that word."
    295  *
    296  *    5) "If the current character is a "#", it and all subsequent
    297  *        characters up to, but excluding, the next newline character
    298  *        will be discarded as a comment. The newline character that
    299  *        ends the line is not considered part of the comment. The
    300  *        "#" starts a comment only when it is at the beginning of a
    301  *        token. Since the search for the end-of-comment does not
    302  *        consider an escaped newline character specially, a comment
    303  *        cannot be continued to the next line."
    304  *
    305  *    6) "The current character will be used as the start of a new word."
    306  *
    307  *
    308  *  - for each token (word), perform portions of word expansion, namely
    309  *    field splitting (using default whitespace IFS) and quote
    310  *    removal.  Field splitting may increase the number of words.
    311  *    Quote removal does not increase the number of words.
    312  *
    313  *   "If the complete expansion appropriate for a word results in an
    314  *   empty field, that empty field will be deleted from the list of
    315  *   fields that form the completely expanded command, unless the
    316  *   original word contained single-quote or double-quote characters."
    317  *    - UNIX98 spec
    318  *
    319  *
    320  */
    321 
    322 static dbus_bool_t
    323 delimit_token (DBusString *token,
    324                DBusList **retval,
    325 	       DBusError *error)
    326 {
    327   char *str;
    328 
    329   str = _dbus_strdup (_dbus_string_get_data (token));
    330   if (!str)
    331     {
    332       _DBUS_SET_OOM (error);
    333       return FALSE;
    334     }
    335 
    336   if (!_dbus_list_append (retval, str))
    337     {
    338       dbus_free (str);
    339       _DBUS_SET_OOM (error);
    340       return FALSE;
    341     }
    342 
    343   return TRUE;
    344 }
    345 
    346 static DBusList*
    347 tokenize_command_line (const char *command_line, DBusError *error)
    348 {
    349   char current_quote;
    350   const char *p;
    351   DBusString current_token;
    352   DBusList *retval = NULL;
    353   dbus_bool_t quoted;;
    354 
    355   current_quote = '\0';
    356   quoted = FALSE;
    357   p = command_line;
    358 
    359   if (!_dbus_string_init (&current_token))
    360     {
    361       _DBUS_SET_OOM (error);
    362       return NULL;
    363     }
    364 
    365   while (*p)
    366     {
    367       if (current_quote == '\\')
    368         {
    369           if (*p == '\n')
    370             {
    371               /* we append nothing; backslash-newline become nothing */
    372             }
    373           else
    374             {
    375 	      if (!_dbus_string_append_byte (&current_token, '\\') ||
    376 	          !_dbus_string_append_byte (&current_token, *p))
    377 		{
    378 		  _DBUS_SET_OOM (error);
    379 		  goto error;
    380 		}
    381             }
    382 
    383           current_quote = '\0';
    384         }
    385       else if (current_quote == '#')
    386         {
    387           /* Discard up to and including next newline */
    388           while (*p && *p != '\n')
    389             ++p;
    390 
    391           current_quote = '\0';
    392 
    393           if (*p == '\0')
    394             break;
    395         }
    396       else if (current_quote)
    397         {
    398           if (*p == current_quote &&
    399               /* check that it isn't an escaped double quote */
    400               !(current_quote == '"' && quoted))
    401             {
    402               /* close the quote */
    403               current_quote = '\0';
    404             }
    405 
    406           /* Everything inside quotes, and the close quote,
    407            * gets appended literally.
    408            */
    409 
    410           if (!_dbus_string_append_byte (&current_token, *p))
    411 	    {
    412 	      _DBUS_SET_OOM (error);
    413 	      goto error;
    414 	    }
    415         }
    416       else
    417         {
    418           switch (*p)
    419             {
    420             case '\n':
    421               if (!delimit_token (&current_token, &retval, error))
    422 		goto error;
    423 
    424 		_dbus_string_free (&current_token);
    425 
    426 		if (!_dbus_string_init (&current_token))
    427 		  {
    428 		    _DBUS_SET_OOM (error);
    429 		    goto init_error;
    430 		  }
    431 
    432               break;
    433 
    434             case ' ':
    435             case '\t':
    436               /* If the current token contains the previous char, delimit
    437                * the current token. A nonzero length
    438                * token should always contain the previous char.
    439                */
    440               if (_dbus_string_get_length (&current_token) > 0)
    441                 {
    442                   if (!delimit_token (&current_token, &retval, error))
    443 		    goto error;
    444 
    445 		  _dbus_string_free (&current_token);
    446 
    447 		  if (!_dbus_string_init (&current_token))
    448 		    {
    449 		      _DBUS_SET_OOM (error);
    450 		      goto init_error;
    451 		    }
    452 
    453                 }
    454 
    455               /* discard all unquoted blanks (don't add them to a token) */
    456               break;
    457 
    458 
    459               /* single/double quotes are appended to the token,
    460                * escapes are maybe appended next time through the loop,
    461                * comment chars are never appended.
    462                */
    463 
    464             case '\'':
    465             case '"':
    466               if (!_dbus_string_append_byte (&current_token, *p))
    467 		{
    468 		  _DBUS_SET_OOM (error);
    469 		  goto error;
    470 		}
    471 
    472               /* FALL THRU */
    473 
    474             case '#':
    475             case '\\':
    476               current_quote = *p;
    477               break;
    478 
    479             default:
    480               /* Combines rules 4) and 6) - if we have a token, append to it,
    481                * otherwise create a new token.
    482                */
    483               if (!_dbus_string_append_byte (&current_token, *p))
    484 		{
    485 		  _DBUS_SET_OOM (error);
    486 		  goto error;
    487 		}
    488               break;
    489             }
    490         }
    491 
    492       /* We need to count consecutive backslashes mod 2,
    493        * to detect escaped doublequotes.
    494        */
    495       if (*p != '\\')
    496 	quoted = FALSE;
    497       else
    498 	quoted = !quoted;
    499 
    500       ++p;
    501     }
    502 
    503   if (!delimit_token (&current_token, &retval, error))
    504     goto error;
    505 
    506   if (current_quote)
    507     {
    508       dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "Unclosed quotes in command line");
    509       goto error;
    510     }
    511 
    512   if (retval == NULL)
    513     {
    514       dbus_set_error_const (error, DBUS_ERROR_INVALID_ARGS, "No tokens found in command line");
    515       goto error;
    516     }
    517 
    518   _dbus_string_free (&current_token);
    519 
    520   return retval;
    521 
    522  error:
    523   _dbus_string_free (&current_token);
    524 
    525  init_error:
    526   if (retval)
    527     {
    528       _dbus_list_foreach (&retval, (DBusForeachFunction) dbus_free, NULL);
    529       _dbus_list_clear (&retval);
    530     }
    531 
    532   return NULL;
    533 }
    534 
    535 /**
    536  * _dbus_shell_parse_argv:
    537  *
    538  * Parses a command line into an argument vector, in much the same way
    539  * the shell would, but without many of the expansions the shell would
    540  * perform (variable expansion, globs, operators, filename expansion,
    541  * etc. are not supported). The results are defined to be the same as
    542  * those you would get from a UNIX98 /bin/sh, as long as the input
    543  * contains none of the unsupported shell expansions. If the input
    544  * does contain such expansions, they are passed through
    545  * literally. Free the returned vector with dbus_free_string_array().
    546  *
    547  * @command_line: command line to parse
    548  * @argcp: return location for number of args
    549  * @argvp: return location for array of args
    550  * @error: error information
    551  **/
    552 dbus_bool_t
    553 _dbus_shell_parse_argv (const char *command_line,
    554 			int        *argcp,
    555 			char     ***argvp,
    556 			DBusError  *error)
    557 {
    558   /* Code based on poptParseArgvString() from libpopt */
    559   int argc = 0;
    560   char **argv = NULL;
    561   DBusList *tokens = NULL;
    562   int i;
    563   DBusList *tmp_list;
    564 
    565   if (!command_line)
    566     {
    567       _dbus_verbose ("Command line is NULL\n");
    568       return FALSE;
    569     }
    570 
    571   tokens = tokenize_command_line (command_line, error);
    572   if (tokens == NULL)
    573     {
    574       _dbus_verbose ("No tokens for command line '%s'\n", command_line);
    575       return FALSE;
    576     }
    577 
    578   /* Because we can't have introduced any new blank space into the
    579    * tokens (we didn't do any new expansions), we don't need to
    580    * perform field splitting. If we were going to honor IFS or do any
    581    * expansions, we would have to do field splitting on each word
    582    * here. Also, if we were going to do any expansion we would need to
    583    * remove any zero-length words that didn't contain quotes
    584    * originally; but since there's no expansion we know all words have
    585    * nonzero length, unless they contain quotes.
    586    *
    587    * So, we simply remove quotes, and don't do any field splitting or
    588    * empty word removal, since we know there was no way to introduce
    589    * such things.
    590    */
    591 
    592   argc = _dbus_list_get_length (&tokens);
    593   argv = dbus_new (char *, argc + 1);
    594   if (!argv)
    595     {
    596       _DBUS_SET_OOM (error);
    597       goto error;
    598     }
    599 
    600   i = 0;
    601   tmp_list = tokens;
    602   while (tmp_list)
    603     {
    604       argv[i] = _dbus_shell_unquote (tmp_list->data);
    605 
    606       if (!argv[i])
    607         {
    608           int j;
    609 	  for (j = 0; j < i; j++)
    610 	    dbus_free(argv[j]);
    611 
    612           dbus_free (argv);
    613 	  _DBUS_SET_OOM (error);
    614 	  goto error;
    615         }
    616 
    617       tmp_list = _dbus_list_get_next_link (&tokens, tmp_list);
    618       ++i;
    619     }
    620   argv[argc] = NULL;
    621 
    622   _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL);
    623   _dbus_list_clear (&tokens);
    624 
    625   if (argcp)
    626     *argcp = argc;
    627 
    628   if (argvp)
    629     *argvp = argv;
    630   else
    631     dbus_free_string_array (argv);
    632 
    633   return TRUE;
    634 
    635  error:
    636   _dbus_list_foreach (&tokens, (DBusForeachFunction) dbus_free, NULL);
    637   _dbus_list_clear (&tokens);
    638 
    639   return FALSE;
    640 
    641 }
    642