Home | History | Annotate | Download | only in lib
      1 /* quotearg.c - quote arguments for output
      2 
      3    Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
      4    Software Foundation, Inc.
      5 
      6    This program is free software; you can redistribute it and/or modify
      7    it under the terms of the GNU General Public License as published by
      8    the Free Software Foundation; either version 2, or (at your option)
      9    any later version.
     10 
     11    This program is distributed in the hope that it will be useful,
     12    but WITHOUT ANY WARRANTY; without even the implied warranty of
     13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     14    GNU General Public License for more details.
     15 
     16    You should have received a copy of the GNU General Public License
     17    along with this program; if not, write to the Free Software Foundation,
     18    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
     19 
     20 /* Written by Paul Eggert <eggert (at) twinsun.com> */
     21 
     22 #ifdef HAVE_CONFIG_H
     23 # include <config.h>
     24 #endif
     25 
     26 #include "quotearg.h"
     27 
     28 #include "xalloc.h"
     29 
     30 #include <ctype.h>
     31 #include <errno.h>
     32 #include <limits.h>
     33 #include <stdbool.h>
     34 #include <stdlib.h>
     35 #include <string.h>
     36 
     37 #include "gettext.h"
     38 #define _(msgid) gettext (msgid)
     39 #define N_(msgid) msgid
     40 
     41 #if HAVE_WCHAR_H
     42 
     43 /* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared.  */
     44 # include <stdio.h>
     45 # include <time.h>
     46 
     47 # include <wchar.h>
     48 #endif
     49 
     50 #if !HAVE_MBRTOWC
     51 /* Disable multibyte processing entirely.  Since MB_CUR_MAX is 1, the
     52    other macros are defined only for documentation and to satisfy C
     53    syntax.  */
     54 # undef MB_CUR_MAX
     55 # define MB_CUR_MAX 1
     56 # define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
     57 # define iswprint(wc) isprint ((unsigned char) (wc))
     58 # undef HAVE_MBSINIT
     59 #endif
     60 
     61 #if !defined mbsinit && !HAVE_MBSINIT
     62 # define mbsinit(ps) 1
     63 #endif
     64 
     65 #ifndef iswprint
     66 # if HAVE_WCTYPE_H
     67 #  include <wctype.h>
     68 # endif
     69 # if !defined iswprint && !HAVE_ISWPRINT
     70 #  define iswprint(wc) 1
     71 # endif
     72 #endif
     73 
     74 #ifndef SIZE_MAX
     75 # define SIZE_MAX ((size_t) -1)
     76 #endif
     77 
     78 #define INT_BITS (sizeof (int) * CHAR_BIT)
     79 
     80 struct quoting_options
     81 {
     82   /* Basic quoting style.  */
     83   enum quoting_style style;
     84 
     85   /* Quote the characters indicated by this bit vector even if the
     86      quoting style would not normally require them to be quoted.  */
     87   unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
     88 };
     89 
     90 /* Names of quoting styles.  */
     91 char const *const quoting_style_args[] =
     92 {
     93   "literal",
     94   "shell",
     95   "shell-always",
     96   "c",
     97   "escape",
     98   "locale",
     99   "clocale",
    100   0
    101 };
    102 
    103 /* Correspondences to quoting style names.  */
    104 enum quoting_style const quoting_style_vals[] =
    105 {
    106   literal_quoting_style,
    107   shell_quoting_style,
    108   shell_always_quoting_style,
    109   c_quoting_style,
    110   escape_quoting_style,
    111   locale_quoting_style,
    112   clocale_quoting_style
    113 };
    114 
    115 /* The default quoting options.  */
    116 static struct quoting_options default_quoting_options;
    117 
    118 /* Allocate a new set of quoting options, with contents initially identical
    119    to O if O is not null, or to the default if O is null.
    120    It is the caller's responsibility to free the result.  */
    121 struct quoting_options *
    122 clone_quoting_options (struct quoting_options *o)
    123 {
    124   int e = errno;
    125   struct quoting_options *p = xmalloc (sizeof *p);
    126   *p = *(o ? o : &default_quoting_options);
    127   errno = e;
    128   return p;
    129 }
    130 
    131 /* Get the value of O's quoting style.  If O is null, use the default.  */
    132 enum quoting_style
    133 get_quoting_style (struct quoting_options *o)
    134 {
    135   return (o ? o : &default_quoting_options)->style;
    136 }
    137 
    138 /* In O (or in the default if O is null),
    139    set the value of the quoting style to S.  */
    140 void
    141 set_quoting_style (struct quoting_options *o, enum quoting_style s)
    142 {
    143   (o ? o : &default_quoting_options)->style = s;
    144 }
    145 
    146 /* In O (or in the default if O is null),
    147    set the value of the quoting options for character C to I.
    148    Return the old value.  Currently, the only values defined for I are
    149    0 (the default) and 1 (which means to quote the character even if
    150    it would not otherwise be quoted).  */
    151 int
    152 set_char_quoting (struct quoting_options *o, char c, int i)
    153 {
    154   unsigned char uc = c;
    155   unsigned int *p =
    156     (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
    157   int shift = uc % INT_BITS;
    158   int r = (*p >> shift) & 1;
    159   *p ^= ((i & 1) ^ r) << shift;
    160   return r;
    161 }
    162 
    163 /* MSGID approximates a quotation mark.  Return its translation if it
    164    has one; otherwise, return either it or "\"", depending on S.  */
    165 static char const *
    166 gettext_quote (char const *msgid, enum quoting_style s)
    167 {
    168   char const *translation = _(msgid);
    169   if (translation == msgid && s == clocale_quoting_style)
    170     translation = "\"";
    171   return translation;
    172 }
    173 
    174 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
    175    argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
    176    non-quoting-style part of O to control quoting.
    177    Terminate the output with a null character, and return the written
    178    size of the output, not counting the terminating null.
    179    If BUFFERSIZE is too small to store the output string, return the
    180    value that would have been returned had BUFFERSIZE been large enough.
    181    If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
    182 
    183    This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
    184    ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
    185    style specified by O, and O may not be null.  */
    186 
    187 static size_t
    188 quotearg_buffer_restyled (char *buffer, size_t buffersize,
    189 			  char const *arg, size_t argsize,
    190 			  enum quoting_style quoting_style,
    191 			  struct quoting_options const *o)
    192 {
    193   size_t i;
    194   size_t len = 0;
    195   char const *quote_string = 0;
    196   size_t quote_string_len = 0;
    197   bool backslash_escapes = false;
    198   bool unibyte_locale = MB_CUR_MAX == 1;
    199 
    200 #define STORE(c) \
    201     do \
    202       { \
    203 	if (len < buffersize) \
    204 	  buffer[len] = (c); \
    205 	len++; \
    206       } \
    207     while (0)
    208 
    209   switch (quoting_style)
    210     {
    211     case c_quoting_style:
    212       STORE ('"');
    213       backslash_escapes = true;
    214       quote_string = "\"";
    215       quote_string_len = 1;
    216       break;
    217 
    218     case escape_quoting_style:
    219       backslash_escapes = true;
    220       break;
    221 
    222     case locale_quoting_style:
    223     case clocale_quoting_style:
    224       {
    225 	/* TRANSLATORS:
    226 	   Get translations for open and closing quotation marks.
    227 
    228 	   The message catalog should translate "`" to a left
    229 	   quotation mark suitable for the locale, and similarly for
    230 	   "'".  If the catalog has no translation,
    231 	   locale_quoting_style quotes `like this', and
    232 	   clocale_quoting_style quotes "like this".
    233 
    234 	   For example, an American English Unicode locale should
    235 	   translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
    236 	   should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
    237 	   MARK).  A British English Unicode locale should instead
    238 	   translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
    239 	   U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
    240 
    241 	   If you don't know what to put here, please see
    242 	   <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
    243 	   and use glyphs suitable for your language.  */
    244 
    245 	char const *left = gettext_quote (N_("`"), quoting_style);
    246 	char const *right = gettext_quote (N_("'"), quoting_style);
    247 	for (quote_string = left; *quote_string; quote_string++)
    248 	  STORE (*quote_string);
    249 	backslash_escapes = true;
    250 	quote_string = right;
    251 	quote_string_len = strlen (quote_string);
    252       }
    253       break;
    254 
    255     case shell_always_quoting_style:
    256       STORE ('\'');
    257       quote_string = "'";
    258       quote_string_len = 1;
    259       break;
    260 
    261     default:
    262       break;
    263     }
    264 
    265   for (i = 0;  ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize);  i++)
    266     {
    267       unsigned char c;
    268       unsigned char esc;
    269 
    270       if (backslash_escapes
    271 	  && quote_string_len
    272 	  && i + quote_string_len <= argsize
    273 	  && memcmp (arg + i, quote_string, quote_string_len) == 0)
    274 	STORE ('\\');
    275 
    276       c = arg[i];
    277       switch (c)
    278 	{
    279 	case '\0':
    280 	  if (backslash_escapes)
    281 	    {
    282 	      STORE ('\\');
    283 	      STORE ('0');
    284 	      STORE ('0');
    285 	      c = '0';
    286 	    }
    287 	  break;
    288 
    289 	case '?':
    290 	  switch (quoting_style)
    291 	    {
    292 	    case shell_quoting_style:
    293 	      goto use_shell_always_quoting_style;
    294 
    295 	    case c_quoting_style:
    296 	      if (i + 2 < argsize && arg[i + 1] == '?')
    297 		switch (arg[i + 2])
    298 		  {
    299 		  case '!': case '\'':
    300 		  case '(': case ')': case '-': case '/':
    301 		  case '<': case '=': case '>':
    302 		    /* Escape the second '?' in what would otherwise be
    303 		       a trigraph.  */
    304 		    c = arg[i + 2];
    305 		    i += 2;
    306 		    STORE ('?');
    307 		    STORE ('\\');
    308 		    STORE ('?');
    309 		    break;
    310 
    311 		  default:
    312 		    break;
    313 		  }
    314 	      break;
    315 
    316 	    default:
    317 	      break;
    318 	    }
    319 	  break;
    320 
    321 	case '\a': esc = 'a'; goto c_escape;
    322 	case '\b': esc = 'b'; goto c_escape;
    323 	case '\f': esc = 'f'; goto c_escape;
    324 	case '\n': esc = 'n'; goto c_and_shell_escape;
    325 	case '\r': esc = 'r'; goto c_and_shell_escape;
    326 	case '\t': esc = 't'; goto c_and_shell_escape;
    327 	case '\v': esc = 'v'; goto c_escape;
    328 	case '\\': esc = c; goto c_and_shell_escape;
    329 
    330 	c_and_shell_escape:
    331 	  if (quoting_style == shell_quoting_style)
    332 	    goto use_shell_always_quoting_style;
    333 	c_escape:
    334 	  if (backslash_escapes)
    335 	    {
    336 	      c = esc;
    337 	      goto store_escape;
    338 	    }
    339 	  break;
    340 
    341 	case '{': case '}': /* sometimes special if isolated */
    342 	  if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
    343 	    break;
    344 	  /* Fall through.  */
    345 	case '#': case '~':
    346 	  if (i != 0)
    347 	    break;
    348 	  /* Fall through.  */
    349 	case ' ':
    350 	case '!': /* special in bash */
    351 	case '"': case '$': case '&':
    352 	case '(': case ')': case '*': case ';':
    353 	case '<':
    354 	case '=': /* sometimes special in 0th or (with "set -k") later args */
    355 	case '>': case '[':
    356 	case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
    357 	case '`': case '|':
    358 	  /* A shell special character.  In theory, '$' and '`' could
    359 	     be the first bytes of multibyte characters, which means
    360 	     we should check them with mbrtowc, but in practice this
    361 	     doesn't happen so it's not worth worrying about.  */
    362 	  if (quoting_style == shell_quoting_style)
    363 	    goto use_shell_always_quoting_style;
    364 	  break;
    365 
    366 	case '\'':
    367 	  switch (quoting_style)
    368 	    {
    369 	    case shell_quoting_style:
    370 	      goto use_shell_always_quoting_style;
    371 
    372 	    case shell_always_quoting_style:
    373 	      STORE ('\'');
    374 	      STORE ('\\');
    375 	      STORE ('\'');
    376 	      break;
    377 
    378 	    default:
    379 	      break;
    380 	    }
    381 	  break;
    382 
    383 	case '%': case '+': case ',': case '-': case '.': case '/':
    384 	case '0': case '1': case '2': case '3': case '4': case '5':
    385 	case '6': case '7': case '8': case '9': case ':':
    386 	case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    387 	case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
    388 	case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
    389 	case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
    390 	case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
    391 	case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
    392 	case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
    393 	case 'o': case 'p': case 'q': case 'r': case 's': case 't':
    394 	case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
    395 	  /* These characters don't cause problems, no matter what the
    396 	     quoting style is.  They cannot start multibyte sequences.  */
    397 	  break;
    398 
    399 	default:
    400 	  /* If we have a multibyte sequence, copy it until we reach
    401 	     its end, find an error, or come back to the initial shift
    402 	     state.  For C-like styles, if the sequence has
    403 	     unprintable characters, escape the whole sequence, since
    404 	     we can't easily escape single characters within it.  */
    405 	  {
    406 	    /* Length of multibyte sequence found so far.  */
    407 	    size_t m;
    408 
    409 	    bool printable;
    410 
    411 	    if (unibyte_locale)
    412 	      {
    413 		m = 1;
    414 		printable = isprint (c) != 0;
    415 	      }
    416 	    else
    417 	      {
    418 		mbstate_t mbstate;
    419 		memset (&mbstate, 0, sizeof mbstate);
    420 
    421 		m = 0;
    422 		printable = true;
    423 		if (argsize == SIZE_MAX)
    424 		  argsize = strlen (arg);
    425 
    426 		do
    427 		  {
    428 		    wchar_t w;
    429 		    size_t bytes = mbrtowc (&w, &arg[i + m],
    430 					    argsize - (i + m), &mbstate);
    431 		    if (bytes == 0)
    432 		      break;
    433 		    else if (bytes == (size_t) -1)
    434 		      {
    435 			printable = false;
    436 			break;
    437 		      }
    438 		    else if (bytes == (size_t) -2)
    439 		      {
    440 			printable = false;
    441 			while (i + m < argsize && arg[i + m])
    442 			  m++;
    443 			break;
    444 		      }
    445 		    else
    446 		      {
    447 			/* Work around a bug with older shells that "see" a '\'
    448 			   that is really the 2nd byte of a multibyte character.
    449 			   In practice the problem is limited to ASCII
    450 			   chars >= '@' that are shell special chars.  */
    451 			if ('[' == 0x5b && quoting_style == shell_quoting_style)
    452 			  {
    453 			    size_t j;
    454 			    for (j = 1; j < bytes; j++)
    455 			      switch (arg[i + m + j])
    456 				{
    457 				case '[': case '\\': case '^':
    458 				case '`': case '|':
    459 				  goto use_shell_always_quoting_style;
    460 
    461 				default:
    462 				  break;
    463 				}
    464 			  }
    465 
    466 			if (! iswprint (w))
    467 			  printable = false;
    468 			m += bytes;
    469 		      }
    470 		  }
    471 		while (! mbsinit (&mbstate));
    472 	      }
    473 
    474 	    if (1 < m || (backslash_escapes && ! printable))
    475 	      {
    476 		/* Output a multibyte sequence, or an escaped
    477 		   unprintable unibyte character.  */
    478 		size_t ilim = i + m;
    479 
    480 		for (;;)
    481 		  {
    482 		    if (backslash_escapes && ! printable)
    483 		      {
    484 			STORE ('\\');
    485 			STORE ('0' + (c >> 6));
    486 			STORE ('0' + ((c >> 3) & 7));
    487 			c = '0' + (c & 7);
    488 		      }
    489 		    if (ilim <= i + 1)
    490 		      break;
    491 		    STORE (c);
    492 		    c = arg[++i];
    493 		  }
    494 
    495 		goto store_c;
    496 	      }
    497 	  }
    498 	}
    499 
    500       if (! (backslash_escapes
    501 	     && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
    502 	goto store_c;
    503 
    504     store_escape:
    505       STORE ('\\');
    506 
    507     store_c:
    508       STORE (c);
    509     }
    510 
    511   if (i == 0 && quoting_style == shell_quoting_style)
    512     goto use_shell_always_quoting_style;
    513 
    514   if (quote_string)
    515     for (; *quote_string; quote_string++)
    516       STORE (*quote_string);
    517 
    518   if (len < buffersize)
    519     buffer[len] = '\0';
    520   return len;
    521 
    522  use_shell_always_quoting_style:
    523   return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
    524 				   shell_always_quoting_style, o);
    525 }
    526 
    527 /* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
    528    argument ARG (of size ARGSIZE), using O to control quoting.
    529    If O is null, use the default.
    530    Terminate the output with a null character, and return the written
    531    size of the output, not counting the terminating null.
    532    If BUFFERSIZE is too small to store the output string, return the
    533    value that would have been returned had BUFFERSIZE been large enough.
    534    If ARGSIZE is SIZE_MAX, use the string length of the argument for
    535    ARGSIZE.  */
    536 size_t
    537 quotearg_buffer (char *buffer, size_t buffersize,
    538 		 char const *arg, size_t argsize,
    539 		 struct quoting_options const *o)
    540 {
    541   struct quoting_options const *p = o ? o : &default_quoting_options;
    542   int e = errno;
    543   size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
    544 				       p->style, p);
    545   errno = e;
    546   return r;
    547 }
    548 
    549 /* Like quotearg_buffer (..., ARG, ARGSIZE, O), except return newly
    550    allocated storage containing the quoted string.  */
    551 char *
    552 quotearg_alloc (char const *arg, size_t argsize,
    553 		struct quoting_options const *o)
    554 {
    555   int e = errno;
    556   size_t bufsize = quotearg_buffer (0, 0, arg, argsize, o) + 1;
    557   char *buf = xmalloc (bufsize);
    558   quotearg_buffer (buf, bufsize, arg, argsize, o);
    559   errno = e;
    560   return buf;
    561 }
    562 
    563 /* Use storage slot N to return a quoted version of argument ARG.
    564    ARG is of size ARGSIZE, but if that is SIZE_MAX, ARG is a
    565    null-terminated string.
    566    OPTIONS specifies the quoting options.
    567    The returned value points to static storage that can be
    568    reused by the next call to this function with the same value of N.
    569    N must be nonnegative.  N is deliberately declared with type "int"
    570    to allow for future extensions (using negative values).  */
    571 static char *
    572 quotearg_n_options (int n, char const *arg, size_t argsize,
    573 		    struct quoting_options const *options)
    574 {
    575   int e = errno;
    576 
    577   /* Preallocate a slot 0 buffer, so that the caller can always quote
    578      one small component of a "memory exhausted" message in slot 0.  */
    579   static char slot0[256];
    580   static unsigned int nslots = 1;
    581   unsigned int n0 = n;
    582   struct slotvec
    583     {
    584       size_t size;
    585       char *val;
    586     };
    587   static struct slotvec slotvec0 = {sizeof slot0, slot0};
    588   static struct slotvec *slotvec = &slotvec0;
    589 
    590   if (n < 0)
    591     abort ();
    592 
    593   if (nslots <= n0)
    594     {
    595       /* FIXME: technically, the type of n1 should be `unsigned int',
    596 	 but that evokes an unsuppressible warning from gcc-4.0.1 and
    597 	 older.  If gcc ever provides an option to suppress that warning,
    598 	 revert to the original type, so that the test in xalloc_oversized
    599 	 is once again performed only at compile time.  */
    600       size_t n1 = n0 + 1;
    601 
    602       if (xalloc_oversized (n1, sizeof *slotvec))
    603 	xalloc_die ();
    604 
    605       if (slotvec == &slotvec0)
    606 	{
    607 	  slotvec = xmalloc (sizeof *slotvec);
    608 	  *slotvec = slotvec0;
    609 	}
    610       slotvec = xrealloc (slotvec, n1 * sizeof *slotvec);
    611       memset (slotvec + nslots, 0, (n1 - nslots) * sizeof *slotvec);
    612       nslots = n1;
    613     }
    614 
    615   {
    616     size_t size = slotvec[n].size;
    617     char *val = slotvec[n].val;
    618     size_t qsize = quotearg_buffer (val, size, arg, argsize, options);
    619 
    620     if (size <= qsize)
    621       {
    622 	slotvec[n].size = size = qsize + 1;
    623 	if (val != slot0)
    624 	  free (val);
    625 	slotvec[n].val = val = xmalloc (size);
    626 	quotearg_buffer (val, size, arg, argsize, options);
    627       }
    628 
    629     errno = e;
    630     return val;
    631   }
    632 }
    633 
    634 char *
    635 quotearg_n (int n, char const *arg)
    636 {
    637   return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
    638 }
    639 
    640 char *
    641 quotearg (char const *arg)
    642 {
    643   return quotearg_n (0, arg);
    644 }
    645 
    646 /* Return quoting options for STYLE, with no extra quoting.  */
    647 static struct quoting_options
    648 quoting_options_from_style (enum quoting_style style)
    649 {
    650   struct quoting_options o;
    651   o.style = style;
    652   memset (o.quote_these_too, 0, sizeof o.quote_these_too);
    653   return o;
    654 }
    655 
    656 char *
    657 quotearg_n_style (int n, enum quoting_style s, char const *arg)
    658 {
    659   struct quoting_options const o = quoting_options_from_style (s);
    660   return quotearg_n_options (n, arg, SIZE_MAX, &o);
    661 }
    662 
    663 char *
    664 quotearg_n_style_mem (int n, enum quoting_style s,
    665 		      char const *arg, size_t argsize)
    666 {
    667   struct quoting_options const o = quoting_options_from_style (s);
    668   return quotearg_n_options (n, arg, argsize, &o);
    669 }
    670 
    671 char *
    672 quotearg_style (enum quoting_style s, char const *arg)
    673 {
    674   return quotearg_n_style (0, s, arg);
    675 }
    676 
    677 char *
    678 quotearg_char (char const *arg, char ch)
    679 {
    680   struct quoting_options options;
    681   options = default_quoting_options;
    682   set_char_quoting (&options, ch, 1);
    683   return quotearg_n_options (0, arg, SIZE_MAX, &options);
    684 }
    685 
    686 char *
    687 quotearg_colon (char const *arg)
    688 {
    689   return quotearg_char (arg, ':');
    690 }
    691