Home | History | Annotate | Download | only in binutils
      1 /* strings -- print the strings of printable characters in files
      2    Copyright (C) 1993-2016 Free Software Foundation, Inc.
      3 
      4    This program is free software; you can redistribute it and/or modify
      5    it under the terms of the GNU General Public License as published by
      6    the Free Software Foundation; either version 3, or (at your option)
      7    any later version.
      8 
      9    This program is distributed in the hope that it will be useful,
     10    but WITHOUT ANY WARRANTY; without even the implied warranty of
     11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12    GNU General Public License for more details.
     13 
     14    You should have received a copy of the GNU General Public License
     15    along with this program; if not, write to the Free Software
     16    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     17    02110-1301, USA.  */
     18 
     19 /* Usage: strings [options] file...
     21 
     22    Options:
     23    --all
     24    -a
     25    -		Scan each file in its entirety.
     26 
     27    --data
     28    -d		Scan only the initialized data section(s) of object files.
     29 
     30    --print-file-name
     31    -f		Print the name of the file before each string.
     32 
     33    --bytes=min-len
     34    -n min-len
     35    -min-len	Print graphic char sequences, MIN-LEN or more bytes long,
     36 		that are followed by a NUL or a newline.  Default is 4.
     37 
     38    --radix={o,x,d}
     39    -t {o,x,d}	Print the offset within the file before each string,
     40 		in octal/hex/decimal.
     41 
     42   --include-all-whitespace
  -w		By default tab and space are the only whitespace included in graphic
		char sequences.  This option considers all of isspace() valid.
     45 
     46    -o		Like -to.  (Some other implementations have -o like -to,
     47 		others like -td.  We chose one arbitrarily.)
     48 
     49    --encoding={s,S,b,l,B,L}
     50    -e {s,S,b,l,B,L}
     51 		Select character encoding: 7-bit-character, 8-bit-character,
     52 		bigendian 16-bit, littleendian 16-bit, bigendian 32-bit,
     53 		littleendian 32-bit.
     54 
     55    --target=BFDNAME
     56    -T {bfdname}
     57 		Specify a non-default object file format.
     58 
     59   --output-separator=sep_string
     60   -s sep_string	String used to separate parsed strings in output.
     61 		Default is newline.
     62 
     63    --help
     64    -h		Print the usage message on the standard output.
     65 
     66    --version
     67    -V
     68    -v		Print the program version number.
     69 
     70    Written by Richard Stallman <rms (at) gnu.ai.mit.edu>
     71    and David MacKenzie <djm (at) gnu.ai.mit.edu>.  */
     72 
     73 #include "sysdep.h"
     74 #include "bfd.h"
     75 #include "getopt.h"
     76 #include "libiberty.h"
     77 #include "safe-ctype.h"
     78 #include "bucomm.h"
     79 
     80 #define STRING_ISGRAPHIC(c) \
     81       (   (c) >= 0 \
     82        && (c) <= 255 \
     83        && ((c) == '\t' || ISPRINT (c) || (encoding == 'S' && (c) > 127) \
     84            || (include_all_whitespace == TRUE && ISSPACE (c))) \
     85       )
     86 
/* Declare errno ourselves when it is not already defined as a macro.  */
#ifndef errno
extern int errno;
#endif

/* The BFD section flags that identify an initialized data section.  */
#define DATA_FLAGS (SEC_ALLOC | SEC_LOAD | SEC_HAS_CONTENTS)

/* Radix for printing addresses (must be 8, 10 or 16).  Set by the -o
   and -t options.  */
static int address_radix;

/* Minimum length of sequence of graphic chars to trigger output.
   Set by -n, --bytes or -<number>; main defaults it to 4.  */
static int string_min;

/* Whether or not we include all whitespace as a graphic char.   */
static bfd_boolean include_all_whitespace;

/* TRUE means print address within file for each string.  */
static bfd_boolean print_addresses;

/* TRUE means print filename for each string.  */
static bfd_boolean print_filenames;

/* TRUE means for object files scan only the data section.  */
static bfd_boolean datasection_only;

/* TRUE if we found an initialized data section in the current file.  */
static bfd_boolean got_a_section;

/* The BFD object file format.  NULL unless -T/--target was given.  */
static char *target;

/* The character encoding format: the letter given to -e/--encoding
   ('s' by default).  */
static char encoding;
/* Number of bytes read per character for ENCODING (1, 2 or 4).  */
static int encoding_bytes;

/* Output string used to separate parsed strings.  NULL means that a
   newline is printed after each string.  */
static char *output_separator;
    124 
    125 static struct option long_options[] =
    126 {
    127   {"all", no_argument, NULL, 'a'},
    128   {"data", no_argument, NULL, 'd'},
    129   {"print-file-name", no_argument, NULL, 'f'},
    130   {"bytes", required_argument, NULL, 'n'},
    131   {"radix", required_argument, NULL, 't'},
    132   {"include-all-whitespace", required_argument, NULL, 'w'},
    133   {"encoding", required_argument, NULL, 'e'},
    134   {"target", required_argument, NULL, 'T'},
    135   {"output-separator", required_argument, NULL, 's'},
    136   {"help", no_argument, NULL, 'h'},
    137   {"version", no_argument, NULL, 'v'},
    138   {NULL, 0, NULL, 0}
    139 };
    140 
/* Records the size of a named file so that we
   do not repeatedly run bfd_stat() on it.  */

typedef struct
{
  const char *  filename;	/* Name of the file being scanned.  */
  bfd_size_type filesize;	/* Its size; 0 means not yet determined.  */
} filename_and_size_t;
    149 
/* Forward declarations for the functions defined in this file.  */
static void strings_a_section (bfd *, asection *, void *);
static bfd_boolean strings_object_file (const char *);
static bfd_boolean strings_file (char *);
static void print_strings (const char *, FILE *, file_ptr, int, int, char *);
static void usage (FILE *, int);
static long get_char (FILE *, file_ptr *, int *, char **);

int main (int, char **);
    159 
    160 int
    161 main (int argc, char **argv)
    162 {
    163   int optc;
    164   int exit_status = 0;
    165   bfd_boolean files_given = FALSE;
    166   char *s;
    167   int numeric_opt = 0;
    168 
    169 #if defined (HAVE_SETLOCALE)
    170   setlocale (LC_ALL, "");
    171 #endif
    172   bindtextdomain (PACKAGE, LOCALEDIR);
    173   textdomain (PACKAGE);
    174 
    175   program_name = argv[0];
    176   xmalloc_set_program_name (program_name);
    177   bfd_set_error_program_name (program_name);
    178 
    179   expandargv (&argc, &argv);
    180 
    181   string_min = 4;
    182   include_all_whitespace = FALSE;
    183   print_addresses = FALSE;
    184   print_filenames = FALSE;
    185   if (DEFAULT_STRINGS_ALL)
    186     datasection_only = FALSE;
    187   else
    188     datasection_only = TRUE;
    189   target = NULL;
    190   encoding = 's';
    191   output_separator = NULL;
    192 
    193   while ((optc = getopt_long (argc, argv, "adfhHn:wot:e:T:s:Vv0123456789",
    194 			      long_options, (int *) 0)) != EOF)
    195     {
    196       switch (optc)
    197 	{
    198 	case 'a':
    199 	  datasection_only = FALSE;
    200 	  break;
    201 
    202 	case 'd':
    203 	  datasection_only = TRUE;
    204 	  break;
    205 
    206 	case 'f':
    207 	  print_filenames = TRUE;
    208 	  break;
    209 
    210 	case 'H':
    211 	case 'h':
    212 	  usage (stdout, 0);
    213 
    214 	case 'n':
    215 	  string_min = (int) strtoul (optarg, &s, 0);
    216 	  if (s != NULL && *s != 0)
    217 	    fatal (_("invalid integer argument %s"), optarg);
    218 	  break;
    219 
    220 	case 'w':
    221 	  include_all_whitespace = TRUE;
    222 	  break;
    223 
    224 	case 'o':
    225 	  print_addresses = TRUE;
    226 	  address_radix = 8;
    227 	  break;
    228 
    229 	case 't':
    230 	  print_addresses = TRUE;
    231 	  if (optarg[1] != '\0')
    232 	    usage (stderr, 1);
    233 	  switch (optarg[0])
    234 	    {
    235 	    case 'o':
    236 	      address_radix = 8;
    237 	      break;
    238 
    239 	    case 'd':
    240 	      address_radix = 10;
    241 	      break;
    242 
    243 	    case 'x':
    244 	      address_radix = 16;
    245 	      break;
    246 
    247 	    default:
    248 	      usage (stderr, 1);
    249 	    }
    250 	  break;
    251 
    252 	case 'T':
    253 	  target = optarg;
    254 	  break;
    255 
    256 	case 'e':
    257 	  if (optarg[1] != '\0')
    258 	    usage (stderr, 1);
    259 	  encoding = optarg[0];
    260 	  break;
    261 
    262 	case 's':
    263 	  output_separator = optarg;
    264           break;
    265 
    266 	case 'V':
    267 	case 'v':
    268 	  print_version ("strings");
    269 	  break;
    270 
    271 	case '?':
    272 	  usage (stderr, 1);
    273 
    274 	default:
    275 	  numeric_opt = optind;
    276 	  break;
    277 	}
    278     }
    279 
    280   if (numeric_opt != 0)
    281     {
    282       string_min = (int) strtoul (argv[numeric_opt - 1] + 1, &s, 0);
    283       if (s != NULL && *s != 0)
    284 	fatal (_("invalid integer argument %s"), argv[numeric_opt - 1] + 1);
    285     }
    286   if (string_min < 1)
    287     fatal (_("invalid minimum string length %d"), string_min);
    288 
    289   switch (encoding)
    290     {
    291     case 'S':
    292     case 's':
    293       encoding_bytes = 1;
    294       break;
    295     case 'b':
    296     case 'l':
    297       encoding_bytes = 2;
    298       break;
    299     case 'B':
    300     case 'L':
    301       encoding_bytes = 4;
    302       break;
    303     default:
    304       usage (stderr, 1);
    305     }
    306 
    307   bfd_init ();
    308   set_default_bfd_target ();
    309 
    310   if (optind >= argc)
    311     {
    312       datasection_only = FALSE;
    313       SET_BINARY (fileno (stdin));
    314       print_strings ("{standard input}", stdin, 0, 0, 0, (char *) NULL);
    315       files_given = TRUE;
    316     }
    317   else
    318     {
    319       for (; optind < argc; ++optind)
    320 	{
    321 	  if (strcmp (argv[optind], "-") == 0)
    322 	    datasection_only = FALSE;
    323 	  else
    324 	    {
    325 	      files_given = TRUE;
    326 	      exit_status |= strings_file (argv[optind]) == FALSE;
    327 	    }
    328 	}
    329     }
    330 
    331   if (!files_given)
    332     usage (stderr, 1);
    333 
    334   return (exit_status);
    335 }
    336 
    337 /* Scan section SECT of the file ABFD, whose printable name is in
    339    ARG->filename and whose size might be in ARG->filesize.  If it
    340    contains initialized data set `got_a_section' and print the
    341    strings in it.
    342 
    343    FIXME: We ought to be able to return error codes/messages for
    344    certain conditions.  */
    345 
    346 static void
    347 strings_a_section (bfd *abfd, asection *sect, void *arg)
    348 {
    349   filename_and_size_t * filename_and_sizep;
    350   bfd_size_type *filesizep;
    351   bfd_size_type sectsize;
    352   void *mem;
    353 
    354   if ((sect->flags & DATA_FLAGS) != DATA_FLAGS)
    355     return;
    356 
    357   sectsize = bfd_get_section_size (sect);
    358 
    359   if (sectsize <= 0)
    360     return;
    361 
    362   /* Get the size of the file.  This might have been cached for us.  */
    363   filename_and_sizep = (filename_and_size_t *) arg;
    364   filesizep = & filename_and_sizep->filesize;
    365 
    366   if (*filesizep == 0)
    367     {
    368       struct stat st;
    369 
    370       if (bfd_stat (abfd, &st))
    371 	return;
    372 
    373       /* Cache the result so that we do not repeatedly stat this file.  */
    374       *filesizep = st.st_size;
    375     }
    376 
    377   /* Compare the size of the section against the size of the file.
    378      If the section is bigger then the file must be corrupt and
    379      we should not try dumping it.  */
    380   if (sectsize >= *filesizep)
    381     return;
    382 
    383   mem = xmalloc (sectsize);
    384 
    385   if (bfd_get_section_contents (abfd, sect, mem, (file_ptr) 0, sectsize))
    386     {
    387       got_a_section = TRUE;
    388 
    389       print_strings (filename_and_sizep->filename, NULL, sect->filepos,
    390 		     0, sectsize, (char *) mem);
    391     }
    392 
    393   free (mem);
    394 }
    395 
    396 /* Scan all of the sections in FILE, and print the strings
    397    in the initialized data section(s).
    398 
    399    Return TRUE if successful,
    400    FALSE if not (such as if FILE is not an object file).  */
    401 
    402 static bfd_boolean
    403 strings_object_file (const char *file)
    404 {
    405   filename_and_size_t filename_and_size;
    406   bfd *abfd;
    407 
    408   abfd = bfd_openr (file, target);
    409 
    410   if (abfd == NULL)
    411     /* Treat the file as a non-object file.  */
    412     return FALSE;
    413 
    414   /* This call is mainly for its side effect of reading in the sections.
    415      We follow the traditional behavior of `strings' in that we don't
    416      complain if we don't recognize a file to be an object file.  */
    417   if (!bfd_check_format (abfd, bfd_object))
    418     {
    419       bfd_close (abfd);
    420       return FALSE;
    421     }
    422 
    423   got_a_section = FALSE;
    424   filename_and_size.filename = file;
    425   filename_and_size.filesize = 0;
    426   bfd_map_over_sections (abfd, strings_a_section, & filename_and_size);
    427 
    428   if (!bfd_close (abfd))
    429     {
    430       bfd_nonfatal (file);
    431       return FALSE;
    432     }
    433 
    434   return got_a_section;
    435 }
    436 
    437 /* Print the strings in FILE.  Return TRUE if ok, FALSE if an error occurs.  */
    438 
    439 static bfd_boolean
    440 strings_file (char *file)
    441 {
    442   struct stat st;
    443 
    444   /* get_file_size does not support non-S_ISREG files.  */
    445 
    446   if (stat (file, &st) < 0)
    447     {
    448       if (errno == ENOENT)
    449 	non_fatal (_("'%s': No such file"), file);
    450       else
    451 	non_fatal (_("Warning: could not locate '%s'.  reason: %s"),
    452 		   file, strerror (errno));
    453       return FALSE;
    454     }
    455 
    456   /* If we weren't told to scan the whole file,
    457      try to open it as an object file and only look at
    458      initialized data sections.  If that fails, fall back to the
    459      whole file.  */
    460   if (!datasection_only || !strings_object_file (file))
    461     {
    462       FILE *stream;
    463 
    464       stream = fopen (file, FOPEN_RB);
    465       if (stream == NULL)
    466 	{
    467 	  fprintf (stderr, "%s: ", program_name);
    468 	  perror (file);
    469 	  return FALSE;
    470 	}
    471 
    472       print_strings (file, stream, (file_ptr) 0, 0, 0, (char *) 0);
    473 
    474       if (fclose (stream) == EOF)
    475 	{
    476 	  fprintf (stderr, "%s: ", program_name);
    477 	  perror (file);
    478 	  return FALSE;
    479 	}
    480     }
    481 
    482   return TRUE;
    483 }
    484 
    485 /* Read the next character, return EOF if none available.
    487    Assume that STREAM is positioned so that the next byte read
    488    is at address ADDRESS in the file.
    489 
    490    If STREAM is NULL, do not read from it.
    491    The caller can supply a buffer of characters
    492    to be processed before the data in STREAM.
    493    MAGIC is the address of the buffer and
    494    MAGICCOUNT is how many characters are in it.  */
    495 
    496 static long
    497 get_char (FILE *stream, file_ptr *address, int *magiccount, char **magic)
    498 {
    499   int c, i;
    500   long r = 0;
    501 
    502   for (i = 0; i < encoding_bytes; i++)
    503     {
    504       if (*magiccount)
    505 	{
    506 	  (*magiccount)--;
    507 	  c = *(*magic)++;
    508 	}
    509       else
    510 	{
    511 	  if (stream == NULL)
    512 	    return EOF;
    513 
    514 	  /* Only use getc_unlocked if we found a declaration for it.
    515 	     Otherwise, libc is not thread safe by default, and we
    516 	     should not use it.  */
    517 
    518 #if defined(HAVE_GETC_UNLOCKED) && HAVE_DECL_GETC_UNLOCKED
    519 	  c = getc_unlocked (stream);
    520 #else
    521 	  c = getc (stream);
    522 #endif
    523 	  if (c == EOF)
    524 	    return EOF;
    525 	}
    526 
    527       (*address)++;
    528       r = (r << 8) | (c & 0xff);
    529     }
    530 
    531   switch (encoding)
    532     {
    533     default:
    534       break;
    535     case 'l':
    536       r = ((r & 0xff) << 8) | ((r & 0xff00) >> 8);
    537       break;
    538     case 'L':
    539       r = (((r & 0xff) << 24) | ((r & 0xff00) << 8)
    540 	   | ((r & 0xff0000) >> 8) | ((r & 0xff000000) >> 24));
    541       break;
    542     }
    543 
    544   return r;
    545 }
    546 
    547 /* Find the strings in file FILENAME, read from STREAM.
    549    Assume that STREAM is positioned so that the next byte read
    550    is at address ADDRESS in the file.
    551    Stop reading at address STOP_POINT in the file, if nonzero.
    552 
    553    If STREAM is NULL, do not read from it.
    554    The caller can supply a buffer of characters
    555    to be processed before the data in STREAM.
    556    MAGIC is the address of the buffer and
    557    MAGICCOUNT is how many characters are in it.
    558    Those characters come at address ADDRESS and the data in STREAM follow.  */
    559 
    560 static void
    561 print_strings (const char *filename, FILE *stream, file_ptr address,
    562 	       int stop_point, int magiccount, char *magic)
    563 {
    564   char *buf = (char *) xmalloc (sizeof (char) * (string_min + 1));
    565 
    566   while (1)
    567     {
    568       file_ptr start;
    569       int i;
    570       long c;
    571 
    572       /* See if the next `string_min' chars are all graphic chars.  */
    573     tryline:
    574       if (stop_point && address >= stop_point)
    575 	break;
    576       start = address;
    577       for (i = 0; i < string_min; i++)
    578 	{
    579 	  c = get_char (stream, &address, &magiccount, &magic);
    580 	  if (c == EOF)
    581 	    {
    582 	      free (buf);
    583 	      return;
    584 	    }
    585 	  if (! STRING_ISGRAPHIC (c))
    586 	    /* Found a non-graphic.  Try again starting with next char.  */
    587 	    goto tryline;
    588 	  buf[i] = c;
    589 	}
    590 
    591       /* We found a run of `string_min' graphic characters.  Print up
    592 	 to the next non-graphic character.  */
    593 
    594       if (print_filenames)
    595 	printf ("%s: ", filename);
    596       if (print_addresses)
    597 	switch (address_radix)
    598 	  {
    599 	  case 8:
    600 #ifdef HAVE_LONG_LONG
    601 	    if (sizeof (start) > sizeof (long))
    602 	      {
    603 # ifndef __MSVCRT__
    604 	        printf ("%7llo ", (unsigned long long) start);
    605 # else
    606 	        printf ("%7I64o ", (unsigned long long) start);
    607 # endif
    608 	      }
    609 	    else
    610 #elif !BFD_HOST_64BIT_LONG
    611 	    if (start != (unsigned long) start)
    612 	      printf ("++%7lo ", (unsigned long) start);
    613 	    else
    614 #endif
    615 	      printf ("%7lo ", (unsigned long) start);
    616 	    break;
    617 
    618 	  case 10:
    619 #ifdef HAVE_LONG_LONG
    620 	    if (sizeof (start) > sizeof (long))
    621 	      {
    622 # ifndef __MSVCRT__
    623 	        printf ("%7lld ", (unsigned long long) start);
    624 # else
    625 	        printf ("%7I64d ", (unsigned long long) start);
    626 # endif
    627 	      }
    628 	    else
    629 #elif !BFD_HOST_64BIT_LONG
    630 	    if (start != (unsigned long) start)
    631 	      printf ("++%7ld ", (unsigned long) start);
    632 	    else
    633 #endif
    634 	      printf ("%7ld ", (long) start);
    635 	    break;
    636 
    637 	  case 16:
    638 #ifdef HAVE_LONG_LONG
    639 	    if (sizeof (start) > sizeof (long))
    640 	      {
    641 # ifndef __MSVCRT__
    642 	        printf ("%7llx ", (unsigned long long) start);
    643 # else
    644 	        printf ("%7I64x ", (unsigned long long) start);
    645 # endif
    646 	      }
    647 	    else
    648 #elif !BFD_HOST_64BIT_LONG
    649 	    if (start != (unsigned long) start)
    650 	      printf ("%lx%8.8lx ", (unsigned long) (start >> 32),
    651 		      (unsigned long) (start & 0xffffffff));
    652 	    else
    653 #endif
    654 	      printf ("%7lx ", (unsigned long) start);
    655 	    break;
    656 	  }
    657 
    658       buf[i] = '\0';
    659       fputs (buf, stdout);
    660 
    661       while (1)
    662 	{
    663 	  c = get_char (stream, &address, &magiccount, &magic);
    664 	  if (c == EOF)
    665 	    break;
    666 	  if (! STRING_ISGRAPHIC (c))
    667 	    break;
    668 	  putchar (c);
    669 	}
    670 
    671       if (output_separator)
    672         fputs (output_separator, stdout);
    673       else
    674         putchar ('\n');
    675     }
    676   free (buf);
    677 }
    678 
/* Print a summary of the command line syntax and options on STREAM,
   followed by the output of list_supported_targets, then exit with
   status STATUS.  The bug reporting address is only printed when
   STATUS is zero and REPORT_BUGS_TO is not empty.  Does not return.  */

static void
usage (FILE *stream, int status)
{
  fprintf (stream, _("Usage: %s [option(s)] [file(s)]\n"), program_name);
  fprintf (stream, _(" Display printable strings in [file(s)] (stdin by default)\n"));
  fprintf (stream, _(" The options are:\n"));

  /* Mark whichever of -a and -d is the configured default.  */
  if (DEFAULT_STRINGS_ALL)
    fprintf (stream, _("\
  -a - --all                Scan the entire file, not just the data section [default]\n\
  -d --data                 Only scan the data sections in the file\n"));
  else
    fprintf (stream, _("\
  -a - --all                Scan the entire file, not just the data section\n\
  -d --data                 Only scan the data sections in the file [default]\n"));

  fprintf (stream, _("\
  -f --print-file-name      Print the name of the file before each string\n\
  -n --bytes=[number]       Locate & print any NUL-terminated sequence of at\n\
  -<number>                   least [number] characters (default 4).\n\
  -t --radix={o,d,x}        Print the location of the string in base 8, 10 or 16\n\
  -w --include-all-whitespace Include all whitespace as valid string characters\n\
  -o                        An alias for --radix=o\n\
  -T --target=<BFDNAME>     Specify the binary file format\n\
  -e --encoding={s,S,b,l,B,L} Select character size and endianness:\n\
                            s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit\n\
  -s --output-separator=<string> String used to separate strings in output.\n\
  @<file>                   Read options from <file>\n\
  -h --help                 Display this information\n\
  -v -V --version           Print the program's version number\n"));
  list_supported_targets (program_name, stream);
  if (REPORT_BUGS_TO[0] && status == 0)
    fprintf (stream, _("Report bugs to %s\n"), REPORT_BUGS_TO);
  exit (status);
}
    715