Home | History | Annotate | Download | only in libdwfl
      1 /* Standard libdwfl callbacks for debugging the running Linux kernel.
      2    Copyright (C) 2005-2011, 2013, 2014, 2015 Red Hat, Inc.
      3    This file is part of elfutils.
      4 
      5    This file is free software; you can redistribute it and/or modify
      6    it under the terms of either
      7 
      8      * the GNU Lesser General Public License as published by the Free
      9        Software Foundation; either version 3 of the License, or (at
     10        your option) any later version
     11 
     12    or
     13 
     14      * the GNU General Public License as published by the Free
     15        Software Foundation; either version 2 of the License, or (at
     16        your option) any later version
     17 
     18    or both in parallel, as here.
     19 
     20    elfutils is distributed in the hope that it will be useful, but
     21    WITHOUT ANY WARRANTY; without even the implied warranty of
     22    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
     23    General Public License for more details.
     24 
     25    You should have received copies of the GNU General Public License and
     26    the GNU Lesser General Public License along with this program.  If
     27    not, see <http://www.gnu.org/licenses/>.  */
     28 
     29 /* We include this before config.h because it can't handle _FILE_OFFSET_BITS.
     30    Everything we need here is fine if its declarations just come first.  */
     31 /* Some makefiles, e.g. HOST_linux-x86.mk, predefine _FILE_OFFSET_BITS.  */
     32 #undef _FILE_OFFSET_BITS
     33 #include <fts.h>
     34 
     35 #include <config.h>
     36 
     37 #include "libdwflP.h"
     38 #include <inttypes.h>
     39 #include <errno.h>
     40 #include <stdio.h>
     41 #include <stdio_ext.h>
     42 #include <string.h>
     43 #include <stdlib.h>
     44 #include <sys/utsname.h>
     45 #include <fcntl.h>
     46 #include <unistd.h>
     47 
     48 /* Since fts.h is included before config.h, its indirect inclusions may not
     49    give us the right LFS aliases of these functions, so map them manually.  */
     50 #ifdef _FILE_OFFSET_BITS
     51 #define open open64
     52 #define fopen fopen64
     53 #endif
     54 
     55 
/* Module name under which the kernel image itself is reported.  */
#define KERNEL_MODNAME	"kernel"

/* printf format for a release's module directory; %s is the release.  */
#define MODULEDIRFMT	"/lib/modules/%s"

/* Files read to describe the running kernel.  The *FMT macros are printf
   formats whose %s is a module name: KNOTESFILE and MODNOTESFMT are
   scanned for build ID notes, KSYMSFILE supplies the kernel's address
   bounds, MODULELIST the loaded modules, and SECADDRDIRFMT is the
   directory prefix to which a section name is appended to find that
   section's load address.  */
#define KNOTESFILE	"/sys/kernel/notes"
#define	MODNOTESFMT	"/sys/module/%s/notes"
#define KSYMSFILE	"/proc/kallsyms"
#define MODULELIST	"/proc/modules"
#define	SECADDRDIRFMT	"/sys/module/%s/sections/"
#define MODULE_SECT_NAME_LEN 32	/* Minimum any linux/module.h has had.  */
     66 
     67 
/* Suffixes that try_kernel_name appends to a candidate kernel file name
   when the plain name could not be opened.  ".gz" is always tried; the
   others only when the corresponding USE_* macro is defined.  */
static const char *vmlinux_suffixes[] =
  {
    ".gz",
#ifdef USE_BZLIB
    ".bz2",
#endif
#ifdef USE_LZMA
    ".xz",
#endif
  };
     78 
     79 /* Try to open the given file as it is or under the debuginfo directory.  */
     80 static int
     81 try_kernel_name (Dwfl *dwfl, char **fname, bool try_debug)
     82 {
     83   if (*fname == NULL)
     84     return -1;
     85 
     86   /* Don't bother trying *FNAME itself here if the path will cause it to be
     87      tried because we give its own basename as DEBUGLINK_FILE.  */
     88   int fd = ((((dwfl->callbacks->debuginfo_path
     89 	       ? *dwfl->callbacks->debuginfo_path : NULL)
     90 	      ?: DEFAULT_DEBUGINFO_PATH)[0] == ':') ? -1
     91 	    : TEMP_FAILURE_RETRY (open (*fname, O_RDONLY)));
     92 
     93   if (fd < 0)
     94     {
     95       Dwfl_Module fakemod = { .dwfl = dwfl };
     96       /* First try the file's unadorned basename as DEBUGLINK_FILE,
     97 	 to look for "vmlinux" files.  */
     98       fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
     99 						 *fname, basename (*fname), 0,
    100 						 &fakemod.debug.name);
    101       if (fd < 0 && try_debug)
    102 	/* Next, let the call use the default of basename + ".debug",
    103 	   to look for "vmlinux.debug" files.  */
    104 	fd = INTUSE(dwfl_standard_find_debuginfo) (&fakemod, NULL, NULL, 0,
    105 						   *fname, NULL, 0,
    106 						   &fakemod.debug.name);
    107       if (fakemod.debug.name != NULL)
    108 	{
    109 	  free (*fname);
    110 	  *fname = fakemod.debug.name;
    111 	}
    112     }
    113 
    114   if (fd < 0)
    115     for (size_t i = 0;
    116 	 i < sizeof vmlinux_suffixes / sizeof vmlinux_suffixes[0];
    117 	 ++i)
    118       {
    119 	char *zname;
    120 	if (asprintf (&zname, "%s%s", *fname, vmlinux_suffixes[i]) > 0)
    121 	  {
    122 	    fd = TEMP_FAILURE_RETRY (open (zname, O_RDONLY));
    123 	    if (fd < 0)
    124 	      free (zname);
    125 	    else
    126 	      {
    127 		free (*fname);
    128 		*fname = zname;
    129 	      }
    130 	  }
    131       }
    132 
    133   if (fd < 0)
    134     {
    135       free (*fname);
    136       *fname = NULL;
    137     }
    138 
    139   return fd;
    140 }
    141 
    142 static inline const char *
    143 kernel_release (void)
    144 {
    145   /* Cache the `uname -r` string we'll use.  */
    146   static struct utsname utsname;
    147   if (utsname.release[0] == '\0' && uname (&utsname) != 0)
    148     return NULL;
    149   return utsname.release;
    150 }
    151 
    152 static int
    153 find_kernel_elf (Dwfl *dwfl, const char *release, char **fname)
    154 {
    155   if ((release[0] == '/'
    156        ? asprintf (fname, "%s/vmlinux", release)
    157        : asprintf (fname, "/boot/vmlinux-%s", release)) < 0)
    158     return -1;
    159 
    160   int fd = try_kernel_name (dwfl, fname, true);
    161   if (fd < 0 && release[0] != '/')
    162     {
    163       free (*fname);
    164       if (asprintf (fname, MODULEDIRFMT "/vmlinux", release) < 0)
    165 	return -1;
    166       fd = try_kernel_name (dwfl, fname, true);
    167     }
    168 
    169   return fd;
    170 }
    171 
    172 static int
    173 get_release (Dwfl *dwfl, const char **release)
    174 {
    175   if (dwfl == NULL)
    176     return -1;
    177 
    178   const char *release_string = release == NULL ? NULL : *release;
    179   if (release_string == NULL)
    180     {
    181       release_string = kernel_release ();
    182       if (release_string == NULL)
    183 	return errno;
    184       if (release != NULL)
    185 	*release = release_string;
    186     }
    187 
    188   return 0;
    189 }
    190 
    191 static int
    192 report_kernel (Dwfl *dwfl, const char **release,
    193 	       int (*predicate) (const char *module, const char *file))
    194 {
    195   int result = get_release (dwfl, release);
    196   if (unlikely (result != 0))
    197     return result;
    198 
    199   char *fname;
    200   int fd = find_kernel_elf (dwfl, *release, &fname);
    201 
    202   if (fd < 0)
    203     result = ((predicate != NULL && !(*predicate) (KERNEL_MODNAME, NULL))
    204 	      ? 0 : errno ?: ENOENT);
    205   else
    206     {
    207       bool report = true;
    208 
    209       if (predicate != NULL)
    210 	{
    211 	  /* Let the predicate decide whether to use this one.  */
    212 	  int want = (*predicate) (KERNEL_MODNAME, fname);
    213 	  if (want < 0)
    214 	    result = errno;
    215 	  report = want > 0;
    216 	}
    217 
    218       if (report)
    219 	{
    220 	  /* Note that on some architectures (e.g. x86_64) the vmlinux
    221 	     is ET_EXEC, while on others (e.g. ppc64) it is ET_DYN.
    222 	     In both cases the phdr p_vaddr load address will be non-zero.
    223 	     We want the image to be placed as if it was ET_DYN, so
    224 	     pass true for add_p_vaddr which will do the right thing
    225 	     (in combination with a zero base) in either case.  */
    226 	  Dwfl_Module *mod = INTUSE(dwfl_report_elf) (dwfl, KERNEL_MODNAME,
    227 						      fname, fd, 0, true);
    228 	  if (mod == NULL)
    229 	    result = -1;
    230 	  else
    231 	    /* The kernel is ET_EXEC, but always treat it as relocatable.  */
    232 	    mod->e_type = ET_DYN;
    233 	}
    234 
    235       free (fname);
    236 
    237       if (!report || result < 0)
    238 	close (fd);
    239     }
    240 
    241   return result;
    242 }
    243 
/* Look for a kernel debug archive.  If we find one, report all its modules.
   If not, return ENOENT.

   The archive is RELEASE/debug.a when *RELEASE starts with '/', and
   /lib/modules/RELEASE/debug.a otherwise.  *RELEASE is filled in by
   get_release if it was NULL on entry.  PREDICATE is passed through to
   __libdwfl_report_offline.

   Returns 0 on success, -1 if reporting failed, ENOMEM if the archive
   name could not be allocated, another errno code (ENOENT when errno is
   unset) if the archive could not be opened, or whatever get_release
   returned when it failed.  */
static int
report_kernel_archive (Dwfl *dwfl, const char **release,
		       int (*predicate) (const char *module, const char *file))
{
  int result = get_release (dwfl, release);
  if (unlikely (result != 0))
    return result;

  char *archive;
  int res = (((*release)[0] == '/')
	     ? asprintf (&archive, "%s/debug.a", *release)
	     : asprintf (&archive, MODULEDIRFMT "/debug.a", *release));
  if (unlikely (res < 0))
    return ENOMEM;

  /* No ".debug" variant is searched for the archive.  On failure
     try_kernel_name leaves ARCHIVE as NULL, so the free below is safe.  */
  int fd = try_kernel_name (dwfl, &archive, false);
  if (fd < 0)
    result = errno ?: ENOENT;
  else
    {
      /* We have the archive file open!  */
      Dwfl_Module *last = __libdwfl_report_offline (dwfl, NULL, archive, fd,
						    true, predicate);
      if (unlikely (last == NULL))
	result = -1;
      else
	{
	  /* Find the kernel and move it to the head of the list.
	     PREVP always addresses the link that points at M, so the
	     module can be unlinked and re-inserted at *TAILP in place.  */
	  Dwfl_Module **tailp = &dwfl->modulelist, **prevp = tailp;
	  for (Dwfl_Module *m = *prevp; m != NULL; m = *(prevp = &m->next))
	    if (!m->gc && m->e_type != ET_REL && !strcmp (m->name, "kernel"))
	      {
		*prevp = m->next;
		m->next = *tailp;
		*tailp = m;
		break;
	      }
	}
    }

  free (archive);
  return result;
}
    289 
    290 static size_t
    291 check_suffix (const FTSENT *f, size_t namelen)
    292 {
    293 #define TRY(sfx)							\
    294   if ((namelen ? f->fts_namelen == namelen + sizeof sfx - 1		\
    295        : f->fts_namelen >= sizeof sfx)					\
    296       && !memcmp (f->fts_name + f->fts_namelen - (sizeof sfx - 1),	\
    297 		  sfx, sizeof sfx))					\
    298     return sizeof sfx - 1
    299 
    300   TRY (".ko");
    301   TRY (".ko.gz");
    302 #if USE_BZLIB
    303   TRY (".ko.bz2");
    304 #endif
    305 #if USE_LZMA
    306   TRY (".ko.xz");
    307 #endif
    308 
    309   return 0;
    310 
    311 #undef	TRY
    312 }
    313 
/* Report a kernel and all its modules found on disk, for offline use.
   If RELEASE starts with '/', it names a directory to look in;
   if not, it names a directory to find under /lib/modules/;
   if null, /lib/modules/`uname -r` is used.
   PREDICATE, if non-null, is called with each module name and file
   name: a positive result reports the module, zero skips it, and a
   negative result stops the scan with -1.
   Returns zero on success, -1 if dwfl_report_module failed,
   or an errno code if finding the files on disk failed.  */
int
dwfl_linux_kernel_report_offline (Dwfl *dwfl, const char *release,
				  int (*predicate) (const char *module,
						    const char *file))
{
  /* A debug archive, when present, supplies everything.  Only ENOENT
     (no archive) falls through to the file-by-file search.  */
  int result = report_kernel_archive (dwfl, &release, predicate);
  if (result != ENOENT)
    return result;

  /* First report the kernel.  */
  result = report_kernel (dwfl, &release, predicate);
  if (result == 0)
    {
      /* Do "find /lib/modules/RELEASE -name *.ko".  */

      char *modulesdir[] = { NULL, NULL };
      if (release[0] == '/')
	modulesdir[0] = (char *) release;
      else
	{
	  if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
	    return errno;
	}

      FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
      /* RELEASE is the caller's string: forget it here so that the
	 free calls below only ever release our own allocation.  */
      if (modulesdir[0] == (char *) release)
	modulesdir[0] = NULL;
      if (fts == NULL)
	{
	  free (modulesdir[0]);
	  return errno;
	}

      FTSENT *f;
      while ((f = fts_read (fts)) != NULL)
	{
	  /* Skip a "source" subtree, which tends to be large.
	     This insane hard-coding of names is what depmod does too.  */
	  if (f->fts_namelen == sizeof "source" - 1
	      && !strcmp (f->fts_name, "source"))
	    {
	      fts_set (fts, f, FTS_SKIP);
	      continue;
	    }

	  /* Inside this switch, `continue' moves on to the next entry
	     while `break' leaves the switch and then the loop, ending
	     the scan with RESULT as set.  */
	  switch (f->fts_info)
	    {
	    case FTS_F:
	    case FTS_SL:
	    case FTS_NSOK:;
	      /* See if this file name matches "*.ko".  */
	      const size_t suffix = check_suffix (f, 0);
	      if (suffix)
		{
		  /* We have a .ko file to report.  Following the algorithm
		     by which the kernel makefiles set KBUILD_MODNAME, we
		     replace all ',' or '-' with '_' in the file name and
		     call that the module name.  Modules could well be
		     built using different embedded names than their file
		     names.  To handle that, we would have to look at the
		     __this_module.name contents in the module's text.  */

		  char *name = strndup (f->fts_name, f->fts_namelen - suffix);
		  if (unlikely (name == NULL))
		    {
		      __libdwfl_seterrno (DWFL_E_NOMEM);
		      result = -1;
		      break;
		    }
		  for (size_t i = 0; i < f->fts_namelen - suffix; ++i)
		    if (name[i] == '-' || name[i] == ',')
		      name[i] = '_';

		  if (predicate != NULL)
		    {
		      /* Let the predicate decide whether to use this one.  */
		      int want = (*predicate) (name, f->fts_path);
		      if (want < 0)
			{
			  result = -1;
			  free (name);
			  break;
			}
		      if (!want)
			{
			  free (name);
			  continue;
			}
		    }

		  /* No descriptor is supplied (-1); only the path is
		     handed to dwfl_report_offline.  */
		  if (dwfl_report_offline (dwfl, name, f->fts_path, -1) == NULL)
		    {
		      free (name);
		      result = -1;
		      break;
		    }
		  free (name);
		}
	      continue;

	    case FTS_ERR:
	    case FTS_DNR:
	    case FTS_NS:
	      result = f->fts_errno;
	      break;

	    case FTS_SLNONE:
	    default:
	      continue;
	    }

	  /* We only get here in error cases.  */
	  break;
	}
      fts_close (fts);
      free (modulesdir[0]);
    }

  return result;
}
INTDEF (dwfl_linux_kernel_report_offline)
    441 
    442 
/* State of read_address used by intuit_kernel_bounds. */
struct read_address_state {
  FILE *f;		/* Stream the lines are read from.  */
  char *line;		/* getline buffer holding the current line.  */
  size_t linesz;	/* Allocated size of LINE, maintained by getline.  */
  size_t n;		/* Value returned by the last getline call.  */
  char *p;		/* Parse position in LINE; after a successful
			   read_address it points past the type field.  */
  const char *type;	/* Field following the address on the line.  */
};
    452 
    453 static inline bool
    454 read_address (struct read_address_state *state, Dwarf_Addr *addr)
    455 {
    456   if ((state->n = getline (&state->line, &state->linesz, state->f)) < 1 ||
    457       state->line[state->n - 2] == ']')
    458     return false;
    459   *addr = strtoull (state->line, &state->p, 16);
    460   state->p += strspn (state->p, " \t");
    461   state->type = strsep (&state->p, " \t\n");
    462   if (state->type == NULL)
    463     return false;
    464   return state->p != NULL && state->p != state->line;
    465 }
    466 
    467 
    468 /* Grovel around to guess the bounds of the runtime kernel image.  */
    469 static int
    470 intuit_kernel_bounds (Dwarf_Addr *start, Dwarf_Addr *end, Dwarf_Addr *notes)
    471 {
    472   struct read_address_state state = { NULL, NULL, 0, 0, NULL, NULL };
    473 
    474   state.f = fopen (KSYMSFILE, "r");
    475   if (state.f == NULL)
    476     return errno;
    477 
    478   (void) __fsetlocking (state.f, FSETLOCKING_BYCALLER);
    479 
    480   *notes = 0;
    481 
    482   int result;
    483   do
    484     result = read_address (&state, start) ? 0 : -1;
    485   while (result == 0 && strchr ("TtRr", *state.type) == NULL);
    486 
    487   if (result == 0)
    488     {
    489       *end = *start;
    490       while (read_address (&state, end))
    491 	if (*notes == 0 && !strcmp (state.p, "__start_notes\n"))
    492 	  *notes = *end;
    493 
    494       Dwarf_Addr round_kernel = sysconf (_SC_PAGE_SIZE);
    495       *start &= -(Dwarf_Addr) round_kernel;
    496       *end += round_kernel - 1;
    497       *end &= -(Dwarf_Addr) round_kernel;
    498       if (*start >= *end || *end - *start < round_kernel)
    499 	result = -1;
    500     }
    501   free (state.line);
    502 
    503   if (result == -1)
    504     result = ferror_unlocked (state.f) ? errno : ENOEXEC;
    505 
    506   fclose (state.f);
    507 
    508   return result;
    509 }
    510 
    511 
    512 /* Look for a build ID note in NOTESFILE and associate the ID with MOD.  */
    513 static int
    514 check_notes (Dwfl_Module *mod, const char *notesfile,
    515 	     Dwarf_Addr vaddr, const char *secname)
    516 {
    517   int fd = open (notesfile, O_RDONLY);
    518   if (fd < 0)
    519     return 1;
    520 
    521   assert (sizeof (Elf32_Nhdr) == sizeof (GElf_Nhdr));
    522   assert (sizeof (Elf64_Nhdr) == sizeof (GElf_Nhdr));
    523   union
    524   {
    525     GElf_Nhdr nhdr;
    526     unsigned char data[8192];
    527   } buf;
    528 
    529   ssize_t n = read (fd, buf.data, sizeof buf);
    530   close (fd);
    531 
    532   if (n <= 0)
    533     return 1;
    534 
    535   unsigned char *p = buf.data;
    536   while (p < &buf.data[n])
    537     {
    538       /* No translation required since we are reading the native kernel.  */
    539       GElf_Nhdr *nhdr = (void *) p;
    540       p += sizeof *nhdr;
    541       unsigned char *name = p;
    542       p += (nhdr->n_namesz + 3) & -4U;
    543       unsigned char *bits = p;
    544       p += (nhdr->n_descsz + 3) & -4U;
    545 
    546       if (p <= &buf.data[n]
    547 	  && nhdr->n_type == NT_GNU_BUILD_ID
    548 	  && nhdr->n_namesz == sizeof "GNU"
    549 	  && !memcmp (name, "GNU", sizeof "GNU"))
    550 	{
    551 	  /* Found it.  For a module we must figure out its VADDR now.  */
    552 
    553 	  if (secname != NULL
    554 	      && (INTUSE(dwfl_linux_kernel_module_section_address)
    555 		  (mod, NULL, mod->name, 0, secname, 0, NULL, &vaddr) != 0
    556 		  || vaddr == (GElf_Addr) -1l))
    557 	    vaddr = 0;
    558 
    559 	  if (vaddr != 0)
    560 	    vaddr += bits - buf.data;
    561 	  return INTUSE(dwfl_module_report_build_id) (mod, bits,
    562 						      nhdr->n_descsz, vaddr);
    563 	}
    564     }
    565 
    566   return 0;
    567 }
    568 
    569 /* Look for a build ID for the kernel.  */
    570 static int
    571 check_kernel_notes (Dwfl_Module *kernelmod, GElf_Addr vaddr)
    572 {
    573   return check_notes (kernelmod, KNOTESFILE, vaddr, NULL) < 0 ? -1 : 0;
    574 }
    575 
    576 /* Look for a build ID for a loaded kernel module.  */
    577 static int
    578 check_module_notes (Dwfl_Module *mod)
    579 {
    580   char *dirs[2] = { NULL, NULL };
    581   if (asprintf (&dirs[0], MODNOTESFMT, mod->name) < 0)
    582     return ENOMEM;
    583 
    584   FTS *fts = fts_open (dirs, FTS_NOSTAT | FTS_LOGICAL, NULL);
    585   if (fts == NULL)
    586     {
    587       free (dirs[0]);
    588       return 0;
    589     }
    590 
    591   int result = 0;
    592   FTSENT *f;
    593   while ((f = fts_read (fts)) != NULL)
    594     {
    595       switch (f->fts_info)
    596 	{
    597 	case FTS_F:
    598 	case FTS_SL:
    599 	case FTS_NSOK:
    600 	  result = check_notes (mod, f->fts_accpath, 0, f->fts_name);
    601 	  if (result > 0)	/* Nothing found.  */
    602 	    {
    603 	      result = 0;
    604 	      continue;
    605 	    }
    606 	  break;
    607 
    608 	case FTS_ERR:
    609 	case FTS_DNR:
    610 	  result = f->fts_errno;
    611 	  break;
    612 
    613 	case FTS_NS:
    614 	case FTS_SLNONE:
    615 	default:
    616 	  continue;
    617 	}
    618 
    619       /* We only get here when finished or in error cases.  */
    620       break;
    621     }
    622   fts_close (fts);
    623   free (dirs[0]);
    624 
    625   return result;
    626 }
    627 
    628 int
    629 dwfl_linux_kernel_report_kernel (Dwfl *dwfl)
    630 {
    631   Dwarf_Addr start = 0;
    632   Dwarf_Addr end = 0;
    633 
    634   #define report() \
    635     (INTUSE(dwfl_report_module) (dwfl, KERNEL_MODNAME, start, end))
    636 
    637   /* This is a bit of a kludge.  If we already reported the kernel,
    638      don't bother figuring it out again--it never changes.  */
    639   for (Dwfl_Module *m = dwfl->modulelist; m != NULL; m = m->next)
    640     if (!strcmp (m->name, KERNEL_MODNAME))
    641       {
    642 	start = m->low_addr;
    643 	end = m->high_addr;
    644 	return report () == NULL ? -1 : 0;
    645       }
    646 
    647   /* Try to figure out the bounds of the kernel image without
    648      looking for any vmlinux file.  */
    649   Dwarf_Addr notes;
    650   /* The compiler cannot deduce that if intuit_kernel_bounds returns
    651      zero NOTES will be initialized.  Fake the initialization.  */
    652   asm ("" : "=m" (notes));
    653   int result = intuit_kernel_bounds (&start, &end, &notes);
    654   if (result == 0)
    655     {
    656       Dwfl_Module *mod = report ();
    657       return unlikely (mod == NULL) ? -1 : check_kernel_notes (mod, notes);
    658     }
    659   if (result != ENOENT)
    660     return result;
    661 
    662   /* Find the ELF file for the running kernel and dwfl_report_elf it.  */
    663   return report_kernel (dwfl, NULL, NULL);
    664 }
    665 INTDEF (dwfl_linux_kernel_report_kernel)
    666 
    667 
    668 static inline bool
    669 subst_name (char from, char to,
    670             const char * const module_name,
    671             char * const alternate_name,
    672             const size_t namelen)
    673 {
    674   const char *n = memchr (module_name, from, namelen);
    675   if (n == NULL)
    676     return false;
    677   char *a = mempcpy (alternate_name, module_name, n - module_name);
    678   *a++ = to;
    679   ++n;
    680   const char *p;
    681   while ((p = memchr (n, from, namelen - (n - module_name))) != NULL)
    682     {
    683       a = mempcpy (a, n, p - n);
    684       *a++ = to;
    685       n = p + 1;
    686     }
    687   memcpy (a, n, namelen - (n - module_name) + 1);
    688   return true;
    689 }
    690 
    691 /* Dwfl_Callbacks.find_elf for the running Linux kernel and its modules.  */
    692 
    693 int
    694 dwfl_linux_kernel_find_elf (Dwfl_Module *mod,
    695 			    void **userdata __attribute__ ((unused)),
    696 			    const char *module_name,
    697 			    Dwarf_Addr base __attribute__ ((unused)),
    698 			    char **file_name, Elf **elfp)
    699 {
    700   if (mod->build_id_len > 0)
    701     {
    702       int fd = INTUSE(dwfl_build_id_find_elf) (mod, NULL, NULL, 0,
    703 					       file_name, elfp);
    704       if (fd >= 0 || mod->main.elf != NULL || errno != 0)
    705 	return fd;
    706     }
    707 
    708   const char *release = kernel_release ();
    709   if (release == NULL)
    710     return errno;
    711 
    712   if (!strcmp (module_name, KERNEL_MODNAME))
    713     return find_kernel_elf (mod->dwfl, release, file_name);
    714 
    715   /* Do "find /lib/modules/`uname -r` -name MODULE_NAME.ko".  */
    716 
    717   char *modulesdir[] = { NULL, NULL };
    718   if (asprintf (&modulesdir[0], MODULEDIRFMT, release) < 0)
    719     return -1;
    720 
    721   FTS *fts = fts_open (modulesdir, FTS_NOSTAT | FTS_LOGICAL, NULL);
    722   if (fts == NULL)
    723     {
    724       free (modulesdir[0]);
    725       return -1;
    726     }
    727 
    728   size_t namelen = strlen (module_name);
    729 
    730   /* This is a kludge.  There is no actual necessary relationship between
    731      the name of the .ko file installed and the module name the kernel
    732      knows it by when it's loaded.  The kernel's only idea of the module
    733      name comes from the name embedded in the object's magic
    734      .gnu.linkonce.this_module section.
    735 
    736      In practice, these module names match the .ko file names except for
    737      some using '_' and some using '-'.  So our cheap kludge is to look for
    738      two files when either a '_' or '-' appears in a module name, one using
    739      only '_' and one only using '-'.  */
    740 
    741   char *alternate_name = malloc (namelen + 1);
    742   if (unlikely (alternate_name == NULL))
    743     {
    744       free (modulesdir[0]);
    745       return ENOMEM;
    746     }
    747   if (!subst_name ('-', '_', module_name, alternate_name, namelen) &&
    748       !subst_name ('_', '-', module_name, alternate_name, namelen))
    749     alternate_name[0] = '\0';
    750 
    751   FTSENT *f;
    752   int error = ENOENT;
    753   while ((f = fts_read (fts)) != NULL)
    754     {
    755       /* Skip a "source" subtree, which tends to be large.
    756 	 This insane hard-coding of names is what depmod does too.  */
    757       if (f->fts_namelen == sizeof "source" - 1
    758 	  && !strcmp (f->fts_name, "source"))
    759 	{
    760 	  fts_set (fts, f, FTS_SKIP);
    761 	  continue;
    762 	}
    763 
    764       error = ENOENT;
    765       switch (f->fts_info)
    766 	{
    767 	case FTS_F:
    768 	case FTS_SL:
    769 	case FTS_NSOK:
    770 	  /* See if this file name is "MODULE_NAME.ko".  */
    771 	  if (check_suffix (f, namelen)
    772 	      && (!memcmp (f->fts_name, module_name, namelen)
    773 		  || !memcmp (f->fts_name, alternate_name, namelen)))
    774 	    {
    775 	      int fd = open (f->fts_accpath, O_RDONLY);
    776 	      *file_name = strdup (f->fts_path);
    777 	      fts_close (fts);
    778 	      free (modulesdir[0]);
    779 	      free (alternate_name);
    780 	      if (fd < 0)
    781 		free (*file_name);
    782 	      else if (*file_name == NULL)
    783 		{
    784 		  close (fd);
    785 		  fd = -1;
    786 		}
    787 	      return fd;
    788 	    }
    789 	  break;
    790 
    791 	case FTS_ERR:
    792 	case FTS_DNR:
    793 	case FTS_NS:
    794 	  error = f->fts_errno;
    795 	  break;
    796 
    797 	case FTS_SLNONE:
    798 	default:
    799 	  break;
    800 	}
    801     }
    802 
    803   fts_close (fts);
    804   free (modulesdir[0]);
    805   free (alternate_name);
    806   errno = error;
    807   return -1;
    808 }
    809 INTDEF (dwfl_linux_kernel_find_elf)
    810 
    811 
/* Dwfl_Callbacks.section_address for kernel modules in the running Linux.
   We read the information from /sys/module directly.

   Looks up section SECNAME of module MODNAME and stores its address in
   *ADDR; the remaining arguments are unused.  *ADDR is set to -1 for
   sections known never to be loaded.  Returns DWARF_CB_OK on success.
   On failure returns DWARF_CB_ABORT (with errno set when the file was
   found but could not be parsed), or ENOMEM from one allocation
   failure path.  */

int
dwfl_linux_kernel_module_section_address
(Dwfl_Module *mod __attribute__ ((unused)),
 void **userdata __attribute__ ((unused)),
 const char *modname, Dwarf_Addr base __attribute__ ((unused)),
 const char *secname, Elf32_Word shndx __attribute__ ((unused)),
 const GElf_Shdr *shdr __attribute__ ((unused)),
 Dwarf_Addr *addr)
{
  char *sysfile;
  if (asprintf (&sysfile, SECADDRDIRFMT "%s", modname, secname) < 0)
    return DWARF_CB_ABORT;

  FILE *f = fopen (sysfile, "r");
  free (sysfile);

  if (f == NULL)
    {
      /* Only a missing file gets the fallbacks below; any other
	 fopen error aborts straight away.  */
      if (errno == ENOENT)
	{
	  /* The .modinfo and .data.percpu sections are never kept
	     loaded in the kernel.  If the kernel was compiled without
	     CONFIG_MODULE_UNLOAD, the .exit.* sections are not
	     actually loaded at all.

	     Setting *ADDR to -1 tells the caller this section is
	     actually absent from memory.  */

	  if (!strcmp (secname, ".modinfo")
	      || !strcmp (secname, ".data.percpu")
	      || !strncmp (secname, ".exit", 5))
	    {
	      *addr = (Dwarf_Addr) -1l;
	      return DWARF_CB_OK;
	    }

	  /* The goofy PPC64 module_frob_arch_sections function tweaks
	     the section names as a way to control other kernel code's
	     behavior, and this cruft leaks out into the /sys information.
	     The file name for ".init*" may actually look like "_init*".  */

	  const bool is_init = !strncmp (secname, ".init", 5);
	  if (is_init)
	    {
	      if (asprintf (&sysfile, SECADDRDIRFMT "_%s",
			    modname, &secname[1]) < 0)
		return ENOMEM;
	      f = fopen (sysfile, "r");
	      free (sysfile);
	      if (f != NULL)
		goto ok;
	    }

	  /* The kernel truncates section names to MODULE_SECT_NAME_LEN - 1.
	     In case that size increases in the future, look for longer
	     truncated names first.  */
	  size_t namelen = strlen (secname);
	  if (namelen >= MODULE_SECT_NAME_LEN)
	    {
	      int len = asprintf (&sysfile, SECADDRDIRFMT "%s",
				  modname, secname);
	      if (len < 0)
		return DWARF_CB_ABORT;
	      /* &sysfile[len - namelen] is where the section name starts
		 within the path.  Each pass chops one more character off
		 the end and tries again, down to a name of
		 MODULE_SECT_NAME_LEN - 1 characters.  */
	      char *end = sysfile + len;
	      do
		{
		  *--end = '\0';
		  f = fopen (sysfile, "r");
		  if (is_init && f == NULL && errno == ENOENT)
		    {
		      /* Try the "_init*" spelling of the truncated name
			 as well, then restore the leading '.'.  */
		      sysfile[len - namelen] = '_';
		      f = fopen (sysfile, "r");
		      sysfile[len - namelen] = '.';
		    }
		}
	      while (f == NULL && errno == ENOENT
		     && end - &sysfile[len - namelen] >= MODULE_SECT_NAME_LEN);
	      free (sysfile);

	      if (f != NULL)
		goto ok;
	    }
	}

      return DWARF_CB_ABORT;
    }

 ok:
  (void) __fsetlocking (f, FSETLOCKING_BYCALLER);

  /* The file holds the section's address as a hexadecimal number.  */
  int result = (fscanf (f, "%" PRIx64 "\n", addr) == 1 ? 0
		: ferror_unlocked (f) ? errno : ENOEXEC);
  fclose (f);

  if (result == 0)
    return DWARF_CB_OK;

  errno = result;
  return DWARF_CB_ABORT;
}
INTDEF (dwfl_linux_kernel_module_section_address)
    916 
    917 int
    918 dwfl_linux_kernel_report_modules (Dwfl *dwfl)
    919 {
    920   FILE *f = fopen (MODULELIST, "r");
    921   if (f == NULL)
    922     return errno;
    923 
    924   (void) __fsetlocking (f, FSETLOCKING_BYCALLER);
    925 
    926   int result = 0;
    927   Dwarf_Addr modaddr;
    928   unsigned long int modsz;
    929   char modname[128];
    930   char *line = NULL;
    931   size_t linesz = 0;
    932   /* We can't just use fscanf here because it's not easy to distinguish \n
    933      from other whitespace so as to take the optional word following the
    934      address but always stop at the end of the line.  */
    935   while (getline (&line, &linesz, f) > 0
    936 	 && sscanf (line, "%128s %lu %*s %*s %*s %" PRIx64 " %*s\n",
    937 		    modname, &modsz, &modaddr) == 3)
    938     {
    939       Dwfl_Module *mod = INTUSE(dwfl_report_module) (dwfl, modname,
    940 						     modaddr, modaddr + modsz);
    941       if (mod == NULL)
    942 	{
    943 	  result = -1;
    944 	  break;
    945 	}
    946 
    947       result = check_module_notes (mod);
    948     }
    949   free (line);
    950 
    951   if (result == 0)
    952     result = ferror_unlocked (f) ? errno : feof_unlocked (f) ? 0 : ENOEXEC;
    953 
    954   fclose (f);
    955 
    956   return result;
    957 }
    958 INTDEF (dwfl_linux_kernel_report_modules)
    959