Home | History | Annotate | Download | only in src
      1 // Copyright (c) 2009, Google Inc.
      2 // All rights reserved.
      3 //
      4 // Redistribution and use in source and binary forms, with or without
      5 // modification, are permitted provided that the following conditions are
      6 // met:
      7 //
      8 //     * Redistributions of source code must retain the above copyright
      9 // notice, this list of conditions and the following disclaimer.
     10 //     * Redistributions in binary form must reproduce the above
     11 // copyright notice, this list of conditions and the following disclaimer
     12 // in the documentation and/or other materials provided with the
     13 // distribution.
     14 //     * Neither the name of Google Inc. nor the names of its
     15 // contributors may be used to endorse or promote products derived from
     16 // this software without specific prior written permission.
     17 //
     18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
     22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
     24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
     28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     29 
     30 // ---
     31 // Author: Craig Silverstein
     32 //
     33 // This forks out to pprof to do the actual symbolizing.  We might
     34 // be better off writing our own in C++.
     35 
     36 #include "config.h"
     37 #include "symbolize.h"
     38 #include <stdlib.h>
     39 #ifdef HAVE_UNISTD_H
     40 #include <unistd.h>   // for write()
     41 #endif
     42 #ifdef HAVE_SYS_SOCKET_H
     43 #include <sys/socket.h>   // for socketpair() -- needed by Symbolize
     44 #endif
     45 #ifdef HAVE_SYS_WAIT_H
     46 #include <sys/wait.h>   // for wait() -- needed by Symbolize
     47 #endif
     48 #ifdef HAVE_POLL_H
     49 #include <poll.h>
     50 #endif
     51 #ifdef __MACH__
     52 #include <mach-o/dyld.h>   // for GetProgramInvocationName()
     53 #include <limits.h>        // for PATH_MAX
     54 #endif
     55 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
     56 #include <io.h>            // for get_osfhandle()
     57 #endif
     58 #include <string>
     59 #include "base/commandlineflags.h"
     60 #include "base/logging.h"
     61 #include "base/sysinfo.h"
     62 
     63 using std::string;
     64 using tcmalloc::DumpProcSelfMaps;   // from sysinfo.h
     65 
     66 
// Command-line flag naming the pprof executable that Symbolize() launches.
// The default is produced by EnvToString("PPROF_PATH", "pprof") --
// presumably the PPROF_PATH environment variable with "pprof" as the
// fallback (TODO confirm against EnvToString's definition).
DEFINE_string(symbolize_pprof,
              EnvToString("PPROF_PATH", "pprof"),
              "Path to pprof to call for reporting function names.");

// The pprof path may be needed after static destructors have been
// called (since that's when leak-checking is done), so we keep a
// heap-allocated copy of the flag value that never gets destroyed.
// Note that the copy is taken once, when this static is initialized.
static string* g_pprof_path = new string(FLAGS_symbolize_pprof);
     75 
     76 // Returns NULL if we're on an OS where we can't get the invocation name.
     77 // Using a static var is ok because we're not called from a thread.
     78 static char* GetProgramInvocationName() {
     79 #if defined(HAVE_PROGRAM_INVOCATION_NAME)
     80   extern char* program_invocation_name;  // gcc provides this
     81   return program_invocation_name;
     82 #elif defined(__MACH__)
     83   // We don't want to allocate memory for this since we may be
     84   // calculating it when memory is corrupted.
     85   static char program_invocation_name[PATH_MAX];
     86   if (program_invocation_name[0] == '\0') {  // first time calculating
     87     uint32_t length = sizeof(program_invocation_name);
     88     if (_NSGetExecutablePath(program_invocation_name, &length))
     89       return NULL;
     90   }
     91   return program_invocation_name;
     92 #else
     93   return NULL;   // figure out a way to get argv[0]
     94 #endif
     95 }
     96 
// Prints an error message when you can't run Symbolize().
// `reason` is a short human-readable explanation; it is substituted into
// the "*** Reason: %s" line of the message, which is emitted through
// RAW_LOG at ERROR level.
static void PrintError(const char* reason) {
  RAW_LOG(ERROR,
          "*** WARNING: Cannot convert addresses to symbols in output below.\n"
          "*** Reason: %s\n"
          "*** If you cannot fix this, try running pprof directly.\n",
          reason);
}
    105 
    106 void SymbolTable::Add(const void* addr) {
    107   symbolization_table_[addr] = "";
    108 }
    109 
    110 const char* SymbolTable::GetSymbol(const void* addr) {
    111   return symbolization_table_[addr];
    112 }
    113 
    114 // Updates symbolization_table with the pointers to symbol names corresponding
    115 // to its keys. The symbol names are stored in out, which is allocated and
    116 // freed by the caller of this routine.
    117 // Note that the forking/etc is not thread-safe or re-entrant.  That's
    118 // ok for the purpose we need -- reporting leaks detected by heap-checker
    119 // -- but be careful if you decide to use this routine for other purposes.
    120 // Returns number of symbols read on error.  If can't symbolize, returns 0
    121 // and emits an error message about why.
    122 int SymbolTable::Symbolize() {
    123 #if !defined(HAVE_UNISTD_H)  || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H)
    124   PrintError("Perftools does not know how to call a sub-process on this O/S");
    125   return 0;
    126 #else
    127   const char* argv0 = GetProgramInvocationName();
    128   if (argv0 == NULL) {  // can't call symbolize if we can't figure out our name
    129     PrintError("Cannot figure out the name of this executable (argv0)");
    130     return 0;
    131   }
    132   if (access(g_pprof_path->c_str(), R_OK) != 0) {
    133     PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)");
    134     return 0;
    135   }
    136 
    137   // All this work is to do two-way communication.  ugh.
    138   int *child_in = NULL;   // file descriptors
    139   int *child_out = NULL;  // for now, we don't worry about child_err
    140   int child_fds[5][2];    // socketpair may be called up to five times below
    141 
    142   // The client program may close its stdin and/or stdout and/or stderr
    143   // thus allowing socketpair to reuse file descriptors 0, 1 or 2.
    144   // In this case the communication between the forked processes may be broken
    145   // if either the parent or the child tries to close or duplicate these
    146   // descriptors. The loop below produces two pairs of file descriptors, each
    147   // greater than 2 (stderr).
    148   for (int i = 0; i < 5; i++) {
    149     if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) {
    150       for (int j = 0; j < i; j++) {
    151         close(child_fds[j][0]);
    152         close(child_fds[j][1]);
    153         PrintError("Cannot create a socket pair");
    154         return 0;
    155       }
    156     } else {
    157       if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) {
    158         if (child_in == NULL) {
    159           child_in = child_fds[i];
    160         } else {
    161           child_out = child_fds[i];
    162           for (int j = 0; j < i; j++) {
    163             if (child_fds[j] == child_in) continue;
    164             close(child_fds[j][0]);
    165             close(child_fds[j][1]);
    166           }
    167           break;
    168         }
    169       }
    170     }
    171   }
    172 
    173   switch (fork()) {
    174     case -1: {  // error
    175       close(child_in[0]);
    176       close(child_in[1]);
    177       close(child_out[0]);
    178       close(child_out[1]);
    179       PrintError("Unknown error calling fork()");
    180       return 0;
    181     }
    182     case 0: {  // child
    183       close(child_in[1]);   // child uses the 0's, parent uses the 1's
    184       close(child_out[1]);  // child uses the 0's, parent uses the 1's
    185       close(0);
    186       close(1);
    187       if (dup2(child_in[0], 0) == -1) _exit(1);
    188       if (dup2(child_out[0], 1) == -1) _exit(2);
    189       // Unset vars that might cause trouble when we fork
    190       unsetenv("CPUPROFILE");
    191       unsetenv("HEAPPROFILE");
    192       unsetenv("HEAPCHECK");
    193       unsetenv("PERFTOOLS_VERBOSE");
    194       execlp(g_pprof_path->c_str(), g_pprof_path->c_str(),
    195              "--symbols", argv0, NULL);
    196       _exit(3);  // if execvp fails, it's bad news for us
    197     }
    198     default: {  // parent
    199       close(child_in[0]);   // child uses the 0's, parent uses the 1's
    200       close(child_out[0]);  // child uses the 0's, parent uses the 1's
    201 #ifdef HAVE_POLL_H
    202       // Waiting for 1ms seems to give the OS time to notice any errors.
    203       poll(0, 0, 1);
    204       // For maximum safety, we check to make sure the execlp
    205       // succeeded before trying to write.  (Otherwise we'll get a
    206       // SIGPIPE.)  For systems without poll.h, we'll just skip this
    207       // check, and trust that the user set PPROF_PATH correctly!
    208       struct pollfd pfd = { child_in[1], POLLOUT, 0 };
    209       if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) ||
    210           (pfd.revents & (POLLHUP|POLLERR))) {
    211         PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)");
    212         return 0;
    213       }
    214 #endif
    215 #if defined(__CYGWIN__) || defined(__CYGWIN32__)
    216       // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd.  Convert.
    217       const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]);
    218       DumpProcSelfMaps(symbols_handle);
    219 #else
    220       DumpProcSelfMaps(child_in[1]);  // what pprof expects on stdin
    221 #endif
    222 
    223       // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each
    224       // address to feed to pprof.
    225       const int kOutBufSize = 24 * symbolization_table_.size();
    226       char *pprof_buffer = new char[kOutBufSize];
    227       int written = 0;
    228       for (SymbolMap::const_iterator iter = symbolization_table_.begin();
    229            iter != symbolization_table_.end(); ++iter) {
    230         written += snprintf(pprof_buffer + written, kOutBufSize - written,
    231                  // pprof expects format to be 0xXXXXXX
    232                  "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first));
    233       }
    234       write(child_in[1], pprof_buffer, strlen(pprof_buffer));
    235       close(child_in[1]);             // that's all we need to write
    236 
    237       const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size();
    238       int total_bytes_read = 0;
    239       delete[] symbol_buffer_;
    240       symbol_buffer_ = new char[kSymbolBufferSize];
    241       memset(symbol_buffer_, '\0', kSymbolBufferSize);
    242       while (1) {
    243         int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read,
    244                               kSymbolBufferSize - total_bytes_read);
    245         if (bytes_read < 0) {
    246           close(child_out[1]);
    247           PrintError("Cannot read data from pprof");
    248           return 0;
    249         } else if (bytes_read == 0) {
    250           close(child_out[1]);
    251           wait(NULL);
    252           break;
    253         } else {
    254           total_bytes_read += bytes_read;
    255         }
    256       }
    257       // We have successfully read the output of pprof into out.  Make sure
    258       // the last symbol is full (we can tell because it ends with a \n).
    259       if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n')
    260         return 0;
    261       // make the symbolization_table_ values point to the output vector
    262       SymbolMap::iterator fill = symbolization_table_.begin();
    263       int num_symbols = 0;
    264       const char *current_name = symbol_buffer_;
    265       for (int i = 0; i < total_bytes_read; i++) {
    266         if (symbol_buffer_[i] == '\n') {
    267           fill->second = current_name;
    268           symbol_buffer_[i] = '\0';
    269           current_name = symbol_buffer_ + i + 1;
    270           fill++;
    271           num_symbols++;
    272         }
    273       }
    274       return num_symbols;
    275     }
    276   }
    277   PrintError("Unkown error (should never occur!)");
    278   return 0;  // shouldn't be reachable
    279 #endif
    280 }
    281