1 // Copyright (c) 2009, Google Inc. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above 11 // copyright notice, this list of conditions and the following disclaimer 12 // in the documentation and/or other materials provided with the 13 // distribution. 14 // * Neither the name of Google Inc. nor the names of its 15 // contributors may be used to endorse or promote products derived from 16 // this software without specific prior written permission. 17 // 18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 30 // --- 31 // Author: Craig Silverstein 32 // 33 // This forks out to pprof to do the actual symbolizing. We might 34 // be better off writing our own in C++. 35 36 #include "config.h" 37 #include "symbolize.h" 38 #include <stdlib.h> 39 #ifdef HAVE_UNISTD_H 40 #include <unistd.h> // for write() 41 #endif 42 #ifdef HAVE_SYS_SOCKET_H 43 #include <sys/socket.h> // for socketpair() -- needed by Symbolize 44 #endif 45 #ifdef HAVE_SYS_WAIT_H 46 #include <sys/wait.h> // for wait() -- needed by Symbolize 47 #endif 48 #ifdef HAVE_POLL_H 49 #include <poll.h> 50 #endif 51 #ifdef __MACH__ 52 #include <mach-o/dyld.h> // for GetProgramInvocationName() 53 #include <limits.h> // for PATH_MAX 54 #endif 55 #if defined(__CYGWIN__) || defined(__CYGWIN32__) 56 #include <io.h> // for get_osfhandle() 57 #endif 58 #include <string> 59 #include "base/commandlineflags.h" 60 #include "base/logging.h" 61 #include "base/sysinfo.h" 62 63 using std::string; 64 using tcmalloc::DumpProcSelfMaps; // from sysinfo.h 65 66 67 DEFINE_string(symbolize_pprof, 68 EnvToString("PPROF_PATH", "pprof"), 69 "Path to pprof to call for reporting function names."); 70 71 // heap_profile_table_pprof may be referenced after destructors are 72 // called (since that's when leak-checking is done), so we make 73 // a more-permanent copy that won't ever get destroyed. 74 static string* g_pprof_path = new string(FLAGS_symbolize_pprof); 75 76 // Returns NULL if we're on an OS where we can't get the invocation name. 77 // Using a static var is ok because we're not called from a thread. 78 static char* GetProgramInvocationName() { 79 #if defined(HAVE_PROGRAM_INVOCATION_NAME) 80 extern char* program_invocation_name; // gcc provides this 81 return program_invocation_name; 82 #elif defined(__MACH__) 83 // We don't want to allocate memory for this since we may be 84 // calculating it when memory is corrupted. 85 static char program_invocation_name[PATH_MAX]; 86 if (program_invocation_name[0] == '\0') { // first time calculating 87 uint32_t length = sizeof(program_invocation_name); 88 if (_NSGetExecutablePath(program_invocation_name, &length)) 89 return NULL; 90 } 91 return program_invocation_name; 92 #else 93 return NULL; // figure out a way to get argv[0] 94 #endif 95 } 96 97 // Prints an error message when you can't run Symbolize(). 98 static void PrintError(const char* reason) { 99 RAW_LOG(ERROR, 100 "*** WARNING: Cannot convert addresses to symbols in output below.\n" 101 "*** Reason: %s\n" 102 "*** If you cannot fix this, try running pprof directly.\n", 103 reason); 104 } 105 106 void SymbolTable::Add(const void* addr) { 107 symbolization_table_[addr] = ""; 108 } 109 110 const char* SymbolTable::GetSymbol(const void* addr) { 111 return symbolization_table_[addr]; 112 } 113 114 // Updates symbolization_table with the pointers to symbol names corresponding 115 // to its keys. The symbol names are stored in out, which is allocated and 116 // freed by the caller of this routine. 117 // Note that the forking/etc is not thread-safe or re-entrant. That's 118 // ok for the purpose we need -- reporting leaks detected by heap-checker 119 // -- but be careful if you decide to use this routine for other purposes. 120 // Returns number of symbols read on error. If can't symbolize, returns 0 121 // and emits an error message about why. 122 int SymbolTable::Symbolize() { 123 #if !defined(HAVE_UNISTD_H) || !defined(HAVE_SYS_SOCKET_H) || !defined(HAVE_SYS_WAIT_H) 124 PrintError("Perftools does not know how to call a sub-process on this O/S"); 125 return 0; 126 #else 127 const char* argv0 = GetProgramInvocationName(); 128 if (argv0 == NULL) { // can't call symbolize if we can't figure out our name 129 PrintError("Cannot figure out the name of this executable (argv0)"); 130 return 0; 131 } 132 if (access(g_pprof_path->c_str(), R_OK) != 0) { 133 PrintError("Cannot find 'pprof' (is PPROF_PATH set correctly?)"); 134 return 0; 135 } 136 137 // All this work is to do two-way communication. ugh. 138 int *child_in = NULL; // file descriptors 139 int *child_out = NULL; // for now, we don't worry about child_err 140 int child_fds[5][2]; // socketpair may be called up to five times below 141 142 // The client program may close its stdin and/or stdout and/or stderr 143 // thus allowing socketpair to reuse file descriptors 0, 1 or 2. 144 // In this case the communication between the forked processes may be broken 145 // if either the parent or the child tries to close or duplicate these 146 // descriptors. The loop below produces two pairs of file descriptors, each 147 // greater than 2 (stderr). 148 for (int i = 0; i < 5; i++) { 149 if (socketpair(AF_UNIX, SOCK_STREAM, 0, child_fds[i]) == -1) { 150 for (int j = 0; j < i; j++) { 151 close(child_fds[j][0]); 152 close(child_fds[j][1]); 153 PrintError("Cannot create a socket pair"); 154 return 0; 155 } 156 } else { 157 if ((child_fds[i][0] > 2) && (child_fds[i][1] > 2)) { 158 if (child_in == NULL) { 159 child_in = child_fds[i]; 160 } else { 161 child_out = child_fds[i]; 162 for (int j = 0; j < i; j++) { 163 if (child_fds[j] == child_in) continue; 164 close(child_fds[j][0]); 165 close(child_fds[j][1]); 166 } 167 break; 168 } 169 } 170 } 171 } 172 173 switch (fork()) { 174 case -1: { // error 175 close(child_in[0]); 176 close(child_in[1]); 177 close(child_out[0]); 178 close(child_out[1]); 179 PrintError("Unknown error calling fork()"); 180 return 0; 181 } 182 case 0: { // child 183 close(child_in[1]); // child uses the 0's, parent uses the 1's 184 close(child_out[1]); // child uses the 0's, parent uses the 1's 185 close(0); 186 close(1); 187 if (dup2(child_in[0], 0) == -1) _exit(1); 188 if (dup2(child_out[0], 1) == -1) _exit(2); 189 // Unset vars that might cause trouble when we fork 190 unsetenv("CPUPROFILE"); 191 unsetenv("HEAPPROFILE"); 192 unsetenv("HEAPCHECK"); 193 unsetenv("PERFTOOLS_VERBOSE"); 194 execlp(g_pprof_path->c_str(), g_pprof_path->c_str(), 195 "--symbols", argv0, NULL); 196 _exit(3); // if execvp fails, it's bad news for us 197 } 198 default: { // parent 199 close(child_in[0]); // child uses the 0's, parent uses the 1's 200 close(child_out[0]); // child uses the 0's, parent uses the 1's 201 #ifdef HAVE_POLL_H 202 // Waiting for 1ms seems to give the OS time to notice any errors. 203 poll(0, 0, 1); 204 // For maximum safety, we check to make sure the execlp 205 // succeeded before trying to write. (Otherwise we'll get a 206 // SIGPIPE.) For systems without poll.h, we'll just skip this 207 // check, and trust that the user set PPROF_PATH correctly! 208 struct pollfd pfd = { child_in[1], POLLOUT, 0 }; 209 if (!poll(&pfd, 1, 0) || !(pfd.revents & POLLOUT) || 210 (pfd.revents & (POLLHUP|POLLERR))) { 211 PrintError("Cannot run 'pprof' (is PPROF_PATH set correctly?)"); 212 return 0; 213 } 214 #endif 215 #if defined(__CYGWIN__) || defined(__CYGWIN32__) 216 // On cygwin, DumpProcSelfMaps() takes a HANDLE, not an fd. Convert. 217 const HANDLE symbols_handle = (HANDLE) get_osfhandle(child_in[1]); 218 DumpProcSelfMaps(symbols_handle); 219 #else 220 DumpProcSelfMaps(child_in[1]); // what pprof expects on stdin 221 #endif 222 223 // Allocate 24 bytes = ("0x" + 8 bytes + "\n" + overhead) for each 224 // address to feed to pprof. 225 const int kOutBufSize = 24 * symbolization_table_.size(); 226 char *pprof_buffer = new char[kOutBufSize]; 227 int written = 0; 228 for (SymbolMap::const_iterator iter = symbolization_table_.begin(); 229 iter != symbolization_table_.end(); ++iter) { 230 written += snprintf(pprof_buffer + written, kOutBufSize - written, 231 // pprof expects format to be 0xXXXXXX 232 "0x%" PRIxPTR "\n", reinterpret_cast<uintptr_t>(iter->first)); 233 } 234 write(child_in[1], pprof_buffer, strlen(pprof_buffer)); 235 close(child_in[1]); // that's all we need to write 236 237 const int kSymbolBufferSize = kSymbolSize * symbolization_table_.size(); 238 int total_bytes_read = 0; 239 delete[] symbol_buffer_; 240 symbol_buffer_ = new char[kSymbolBufferSize]; 241 memset(symbol_buffer_, '\0', kSymbolBufferSize); 242 while (1) { 243 int bytes_read = read(child_out[1], symbol_buffer_ + total_bytes_read, 244 kSymbolBufferSize - total_bytes_read); 245 if (bytes_read < 0) { 246 close(child_out[1]); 247 PrintError("Cannot read data from pprof"); 248 return 0; 249 } else if (bytes_read == 0) { 250 close(child_out[1]); 251 wait(NULL); 252 break; 253 } else { 254 total_bytes_read += bytes_read; 255 } 256 } 257 // We have successfully read the output of pprof into out. Make sure 258 // the last symbol is full (we can tell because it ends with a \n). 259 if (total_bytes_read == 0 || symbol_buffer_[total_bytes_read - 1] != '\n') 260 return 0; 261 // make the symbolization_table_ values point to the output vector 262 SymbolMap::iterator fill = symbolization_table_.begin(); 263 int num_symbols = 0; 264 const char *current_name = symbol_buffer_; 265 for (int i = 0; i < total_bytes_read; i++) { 266 if (symbol_buffer_[i] == '\n') { 267 fill->second = current_name; 268 symbol_buffer_[i] = '\0'; 269 current_name = symbol_buffer_ + i + 1; 270 fill++; 271 num_symbols++; 272 } 273 } 274 return num_symbols; 275 } 276 } 277 PrintError("Unkown error (should never occur!)"); 278 return 0; // shouldn't be reachable 279 #endif 280 } 281