1 // Copyright (c) 2010 Google Inc. All Rights Reserved. 2 // 3 // Redistribution and use in source and binary forms, with or without 4 // modification, are permitted provided that the following conditions are 5 // met: 6 // 7 // * Redistributions of source code must retain the above copyright 8 // notice, this list of conditions and the following disclaimer. 9 // * Redistributions in binary form must reproduce the above 10 // copyright notice, this list of conditions and the following disclaimer 11 // in the documentation and/or other materials provided with the 12 // distribution. 13 // * Neither the name of Google Inc. nor the names of its 14 // contributors may be used to endorse or promote products derived from 15 // this software without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 // Original author: Jim Blandy <jimb (at) mozilla.com> <jimb (at) red-bean.com> 30 31 // This file implements the google_breakpad::StabsReader class. 32 // See stabs_reader.h. 33 34 #include "common/stabs_reader.h" 35 36 #include <assert.h> 37 #include <stab.h> 38 #include <string.h> 39 40 #include <string> 41 42 #include "common/using_std_string.h" 43 44 using std::vector; 45 46 namespace google_breakpad { 47 48 StabsReader::EntryIterator::EntryIterator(const ByteBuffer *buffer, 49 bool big_endian, size_t value_size) 50 : value_size_(value_size), cursor_(buffer, big_endian) { 51 // Actually, we could handle weird sizes just fine, but they're 52 // probably mistakes --- expressed in bits, say. 53 assert(value_size == 4 || value_size == 8); 54 entry_.index = 0; 55 Fetch(); 56 } 57 58 void StabsReader::EntryIterator::Fetch() { 59 cursor_ 60 .Read(4, false, &entry_.name_offset) 61 .Read(1, false, &entry_.type) 62 .Read(1, false, &entry_.other) 63 .Read(2, false, &entry_.descriptor) 64 .Read(value_size_, false, &entry_.value); 65 entry_.at_end = !cursor_; 66 } 67 68 StabsReader::StabsReader(const uint8_t *stab, size_t stab_size, 69 const uint8_t *stabstr, size_t stabstr_size, 70 bool big_endian, size_t value_size, bool unitized, 71 StabsHandler *handler) 72 : entries_(stab, stab_size), 73 strings_(stabstr, stabstr_size), 74 iterator_(&entries_, big_endian, value_size), 75 unitized_(unitized), 76 handler_(handler), 77 string_offset_(0), 78 next_cu_string_offset_(0), 79 current_source_file_(NULL) { } 80 81 const char *StabsReader::SymbolString() { 82 ptrdiff_t offset = string_offset_ + iterator_->name_offset; 83 if (offset < 0 || (size_t) offset >= strings_.Size()) { 84 handler_->Warning("symbol %d: name offset outside the string section\n", 85 iterator_->index); 86 // Return our null string, to keep our promise about all names being 87 // taken from the string section. 88 offset = 0; 89 } 90 return reinterpret_cast<const char *>(strings_.start + offset); 91 } 92 93 bool StabsReader::Process() { 94 while (!iterator_->at_end) { 95 if (iterator_->type == N_SO) { 96 if (! ProcessCompilationUnit()) 97 return false; 98 } else if (iterator_->type == N_UNDF && unitized_) { 99 // In unitized STABS (including Linux STABS, and pretty much anything 100 // else that puts STABS data in sections), at the head of each 101 // compilation unit's entries there is an N_UNDF stab giving the 102 // number of symbols in the compilation unit, and the number of bytes 103 // that compilation unit's strings take up in the .stabstr section. 104 // Each CU's strings are separate; the n_strx values are offsets 105 // within the current CU's portion of the .stabstr section. 106 // 107 // As an optimization, the GNU linker combines all the 108 // compilation units into one, with a single N_UNDF at the 109 // beginning. However, other linkers, like Gold, do not perform 110 // this optimization. 111 string_offset_ = next_cu_string_offset_; 112 next_cu_string_offset_ = iterator_->value; 113 ++iterator_; 114 } 115 #if defined(HAVE_MACH_O_NLIST_H) 116 // Export symbols in Mach-O binaries look like this. 117 // This is necessary in order to be able to dump symbols 118 // from OS X system libraries. 119 else if ((iterator_->type & N_STAB) == 0 && 120 (iterator_->type & N_TYPE) == N_SECT) { 121 ProcessExtern(); 122 } 123 #endif 124 else { 125 ++iterator_; 126 } 127 } 128 return true; 129 } 130 131 bool StabsReader::ProcessCompilationUnit() { 132 assert(!iterator_->at_end && iterator_->type == N_SO); 133 134 // There may be an N_SO entry whose name ends with a slash, 135 // indicating the directory in which the compilation occurred. 136 // The build directory defaults to NULL. 137 const char *build_directory = NULL; 138 { 139 const char *name = SymbolString(); 140 if (name[0] && name[strlen(name) - 1] == '/') { 141 build_directory = name; 142 ++iterator_; 143 } 144 } 145 146 // We expect to see an N_SO entry with a filename next, indicating 147 // the start of the compilation unit. 148 { 149 if (iterator_->at_end || iterator_->type != N_SO) 150 return true; 151 const char *name = SymbolString(); 152 if (name[0] == '\0') { 153 // This seems to be a stray end-of-compilation-unit marker; 154 // consume it, but don't report the end, since we didn't see a 155 // beginning. 156 ++iterator_; 157 return true; 158 } 159 current_source_file_ = name; 160 } 161 162 if (! handler_->StartCompilationUnit(current_source_file_, 163 iterator_->value, 164 build_directory)) 165 return false; 166 167 ++iterator_; 168 169 // The STABS documentation says that some compilers may emit 170 // additional N_SO entries with names immediately following the 171 // first, and that they should be ignored. However, the original 172 // Breakpad STABS reader doesn't ignore them, so we won't either. 173 174 // Process the body of the compilation unit, up to the next N_SO. 175 while (!iterator_->at_end && iterator_->type != N_SO) { 176 if (iterator_->type == N_FUN) { 177 if (! ProcessFunction()) 178 return false; 179 } else if (iterator_->type == N_SLINE) { 180 // Mac OS X STABS place SLINE records before functions. 181 Line line; 182 // The value of an N_SLINE entry that appears outside a function is 183 // the absolute address of the line. 184 line.address = iterator_->value; 185 line.filename = current_source_file_; 186 // The n_desc of a N_SLINE entry is the line number. It's a 187 // signed 16-bit field; line numbers from 32768 to 65535 are 188 // stored as n-65536. 189 line.number = (uint16_t) iterator_->descriptor; 190 queued_lines_.push_back(line); 191 ++iterator_; 192 } else if (iterator_->type == N_SOL) { 193 current_source_file_ = SymbolString(); 194 ++iterator_; 195 } else { 196 // Ignore anything else. 197 ++iterator_; 198 } 199 } 200 201 // An N_SO with an empty name indicates the end of the compilation 202 // unit. Default to zero. 203 uint64_t ending_address = 0; 204 if (!iterator_->at_end) { 205 assert(iterator_->type == N_SO); 206 const char *name = SymbolString(); 207 if (name[0] == '\0') { 208 ending_address = iterator_->value; 209 ++iterator_; 210 } 211 } 212 213 if (! handler_->EndCompilationUnit(ending_address)) 214 return false; 215 216 queued_lines_.clear(); 217 218 return true; 219 } 220 221 bool StabsReader::ProcessFunction() { 222 assert(!iterator_->at_end && iterator_->type == N_FUN); 223 224 uint64_t function_address = iterator_->value; 225 // The STABS string for an N_FUN entry is the name of the function, 226 // followed by a colon, followed by type information for the 227 // function. We want to pass the name alone to StartFunction. 228 const char *stab_string = SymbolString(); 229 const char *name_end = strchr(stab_string, ':'); 230 if (! name_end) 231 name_end = stab_string + strlen(stab_string); 232 string name(stab_string, name_end - stab_string); 233 if (! handler_->StartFunction(name, function_address)) 234 return false; 235 ++iterator_; 236 237 // If there were any SLINE records given before the function, report them now. 238 for (vector<Line>::const_iterator it = queued_lines_.begin(); 239 it != queued_lines_.end(); it++) { 240 if (!handler_->Line(it->address, it->filename, it->number)) 241 return false; 242 } 243 queued_lines_.clear(); 244 245 while (!iterator_->at_end) { 246 if (iterator_->type == N_SO || iterator_->type == N_FUN) 247 break; 248 else if (iterator_->type == N_SLINE) { 249 // The value of an N_SLINE entry is the offset of the line from 250 // the function's start address. 251 uint64_t line_address = function_address + iterator_->value; 252 // The n_desc of a N_SLINE entry is the line number. It's a 253 // signed 16-bit field; line numbers from 32768 to 65535 are 254 // stored as n-65536. 255 uint16_t line_number = iterator_->descriptor; 256 if (! handler_->Line(line_address, current_source_file_, line_number)) 257 return false; 258 ++iterator_; 259 } else if (iterator_->type == N_SOL) { 260 current_source_file_ = SymbolString(); 261 ++iterator_; 262 } else 263 // Ignore anything else. 264 ++iterator_; 265 } 266 267 // We've reached the end of the function. See if we can figure out its 268 // ending address. 269 uint64_t ending_address = 0; 270 if (!iterator_->at_end) { 271 assert(iterator_->type == N_SO || iterator_->type == N_FUN); 272 if (iterator_->type == N_FUN) { 273 const char *symbol_name = SymbolString(); 274 if (symbol_name[0] == '\0') { 275 // An N_FUN entry with no name is a terminator for this function; 276 // its value is the function's size. 277 ending_address = function_address + iterator_->value; 278 ++iterator_; 279 } else { 280 // An N_FUN entry with a name is the next function, and we can take 281 // its value as our ending address. Don't advance the iterator, as 282 // we'll use this symbol to start the next function as well. 283 ending_address = iterator_->value; 284 } 285 } else { 286 // An N_SO entry could be an end-of-compilation-unit marker, or the 287 // start of the next compilation unit, but in either case, its value 288 // is our ending address. We don't advance the iterator; 289 // ProcessCompilationUnit will decide what to do with this symbol. 290 ending_address = iterator_->value; 291 } 292 } 293 294 if (! handler_->EndFunction(ending_address)) 295 return false; 296 297 return true; 298 } 299 300 bool StabsReader::ProcessExtern() { 301 #if defined(HAVE_MACH_O_NLIST_H) 302 assert(!iterator_->at_end && 303 (iterator_->type & N_STAB) == 0 && 304 (iterator_->type & N_TYPE) == N_SECT); 305 #endif 306 307 // TODO(mark): only do symbols in the text section? 308 if (!handler_->Extern(SymbolString(), iterator_->value)) 309 return false; 310 311 ++iterator_; 312 return true; 313 } 314 315 } // namespace google_breakpad 316