Home | History | Annotate | Download | only in quipper
      1 // Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "perf_reader.h"
      6 
      7 #include <byteswap.h>
      8 #include <limits.h>
      9 
     10 #include <bitset>
     11 #include <cstdio>
     12 #include <cstdlib>
     13 #include <cstring>
     14 #include <vector>
     15 
     16 #define LOG_TAG "perf_reader"
     17 
     18 #include "base/logging.h"
     19 
     20 #include "quipper_string.h"
     21 #include "perf_utils.h"
     22 
     23 namespace quipper {
     24 
     25 struct BufferWithSize {
     26   char* ptr;
     27   size_t size;
     28 };
     29 
     30 // If the buffer is read-only, it is not sufficient to mark the previous struct
     31 // as const, as this only means that the pointer cannot be changed, and says
     32 // nothing about the contents of the buffer.  So, we need another struct.
     33 struct ConstBufferWithSize {
     34   const char* ptr;
     35   size_t size;
     36 };
     37 
     38 namespace {
     39 
     40 // The type of the number of string data, found in the command line metadata in
     41 // the perf data file.
     42 typedef u32 num_string_data_type;
     43 
     44 // Types of the event desc fields that are not found in other structs.
     45 typedef u32 event_desc_num_events;
     46 typedef u32 event_desc_attr_size;
     47 typedef u32 event_desc_num_unique_ids;
     48 
     49 // The type of the number of nodes field in NUMA topology.
     50 typedef u32 numa_topology_num_nodes_type;
     51 
     52 // A mask that is applied to metadata_mask_ in order to get a mask for
     53 // only the metadata supported by quipper.
     54 const uint32_t kSupportedMetadataMask =
     55     1 << HEADER_TRACING_DATA |
     56     1 << HEADER_BUILD_ID |
     57     1 << HEADER_HOSTNAME |
     58     1 << HEADER_OSRELEASE |
     59     1 << HEADER_VERSION |
     60     1 << HEADER_ARCH |
     61     1 << HEADER_NRCPUS |
     62     1 << HEADER_CPUDESC |
     63     1 << HEADER_CPUID |
     64     1 << HEADER_TOTAL_MEM |
     65     1 << HEADER_CMDLINE |
     66     1 << HEADER_EVENT_DESC |
     67     1 << HEADER_CPU_TOPOLOGY |
     68     1 << HEADER_NUMA_TOPOLOGY |
     69     1 << HEADER_BRANCH_STACK;
     70 
     71 // By default, the build ID event has PID = -1.
     72 const uint32_t kDefaultBuildIDEventPid = static_cast<uint32_t>(-1);
     73 
     74 template <class T>
     75 void ByteSwap(T* input) {
     76   switch (sizeof(T)) {
     77   case sizeof(uint8_t):
     78     LOG(WARNING) << "Attempting to byte swap on a single byte.";
     79     break;
     80   case sizeof(uint16_t):
     81     *input = bswap_16(*input);
     82     break;
     83   case sizeof(uint32_t):
     84     *input = bswap_32(*input);
     85     break;
     86   case sizeof(uint64_t):
     87     *input = bswap_64(*input);
     88     break;
     89   default:
     90     LOG(FATAL) << "Invalid size for byte swap: " << sizeof(T) << " bytes";
     91     break;
     92   }
     93 }
     94 
     95 u64 MaybeSwap(u64 value, bool swap) {
     96   if (swap)
     97     return bswap_64(value);
     98   return value;
     99 }
    100 
    101 u32 MaybeSwap(u32 value, bool swap) {
    102   if (swap)
    103     return bswap_32(value);
    104   return value;
    105 }
    106 
    107 u8 ReverseByte(u8 x) {
    108   x = (x & 0xf0) >> 4 | (x & 0x0f) << 4;  // exchange nibbles
    109   x = (x & 0xcc) >> 2 | (x & 0x33) << 2;  // exchange pairs
    110   x = (x & 0xaa) >> 1 | (x & 0x55) << 1;  // exchange neighbors
    111   return x;
    112 }
    113 
    114 // If field points to the start of a bitfield padded to len bytes, this
    115 // performs an endian swap of the bitfield, assuming the compiler that produced
    116 // it conforms to the same ABI (bitfield layout is not completely specified by
    117 // the language).
    118 void SwapBitfieldOfBits(u8* field, size_t len) {
    119   for (size_t i = 0; i < len; i++) {
    120     field[i] = ReverseByte(field[i]);
    121   }
    122 }
    123 
    124 // The code currently assumes that the compiler will not add any padding to the
    125 // various structs.  These CHECKs make sure that this is true.
    126 void CheckNoEventHeaderPadding() {
    127   perf_event_header header;
    128   CHECK_EQ(sizeof(header),
    129            sizeof(header.type) + sizeof(header.misc) + sizeof(header.size));
    130 }
    131 
    132 void CheckNoPerfEventAttrPadding() {
    133   perf_event_attr attr;
    134   CHECK_EQ(sizeof(attr),
    135            (reinterpret_cast<u64>(&attr.__reserved_2) -
    136             reinterpret_cast<u64>(&attr)) +
    137            sizeof(attr.__reserved_2));
    138 }
    139 
    140 void CheckNoEventTypePadding() {
    141   perf_trace_event_type event_type;
    142   CHECK_EQ(sizeof(event_type),
    143            sizeof(event_type.event_id) + sizeof(event_type.name));
    144 }
    145 
    146 void CheckNoBuildIDEventPadding() {
    147   build_id_event event;
    148   CHECK_EQ(sizeof(event),
    149            sizeof(event.header.type) + sizeof(event.header.misc) +
    150            sizeof(event.header.size) + sizeof(event.pid) +
    151            sizeof(event.build_id));
    152 }
    153 
    154 // Creates/updates a build id event with |build_id| and |filename|.
    155 // Passing "" to |build_id| or |filename| will leave the corresponding field
    156 // unchanged (in which case |event| must be non-null).
    157 // If |event| is null or is not large enough, a new event will be created.
    158 // In this case, if |event| is non-null, it will be freed.
    159 // Otherwise, updates the fields of the existing event.
    160 // |new_misc| indicates kernel vs user space, and is only used to fill in the
    161 // |header.misc| field of new events.
    162 // In either case, returns a pointer to the event containing the updated data,
    163 // or NULL in the case of a failure.
    164 build_id_event* CreateOrUpdateBuildID(const string& build_id,
    165                                       const string& filename,
    166                                       uint16_t new_misc,
    167                                       build_id_event* event) {
    168   // When creating an event from scratch, build id and filename must be present.
    169   if (!event && (build_id.empty() || filename.empty()))
    170     return NULL;
    171   size_t new_len = GetUint64AlignedStringLength(
    172       filename.empty() ? event->filename : filename);
    173 
    174   // If event is null, or we don't have enough memory, allocate more memory, and
    175   // switch the new pointer with the existing pointer.
    176   size_t new_size = sizeof(*event) + new_len;
    177   if (!event || new_size > event->header.size) {
    178     build_id_event* new_event = CallocMemoryForBuildID(new_size);
    179 
    180     if (event) {
    181       // Copy over everything except the filename and free the event.
    182       // It is guaranteed that we are changing the filename - otherwise, the old
    183       // size and the new size would be equal.
    184       *new_event = *event;
    185       free(event);
    186     } else {
    187       // Fill in the fields appropriately.
    188       new_event->header.type = HEADER_BUILD_ID;
    189       new_event->header.misc = new_misc;
    190       new_event->pid = kDefaultBuildIDEventPid;
    191     }
    192     event = new_event;
    193   }
    194 
    195   // Here, event is the pointer to the build_id_event that we are keeping.
    196   // Update the event's size, build id, and filename.
    197   if (!build_id.empty() &&
    198       !StringToHex(build_id, event->build_id, arraysize(event->build_id))) {
    199     free(event);
    200     return NULL;
    201   }
    202 
    203   if (!filename.empty())
    204     CHECK_GT(snprintf(event->filename, new_len, "%s", filename.c_str()), 0);
    205 
    206   event->header.size = new_size;
    207   return event;
    208 }
    209 
    210 // Reads |size| bytes from |buffer| into |dest| and advances |src_offset|.
    211 bool ReadDataFromBuffer(const ConstBufferWithSize& buffer,
    212                         size_t size,
    213                         const string& value_name,
    214                         size_t* src_offset,
    215                         void* dest) {
    216   size_t end_offset = *src_offset + size / sizeof(*buffer.ptr);
    217   if (buffer.size < end_offset) {
    218     LOG(ERROR) << "Not enough bytes to read " << value_name
    219                << ". Requested " << size << " bytes";
    220     return false;
    221   }
    222   memcpy(dest, buffer.ptr + *src_offset, size);
    223   *src_offset = end_offset;
    224   return true;
    225 }
    226 
    227 // Reads a CStringWithLength from |buffer| into |dest|, and advances the offset.
    228 bool ReadStringFromBuffer(const ConstBufferWithSize& buffer,
    229                           bool is_cross_endian,
    230                           size_t* offset,
    231                           CStringWithLength* dest) {
    232   if (!ReadDataFromBuffer(buffer, sizeof(dest->len), "string length",
    233                           offset, &dest->len)) {
    234     return false;
    235   }
    236   if (is_cross_endian)
    237     ByteSwap(&dest->len);
    238 
    239   if (buffer.size < *offset + dest->len) {
    240     LOG(ERROR) << "Not enough bytes to read string";
    241     return false;
    242   }
    243   dest->str = string(buffer.ptr + *offset);
    244   *offset += dest->len / sizeof(*buffer.ptr);
    245   return true;
    246 }
    247 
    248 // Read read info from perf data.  Corresponds to sample format type
    249 // PERF_SAMPLE_READ.
    250 const uint64_t* ReadReadInfo(const uint64_t* array,
    251                            bool swap_bytes,
    252                            uint64_t read_format,
    253                            struct perf_sample* sample) {
    254   if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
    255     sample->read.time_enabled = *array++;
    256   if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
    257     sample->read.time_running = *array++;
    258   if (read_format & PERF_FORMAT_ID)
    259     sample->read.one.id = *array++;
    260 
    261   if (swap_bytes) {
    262     ByteSwap(&sample->read.time_enabled);
    263     ByteSwap(&sample->read.time_running);
    264     ByteSwap(&sample->read.one.id);
    265   }
    266 
    267   return array;
    268 }
    269 
    270 // Read call chain info from perf data.  Corresponds to sample format type
    271 // PERF_SAMPLE_CALLCHAIN.
    272 const uint64_t* ReadCallchain(const uint64_t* array,
    273                             bool swap_bytes,
    274                             struct perf_sample* sample) {
    275   // Make sure there is no existing allocated memory in |sample->callchain|.
    276   CHECK_EQ(static_cast<void*>(NULL), sample->callchain);
    277 
    278   // The callgraph data consists of a uint64_t value |nr| followed by |nr|
    279   // addresses.
    280   uint64_t callchain_size = *array++;
    281   if (swap_bytes)
    282     ByteSwap(&callchain_size);
    283   struct ip_callchain* callchain =
    284       reinterpret_cast<struct ip_callchain*>(new uint64_t[callchain_size + 1]);
    285   callchain->nr = callchain_size;
    286   for (size_t i = 0; i < callchain_size; ++i) {
    287     callchain->ips[i] = *array++;
    288     if (swap_bytes)
    289       ByteSwap(&callchain->ips[i]);
    290   }
    291   sample->callchain = callchain;
    292 
    293   return array;
    294 }
    295 
    296 // Read raw info from perf data.  Corresponds to sample format type
    297 // PERF_SAMPLE_RAW.
    298 const uint64_t* ReadRawData(const uint64_t* array,
    299                           bool swap_bytes,
    300                           struct perf_sample* sample) {
    301   // First read the size.
    302   const uint32_t* ptr = reinterpret_cast<const uint32_t*>(array);
    303   sample->raw_size = *ptr++;
    304   if (swap_bytes)
    305     ByteSwap(&sample->raw_size);
    306 
    307   // Allocate space for and read the raw data bytes.
    308   sample->raw_data = new uint8_t[sample->raw_size];
    309   memcpy(sample->raw_data, ptr, sample->raw_size);
    310 
    311   // Determine the bytes that were read, and align to the next 64 bits.
    312   int bytes_read = AlignSize(sizeof(sample->raw_size) + sample->raw_size,
    313                              sizeof(uint64_t));
    314   array += bytes_read / sizeof(uint64_t);
    315 
    316   return array;
    317 }
    318 
    319 // Read call chain info from perf data.  Corresponds to sample format type
    320 // PERF_SAMPLE_CALLCHAIN.
    321 const uint64_t* ReadBranchStack(const uint64_t* array,
    322                               bool swap_bytes,
    323                               struct perf_sample* sample) {
    324   // Make sure there is no existing allocated memory in
    325   // |sample->branch_stack|.
    326   CHECK_EQ(static_cast<void*>(NULL), sample->branch_stack);
    327 
    328   // The branch stack data consists of a uint64_t value |nr| followed by |nr|
    329   // branch_entry structs.
    330   uint64_t branch_stack_size = *array++;
    331   if (swap_bytes)
    332     ByteSwap(&branch_stack_size);
    333   struct branch_stack* branch_stack =
    334       reinterpret_cast<struct branch_stack*>(
    335           new uint8_t[sizeof(uint64_t) +
    336                     branch_stack_size * sizeof(struct branch_entry)]);
    337   branch_stack->nr = branch_stack_size;
    338   for (size_t i = 0; i < branch_stack_size; ++i) {
    339     memcpy(&branch_stack->entries[i], array, sizeof(struct branch_entry));
    340     array += sizeof(struct branch_entry) / sizeof(*array);
    341     if (swap_bytes) {
    342       ByteSwap(&branch_stack->entries[i].from);
    343       ByteSwap(&branch_stack->entries[i].to);
    344     }
    345   }
    346   sample->branch_stack = branch_stack;
    347 
    348   return array;
    349 }
    350 
    351 size_t ReadPerfSampleFromData(const perf_event_type event_type,
    352                               const uint64_t* array,
    353                               const uint64_t sample_fields,
    354                               const uint64_t read_format,
    355                               bool swap_bytes,
    356                               struct perf_sample* sample) {
    357   const uint64_t* initial_array_ptr = array;
    358 
    359   union {
    360     uint32_t val32[sizeof(uint64_t) / sizeof(uint32_t)];
    361     uint64_t val64;
    362   };
    363 
    364   // See structure for PERF_RECORD_SAMPLE in kernel/perf_event.h
    365   // and compare sample_id when sample_id_all is set.
    366 
    367   // NB: For sample_id, sample_fields has already been masked to the set
    368   // of fields in that struct by GetSampleFieldsForEventType. That set
    369   // of fields is mostly in the same order as PERF_RECORD_SAMPLE, with
    370   // the exception of PERF_SAMPLE_IDENTIFIER.
    371 
    372   // PERF_SAMPLE_IDENTIFIER is in a different location depending on
    373   // if this is a SAMPLE event or the sample_id of another event.
    374   if (event_type == PERF_RECORD_SAMPLE) {
    375     // { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
    376     if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
    377       sample->id = MaybeSwap(*array++, swap_bytes);
    378     }
    379   }
    380 
    381   // { u64                   ip;       } && PERF_SAMPLE_IP
    382   if (sample_fields & PERF_SAMPLE_IP) {
    383     sample->ip = MaybeSwap(*array++, swap_bytes);
    384   }
    385 
    386   // { u32                   pid, tid; } && PERF_SAMPLE_TID
    387   if (sample_fields & PERF_SAMPLE_TID) {
    388     val64 = *array++;
    389     sample->pid = MaybeSwap(val32[0], swap_bytes);
    390     sample->tid = MaybeSwap(val32[1], swap_bytes);
    391   }
    392 
    393   // { u64                   time;     } && PERF_SAMPLE_TIME
    394   if (sample_fields & PERF_SAMPLE_TIME) {
    395     sample->time = MaybeSwap(*array++, swap_bytes);
    396   }
    397 
    398   // { u64                   addr;     } && PERF_SAMPLE_ADDR
    399   if (sample_fields & PERF_SAMPLE_ADDR) {
    400     sample->addr = MaybeSwap(*array++, swap_bytes);
    401   }
    402 
    403   // { u64                   id;       } && PERF_SAMPLE_ID
    404   if (sample_fields & PERF_SAMPLE_ID) {
    405     sample->id = MaybeSwap(*array++, swap_bytes);
    406   }
    407 
    408   // { u64                   stream_id;} && PERF_SAMPLE_STREAM_ID
    409   if (sample_fields & PERF_SAMPLE_STREAM_ID) {
    410     sample->stream_id = MaybeSwap(*array++, swap_bytes);
    411   }
    412 
    413   // { u32                   cpu, res; } && PERF_SAMPLE_CPU
    414   if (sample_fields & PERF_SAMPLE_CPU) {
    415     val64 = *array++;
    416     sample->cpu = MaybeSwap(val32[0], swap_bytes);
    417     // sample->res = MaybeSwap(*val32[1], swap_bytes);  // not implemented?
    418   }
    419 
    420   // This is the location of PERF_SAMPLE_IDENTIFIER in struct sample_id.
    421   if (event_type != PERF_RECORD_SAMPLE) {
    422     // { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
    423     if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
    424       sample->id = MaybeSwap(*array++, swap_bytes);
    425     }
    426   }
    427 
    428   //
    429   // The remaining fields are only in PERF_RECORD_SAMPLE
    430   //
    431 
    432   // { u64                   period;   } && PERF_SAMPLE_PERIOD
    433   if (sample_fields & PERF_SAMPLE_PERIOD) {
    434     sample->period = MaybeSwap(*array++, swap_bytes);
    435   }
    436 
    437   // { struct read_format    values;   } && PERF_SAMPLE_READ
    438   if (sample_fields & PERF_SAMPLE_READ) {
    439     // TODO(cwp-team): support grouped read info.
    440     if (read_format & PERF_FORMAT_GROUP)
    441       return 0;
    442     array = ReadReadInfo(array, swap_bytes, read_format, sample);
    443   }
    444 
    445   // { u64                   nr,
    446   //   u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
    447   if (sample_fields & PERF_SAMPLE_CALLCHAIN) {
    448     array = ReadCallchain(array, swap_bytes, sample);
    449   }
    450 
    451   // { u32                   size;
    452   //   char                  data[size];}&& PERF_SAMPLE_RAW
    453   if (sample_fields & PERF_SAMPLE_RAW) {
    454     array = ReadRawData(array, swap_bytes, sample);
    455   }
    456 
    457   // { u64                   nr;
    458   //   { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
    459   if (sample_fields & PERF_SAMPLE_BRANCH_STACK) {
    460     array = ReadBranchStack(array, swap_bytes, sample);
    461   }
    462 
    463   static const u64 kUnimplementedSampleFields =
    464       PERF_SAMPLE_REGS_USER  |
    465       PERF_SAMPLE_STACK_USER |
    466       PERF_SAMPLE_WEIGHT     |
    467       PERF_SAMPLE_DATA_SRC   |
    468       PERF_SAMPLE_TRANSACTION;
    469 
    470   if (sample_fields & kUnimplementedSampleFields) {
    471     LOG(WARNING) << "Unimplemented sample fields 0x"
    472                  << std::hex << (sample_fields & kUnimplementedSampleFields);
    473   }
    474 
    475   if (sample_fields & ~(PERF_SAMPLE_MAX-1)) {
    476     LOG(WARNING) << "Unrecognized sample fields 0x"
    477                  << std::hex << (sample_fields & ~(PERF_SAMPLE_MAX-1));
    478   }
    479 
    480   return (array - initial_array_ptr) * sizeof(uint64_t);
    481 }
    482 
    483 size_t WritePerfSampleToData(const perf_event_type event_type,
    484                              const struct perf_sample& sample,
    485                              const uint64_t sample_fields,
    486                              const uint64_t read_format,
    487                              uint64_t* array) {
    488   const uint64_t* initial_array_ptr = array;
    489 
    490   union {
    491     uint32_t val32[sizeof(uint64_t) / sizeof(uint32_t)];
    492     uint64_t val64;
    493   };
    494 
    495   // See notes at the top of ReadPerfSampleFromData regarding the structure
    496   // of PERF_RECORD_SAMPLE, sample_id, and PERF_SAMPLE_IDENTIFIER, as they
    497   // all apply here as well.
    498 
    499   // PERF_SAMPLE_IDENTIFIER is in a different location depending on
    500   // if this is a SAMPLE event or the sample_id of another event.
    501   if (event_type == PERF_RECORD_SAMPLE) {
    502     // { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
    503     if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
    504       *array++ = sample.id;
    505     }
    506   }
    507 
    508   // { u64                   ip;       } && PERF_SAMPLE_IP
    509   if (sample_fields & PERF_SAMPLE_IP) {
    510     *array++ = sample.ip;
    511   }
    512 
    513   // { u32                   pid, tid; } && PERF_SAMPLE_TID
    514   if (sample_fields & PERF_SAMPLE_TID) {
    515     val32[0] = sample.pid;
    516     val32[1] = sample.tid;
    517     *array++ = val64;
    518   }
    519 
    520   // { u64                   time;     } && PERF_SAMPLE_TIME
    521   if (sample_fields & PERF_SAMPLE_TIME) {
    522     *array++ = sample.time;
    523   }
    524 
    525   // { u64                   addr;     } && PERF_SAMPLE_ADDR
    526   if (sample_fields & PERF_SAMPLE_ADDR) {
    527     *array++ = sample.addr;
    528   }
    529 
    530   // { u64                   id;       } && PERF_SAMPLE_ID
    531   if (sample_fields & PERF_SAMPLE_ID) {
    532     *array++ = sample.id;
    533   }
    534 
    535   // { u64                   stream_id;} && PERF_SAMPLE_STREAM_ID
    536   if (sample_fields & PERF_SAMPLE_STREAM_ID) {
    537     *array++ = sample.stream_id;
    538   }
    539 
    540   // { u32                   cpu, res; } && PERF_SAMPLE_CPU
    541   if (sample_fields & PERF_SAMPLE_CPU) {
    542     val32[0] = sample.cpu;
    543     // val32[1] = sample.res;  // not implemented?
    544     val32[1] = 0;
    545     *array++ = val64;
    546   }
    547 
    548   // This is the location of PERF_SAMPLE_IDENTIFIER in struct sample_id.
    549   if (event_type != PERF_RECORD_SAMPLE) {
    550     // { u64                   id;       } && PERF_SAMPLE_IDENTIFIER
    551     if (sample_fields & PERF_SAMPLE_IDENTIFIER) {
    552       *array++ = sample.id;
    553     }
    554   }
    555 
    556   //
    557   // The remaining fields are only in PERF_RECORD_SAMPLE
    558   //
    559 
    560   // { u64                   period;   } && PERF_SAMPLE_PERIOD
    561   if (sample_fields & PERF_SAMPLE_PERIOD) {
    562     *array++ = sample.period;
    563   }
    564 
    565   // { struct read_format    values;   } && PERF_SAMPLE_READ
    566   if (sample_fields & PERF_SAMPLE_READ) {
    567     // TODO(cwp-team): support grouped read info.
    568     if (read_format & PERF_FORMAT_GROUP)
    569       return 0;
    570     if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
    571       *array++ = sample.read.time_enabled;
    572     if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
    573       *array++ = sample.read.time_running;
    574     if (read_format & PERF_FORMAT_ID)
    575       *array++ = sample.read.one.id;
    576   }
    577 
    578   // { u64                   nr,
    579   //   u64                   ips[nr];  } && PERF_SAMPLE_CALLCHAIN
    580   if (sample_fields & PERF_SAMPLE_CALLCHAIN) {
    581     if (!sample.callchain) {
    582       LOG(ERROR) << "Expecting callchain data, but none was found.";
    583     } else {
    584       *array++ = sample.callchain->nr;
    585       for (size_t i = 0; i < sample.callchain->nr; ++i)
    586         *array++ = sample.callchain->ips[i];
    587     }
    588   }
    589 
    590   // { u32                   size;
    591   //   char                  data[size];}&& PERF_SAMPLE_RAW
    592   if (sample_fields & PERF_SAMPLE_RAW) {
    593     uint32_t* ptr = reinterpret_cast<uint32_t*>(array);
    594     *ptr++ = sample.raw_size;
    595     memcpy(ptr, sample.raw_data, sample.raw_size);
    596 
    597     // Update the data read pointer after aligning to the next 64 bytes.
    598     int num_bytes = AlignSize(sizeof(sample.raw_size) + sample.raw_size,
    599                               sizeof(uint64_t));
    600     array += num_bytes / sizeof(uint64_t);
    601   }
    602 
    603   // { u64                   nr;
    604   //   { u64 from, to, flags } lbr[nr];} && PERF_SAMPLE_BRANCH_STACK
    605   if (sample_fields & PERF_SAMPLE_BRANCH_STACK) {
    606     if (!sample.branch_stack) {
    607       LOG(ERROR) << "Expecting branch stack data, but none was found.";
    608     } else {
    609       *array++ = sample.branch_stack->nr;
    610       for (size_t i = 0; i < sample.branch_stack->nr; ++i) {
    611         *array++ = sample.branch_stack->entries[i].from;
    612         *array++ = sample.branch_stack->entries[i].to;
    613         memcpy(array++, &sample.branch_stack->entries[i].flags,
    614                sizeof(uint64_t));
    615       }
    616     }
    617   }
    618 
    619   return (array - initial_array_ptr) * sizeof(uint64_t);
    620 }
    621 
    622 }  // namespace
    623 
    624 PerfReader::~PerfReader() {
    625   // Free allocated memory.
    626   for (size_t i = 0; i < build_id_events_.size(); ++i)
    627     if (build_id_events_[i])
    628       free(build_id_events_[i]);
    629 }
    630 
    631 void PerfReader::PerfizeBuildIDString(string* build_id) {
    632   build_id->resize(kBuildIDStringLength, '0');
    633 }
    634 
    635 void PerfReader::UnperfizeBuildIDString(string* build_id) {
    636   const size_t kPaddingSize = 8;
    637   const string kBuildIDPadding = string(kPaddingSize, '0');
    638 
    639   // Remove kBuildIDPadding from the end of build_id until we cannot remove any
    640   // more, or removing more would cause the build id to be empty.
    641   while (build_id->size() > kPaddingSize &&
    642          build_id->substr(build_id->size() - kPaddingSize) == kBuildIDPadding) {
    643     build_id->resize(build_id->size() - kPaddingSize);
    644   }
    645 }
    646 
    647 bool PerfReader::ReadFile(const string& filename) {
    648   std::vector<char> data;
    649   if (!ReadFileToData(filename, &data))
    650     return false;
    651   return ReadFromVector(data);
    652 }
    653 
    654 bool PerfReader::ReadFromVector(const std::vector<char>& data) {
    655   return ReadFromPointer(&data[0], data.size());
    656 }
    657 
    658 bool PerfReader::ReadFromString(const string& str) {
    659   return ReadFromPointer(str.c_str(), str.size());
    660 }
    661 
    662 bool PerfReader::ReadFromPointer(const char* perf_data, size_t size) {
    663   const ConstBufferWithSize data = { perf_data, size };
    664 
    665   if (data.size == 0)
    666     return false;
    667   if (!ReadHeader(data))
    668     return false;
    669 
    670   // Check if it is normal perf data.
    671   if (header_.size == sizeof(header_)) {
    672     DLOG(INFO) << "Perf data is in normal format.";
    673     metadata_mask_ = header_.adds_features[0];
    674     return (ReadAttrs(data) && ReadEventTypes(data) && ReadData(data)
    675             && ReadMetadata(data));
    676   }
    677 
    678   // Otherwise it is piped data.
    679   LOG(ERROR) << "Internal error: no support for piped data";
    680   return false;
    681 }
    682 
    683 bool PerfReader::Localize(
    684     const std::map<string, string>& build_ids_to_filenames) {
    685   std::map<string, string> perfized_build_ids_to_filenames;
    686   std::map<string, string>::const_iterator it;
    687   for (it = build_ids_to_filenames.begin();
    688        it != build_ids_to_filenames.end();
    689        ++it) {
    690     string build_id = it->first;
    691     PerfizeBuildIDString(&build_id);
    692     perfized_build_ids_to_filenames[build_id] = it->second;
    693   }
    694 
    695   std::map<string, string> filename_map;
    696   for (size_t i = 0; i < build_id_events_.size(); ++i) {
    697     build_id_event* event = build_id_events_[i];
    698     string build_id = HexToString(event->build_id, kBuildIDArraySize);
    699     if (perfized_build_ids_to_filenames.find(build_id) ==
    700         perfized_build_ids_to_filenames.end()) {
    701       continue;
    702     }
    703 
    704     string new_name = perfized_build_ids_to_filenames.at(build_id);
    705     filename_map[string(event->filename)] = new_name;
    706     build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event);
    707     CHECK(new_event);
    708     build_id_events_[i] = new_event;
    709   }
    710 
    711   LocalizeUsingFilenames(filename_map);
    712   return true;
    713 }
    714 
    715 bool PerfReader::LocalizeUsingFilenames(
    716     const std::map<string, string>& filename_map) {
    717   LocalizeMMapFilenames(filename_map);
    718   for (size_t i = 0; i < build_id_events_.size(); ++i) {
    719     build_id_event* event = build_id_events_[i];
    720     string old_name = event->filename;
    721 
    722     if (filename_map.find(event->filename) != filename_map.end()) {
    723       const string& new_name = filename_map.at(old_name);
    724       build_id_event* new_event = CreateOrUpdateBuildID("", new_name, 0, event);
    725       CHECK(new_event);
    726       build_id_events_[i] = new_event;
    727     }
    728   }
    729   return true;
    730 }
    731 
    732 void PerfReader::GetFilenames(std::vector<string>* filenames) const {
    733   std::set<string> filename_set;
    734   GetFilenamesAsSet(&filename_set);
    735   filenames->clear();
    736   filenames->insert(filenames->begin(), filename_set.begin(),
    737                     filename_set.end());
    738 }
    739 
    740 void PerfReader::GetFilenamesAsSet(std::set<string>* filenames) const {
    741   filenames->clear();
    742   for (size_t i = 0; i < events_.size(); ++i) {
    743     const event_t& event = *events_[i];
    744     if (event.header.type == PERF_RECORD_MMAP)
    745       filenames->insert(event.mmap.filename);
    746     if (event.header.type == PERF_RECORD_MMAP2)
    747       filenames->insert(event.mmap2.filename);
    748   }
    749 }
    750 
    751 void PerfReader::GetFilenamesToBuildIDs(
    752     std::map<string, string>* filenames_to_build_ids) const {
    753   filenames_to_build_ids->clear();
    754   for (size_t i = 0; i < build_id_events_.size(); ++i) {
    755     const build_id_event& event = *build_id_events_[i];
    756     string build_id = HexToString(event.build_id, kBuildIDArraySize);
    757     (*filenames_to_build_ids)[event.filename] = build_id;
    758   }
    759 }
    760 
    761 bool PerfReader::IsSupportedEventType(uint32_t type) {
    762   switch (type) {
    763   case PERF_RECORD_SAMPLE:
    764   case PERF_RECORD_MMAP:
    765   case PERF_RECORD_MMAP2:
    766   case PERF_RECORD_FORK:
    767   case PERF_RECORD_EXIT:
    768   case PERF_RECORD_COMM:
    769   case PERF_RECORD_LOST:
    770   case PERF_RECORD_THROTTLE:
    771   case PERF_RECORD_UNTHROTTLE:
    772     return true;
    773   case PERF_RECORD_READ:
    774   case PERF_RECORD_MAX:
    775     return false;
    776   default:
    777     LOG(FATAL) << "Unknown event type " << type;
    778     return false;
    779   }
    780 }
    781 
    782 bool PerfReader::ReadPerfSampleInfo(const event_t& event,
    783                                     struct perf_sample* sample) const {
    784   CHECK(sample);
    785 
    786   if (!IsSupportedEventType(event.header.type)) {
    787     LOG(ERROR) << "Unsupported event type " << event.header.type;
    788     return false;
    789   }
    790 
    791   uint64_t sample_format = GetSampleFieldsForEventType(event.header.type,
    792                                                        sample_type_);
    793   uint64_t offset = GetPerfSampleDataOffset(event);
    794   size_t size_read = ReadPerfSampleFromData(
    795       static_cast<perf_event_type>(event.header.type),
    796       reinterpret_cast<const uint64_t*>(&event) + offset / sizeof(uint64_t),
    797       sample_format,
    798       read_format_,
    799       is_cross_endian_,
    800       sample);
    801 
    802   size_t expected_size = event.header.size - offset;
    803   if (size_read != expected_size) {
    804     LOG(ERROR) << "Read " << size_read << " bytes, expected "
    805                << expected_size << " bytes.";
    806   }
    807 
    808   return (size_read == expected_size);
    809 }
    810 
    811 bool PerfReader::WritePerfSampleInfo(const perf_sample& sample,
    812                                      event_t* event) const {
    813   CHECK(event);
    814 
    815   if (!IsSupportedEventType(event->header.type)) {
    816     LOG(ERROR) << "Unsupported event type " << event->header.type;
    817     return false;
    818   }
    819 
    820   uint64_t sample_format = GetSampleFieldsForEventType(event->header.type,
    821                                                        sample_type_);
    822   uint64_t offset = GetPerfSampleDataOffset(*event);
    823 
    824   size_t expected_size = event->header.size - offset;
    825   memset(reinterpret_cast<uint8_t*>(event) + offset, 0, expected_size);
    826   size_t size_written = WritePerfSampleToData(
    827       static_cast<perf_event_type>(event->header.type),
    828       sample,
    829       sample_format,
    830       read_format_,
    831       reinterpret_cast<uint64_t*>(event) + offset / sizeof(uint64_t));
    832   if (size_written != expected_size) {
    833     LOG(ERROR) << "Wrote " << size_written << " bytes, expected "
    834                << expected_size << " bytes.";
    835   }
    836 
    837   return (size_written == expected_size);
    838 }
    839 
    840 bool PerfReader::ReadHeader(const ConstBufferWithSize& data) {
    841   CheckNoEventHeaderPadding();
    842   size_t offset = 0;
    843   if (!ReadDataFromBuffer(data, sizeof(piped_header_), "header magic",
    844                           &offset, &piped_header_)) {
    845     return false;
    846   }
    847   if (piped_header_.magic != kPerfMagic &&
    848       piped_header_.magic != bswap_64(kPerfMagic)) {
    849     LOG(ERROR) << "Read wrong magic. Expected: 0x" << std::hex << kPerfMagic
    850                << " or 0x" << std::hex << bswap_64(kPerfMagic)
    851                << " Got: 0x" << std::hex << piped_header_.magic;
    852     return false;
    853   }
    854   is_cross_endian_ = (piped_header_.magic != kPerfMagic);
    855   if (is_cross_endian_)
    856     ByteSwap(&piped_header_.size);
    857 
    858   // Header can be a piped header.
    859   if (piped_header_.size == sizeof(piped_header_))
    860     return true;
    861 
    862   // Re-read full header
    863   offset = 0;
    864   if (!ReadDataFromBuffer(data, sizeof(header_), "header data",
    865                           &offset, &header_)) {
    866     return false;
    867   }
    868   if (is_cross_endian_)
    869     ByteSwap(&header_.size);
    870 
    871   DLOG(INFO) << "event_types.size: " << header_.event_types.size;
    872   DLOG(INFO) << "event_types.offset: " << header_.event_types.offset;
    873 
    874   return true;
    875 }
    876 
    877 bool PerfReader::ReadAttrs(const ConstBufferWithSize& data) {
    878   size_t num_attrs = header_.attrs.size / header_.attr_size;
    879   size_t offset = header_.attrs.offset;
    880   for (size_t i = 0; i < num_attrs; i++) {
    881     if (!ReadAttr(data, &offset))
    882       return false;
    883   }
    884   return true;
    885 }
    886 
    887 bool PerfReader::ReadAttr(const ConstBufferWithSize& data, size_t* offset) {
    888   PerfFileAttr attr;
    889   if (!ReadEventAttr(data, offset, &attr.attr))
    890     return false;
    891 
    892   perf_file_section ids;
    893   if (!ReadDataFromBuffer(data, sizeof(ids), "ID section info", offset, &ids))
    894     return false;
    895   if (is_cross_endian_) {
    896     ByteSwap(&ids.offset);
    897     ByteSwap(&ids.size);
    898   }
    899 
    900   size_t num_ids = ids.size / sizeof(decltype(attr.ids)::value_type);
    901   // Convert the offset from u64 to size_t.
    902   size_t ids_offset = ids.offset;
    903   if (!ReadUniqueIDs(data, num_ids, &ids_offset, &attr.ids))
    904     return false;
    905   attrs_.push_back(attr);
    906   return true;
    907 }
    908 
    909 u32 PerfReader::ReadPerfEventAttrSize(const ConstBufferWithSize& data,
    910                                       size_t attr_offset) {
    911   static_assert(std::is_same<decltype(perf_event_attr::size), u32>::value,
    912                 "ReadPerfEventAttrSize return type should match "
    913                 "perf_event_attr.size");
    914   u32 attr_size;
    915   size_t attr_size_offset = attr_offset + offsetof(perf_event_attr, size);
    916   if (!ReadDataFromBuffer(data, sizeof(perf_event_attr::size),
    917                           "attr.size", &attr_size_offset, &attr_size)) {
    918     return kuint32max;
    919   }
    920   return MaybeSwap(attr_size, is_cross_endian_);
    921 }
    922 
    923 bool PerfReader::ReadEventAttr(const ConstBufferWithSize& data, size_t* offset,
    924                                perf_event_attr* attr) {
    925   CheckNoPerfEventAttrPadding();
    926 
    927   std::memset(attr, 0, sizeof(*attr));
    928   //*attr = {0};
    929 
    930   // read just size first
    931   u32 attr_size = ReadPerfEventAttrSize(data, *offset);
    932   if (attr_size == kuint32max) {
    933     return false;
    934   }
    935 
    936   // now read the the struct.
    937   if (!ReadDataFromBuffer(data, attr_size, "attribute", offset,
    938                           reinterpret_cast<char*>(attr))) {
    939     return false;
    940   }
    941 
    942   if (is_cross_endian_) {
    943     // Depending on attr->size, some of these might not have actually been
    944     // read. This is okay: they are zero.
    945     ByteSwap(&attr->type);
    946     ByteSwap(&attr->size);
    947     ByteSwap(&attr->config);
    948     ByteSwap(&attr->sample_period);
    949     ByteSwap(&attr->sample_type);
    950     ByteSwap(&attr->read_format);
    951 
    952     // NB: This will also reverse precise_ip : 2 as if it was two fields:
    953     auto *const bitfield_start = &attr->read_format + 1;
    954     SwapBitfieldOfBits(reinterpret_cast<u8*>(bitfield_start),
    955                        sizeof(u64));
    956     // ... So swap it back:
    957     const auto tmp = attr->precise_ip;
    958     attr->precise_ip = (tmp & 0x2) >> 1 | (tmp & 0x1) << 1;
    959 
    960     ByteSwap(&attr->wakeup_events);  // union with wakeup_watermark
    961     ByteSwap(&attr->bp_type);
    962     ByteSwap(&attr->bp_addr);        // union with config1
    963     ByteSwap(&attr->bp_len);         // union with config2
    964     ByteSwap(&attr->branch_sample_type);
    965     ByteSwap(&attr->sample_regs_user);
    966     ByteSwap(&attr->sample_stack_user);
    967   }
    968 
    969   CHECK_EQ(attr_size, attr->size);
    970   // The actual perf_event_attr data size might be different from the size of
    971   // the struct definition.  Check against perf_event_attr's |size| field.
    972   attr->size = sizeof(*attr);
    973 
    974   // Assign sample type if it hasn't been assigned, otherwise make sure all
    975   // subsequent attributes have the same sample type bits set.
    976   if (sample_type_ == 0) {
    977     sample_type_ = attr->sample_type;
    978   } else {
    979     CHECK_EQ(sample_type_, attr->sample_type)
    980         << "Event type sample format does not match sample format of other "
    981         << "event type.";
    982   }
    983 
    984   if (read_format_ == 0) {
    985     read_format_ = attr->read_format;
    986   } else {
    987     CHECK_EQ(read_format_, attr->read_format)
    988         << "Event type read format does not match read format of other event "
    989         << "types.";
    990   }
    991 
    992   return true;
    993 }
    994 
    995 bool PerfReader::ReadUniqueIDs(const ConstBufferWithSize& data, size_t num_ids,
    996                                size_t* offset, std::vector<u64>* ids) {
    997   ids->resize(num_ids);
    998   for (size_t j = 0; j < num_ids; j++) {
    999     if (!ReadDataFromBuffer(data, sizeof(ids->at(j)), "ID", offset,
   1000                             &ids->at(j))) {
   1001       return false;
   1002     }
   1003     if (is_cross_endian_)
   1004       ByteSwap(&ids->at(j));
   1005   }
   1006   return true;
   1007 }
   1008 
   1009 bool PerfReader::ReadEventTypes(const ConstBufferWithSize& data) {
   1010   size_t num_event_types = header_.event_types.size /
   1011       sizeof(struct perf_trace_event_type);
   1012   CHECK_EQ(sizeof(perf_trace_event_type) * num_event_types,
   1013            header_.event_types.size);
   1014   size_t offset = header_.event_types.offset;
   1015   for (size_t i = 0; i < num_event_types; ++i) {
   1016     if (!ReadEventType(data, &offset))
   1017       return false;
   1018   }
   1019   return true;
   1020 }
   1021 
   1022 bool PerfReader::ReadEventType(const ConstBufferWithSize& data,
   1023                                size_t* offset) {
   1024   CheckNoEventTypePadding();
   1025   perf_trace_event_type type;
   1026   memset(&type, 0, sizeof(type));
   1027   if (!ReadDataFromBuffer(data, sizeof(type.event_id), "event id",
   1028                           offset, &type.event_id)) {
   1029     return false;
   1030   }
   1031   const char* event_name = reinterpret_cast<const char*>(data.ptr + *offset);
   1032   CHECK_GT(snprintf(type.name, sizeof(type.name), "%s", event_name), 0);
   1033   *offset += sizeof(type.name);
   1034   event_types_.push_back(type);
   1035   return true;
   1036 }
   1037 
   1038 bool PerfReader::ReadData(const ConstBufferWithSize& data) {
   1039   u64 data_remaining_bytes = header_.data.size;
   1040   size_t offset = header_.data.offset;
   1041   while (data_remaining_bytes != 0) {
   1042     if (data.size < offset) {
   1043       LOG(ERROR) << "Not enough data to read a perf event.";
   1044       return false;
   1045     }
   1046 
   1047     const event_t* event = reinterpret_cast<const event_t*>(data.ptr + offset);
   1048     if (!ReadPerfEventBlock(*event))
   1049       return false;
   1050     data_remaining_bytes -= event->header.size;
   1051     offset += event->header.size;
   1052   }
   1053 
   1054   DLOG(INFO) << "Number of events stored: "<< events_.size();
   1055   return true;
   1056 }
   1057 
   1058 bool PerfReader::ReadMetadata(const ConstBufferWithSize& data) {
   1059   size_t offset = header_.data.offset + header_.data.size;
   1060 
   1061   for (u32 type = HEADER_FIRST_FEATURE; type != HEADER_LAST_FEATURE; ++type) {
   1062     if ((metadata_mask_ & (1 << type)) == 0)
   1063       continue;
   1064 
   1065     if (data.size < offset) {
   1066       LOG(ERROR) << "Not enough data to read offset and size of metadata.";
   1067       return false;
   1068     }
   1069 
   1070     u64 metadata_offset, metadata_size;
   1071     if (!ReadDataFromBuffer(data, sizeof(metadata_offset), "metadata offset",
   1072                             &offset, &metadata_offset) ||
   1073         !ReadDataFromBuffer(data, sizeof(metadata_size), "metadata size",
   1074                             &offset, &metadata_size)) {
   1075       return false;
   1076     }
   1077 
   1078     if (data.size < metadata_offset + metadata_size) {
   1079       LOG(ERROR) << "Not enough data to read metadata.";
   1080       return false;
   1081     }
   1082 
   1083     switch (type) {
   1084     case HEADER_TRACING_DATA:
   1085       if (!ReadTracingMetadata(data, metadata_offset, metadata_size)) {
   1086         return false;
   1087       }
   1088       break;
   1089     case HEADER_BUILD_ID:
   1090       if (!ReadBuildIDMetadata(data, type, metadata_offset, metadata_size))
   1091         return false;
   1092       break;
   1093     case HEADER_HOSTNAME:
   1094     case HEADER_OSRELEASE:
   1095     case HEADER_VERSION:
   1096     case HEADER_ARCH:
   1097     case HEADER_CPUDESC:
   1098     case HEADER_CPUID:
   1099     case HEADER_CMDLINE:
   1100       if (!ReadStringMetadata(data, type, metadata_offset, metadata_size))
   1101         return false;
   1102       break;
   1103     case HEADER_NRCPUS:
   1104       if (!ReadUint32Metadata(data, type, metadata_offset, metadata_size))
   1105         return false;
   1106       break;
   1107     case HEADER_TOTAL_MEM:
   1108       if (!ReadUint64Metadata(data, type, metadata_offset, metadata_size))
   1109         return false;
   1110       break;
   1111     case HEADER_EVENT_DESC:
   1112       break;
   1113     case HEADER_CPU_TOPOLOGY:
   1114       if (!ReadCPUTopologyMetadata(data, type, metadata_offset, metadata_size))
   1115         return false;
   1116       break;
   1117     case HEADER_NUMA_TOPOLOGY:
   1118       if (!ReadNUMATopologyMetadata(data, type, metadata_offset, metadata_size))
   1119         return false;
   1120       break;
   1121     case HEADER_PMU_MAPPINGS:
   1122       // ignore for now
   1123       continue;
   1124       break;
   1125     case HEADER_BRANCH_STACK:
   1126       continue;
   1127     default: LOG(INFO) << "Unsupported metadata type: " << type;
   1128       break;
   1129     }
   1130   }
   1131 
   1132   // Event type events are optional in some newer versions of perf. They
   1133   // contain the same information that is already in |attrs_|. Make sure the
   1134   // number of event types matches the number of attrs, but only if there are
   1135   // event type events present.
   1136   if (event_types_.size() > 0) {
   1137     if (event_types_.size() != attrs_.size()) {
   1138       LOG(ERROR) << "Mismatch between number of event type events and attr "
   1139                  << "events: " << event_types_.size() << " vs "
   1140                  << attrs_.size();
   1141       return false;
   1142     }
   1143     metadata_mask_ |= (1 << HEADER_EVENT_DESC);
   1144   }
   1145   return true;
   1146 }
   1147 
   1148 bool PerfReader::ReadBuildIDMetadata(const ConstBufferWithSize& data, u32 /*type*/,
   1149                                      size_t offset, size_t size) {
   1150   CheckNoBuildIDEventPadding();
   1151   while (size > 0) {
   1152     // Make sure there is enough data for everything but the filename.
   1153     if (data.size < offset + sizeof(build_id_event) / sizeof(*data.ptr)) {
   1154       LOG(ERROR) << "Not enough bytes to read build id event";
   1155       return false;
   1156     }
   1157 
   1158     const build_id_event* temp_ptr =
   1159         reinterpret_cast<const build_id_event*>(data.ptr + offset);
   1160     u16 event_size = temp_ptr->header.size;
   1161     if (is_cross_endian_)
   1162       ByteSwap(&event_size);
   1163 
   1164     // Make sure there is enough data for the rest of the event.
   1165     if (data.size < offset + event_size / sizeof(*data.ptr)) {
   1166       LOG(ERROR) << "Not enough bytes to read build id event";
   1167       return false;
   1168     }
   1169 
   1170     // Allocate memory for the event and copy over the bytes.
   1171     build_id_event* event = CallocMemoryForBuildID(event_size);
   1172     if (!ReadDataFromBuffer(data, event_size, "build id event",
   1173                             &offset, event)) {
   1174       return false;
   1175     }
   1176     if (is_cross_endian_) {
   1177       ByteSwap(&event->header.type);
   1178       ByteSwap(&event->header.misc);
   1179       ByteSwap(&event->header.size);
   1180       ByteSwap(&event->pid);
   1181     }
   1182     size -= event_size;
   1183 
   1184     // Perf tends to use more space than necessary, so fix the size.
   1185     event->header.size =
   1186         sizeof(*event) + GetUint64AlignedStringLength(event->filename);
   1187     build_id_events_.push_back(event);
   1188   }
   1189 
   1190   return true;
   1191 }
   1192 
   1193 bool PerfReader::ReadStringMetadata(const ConstBufferWithSize& data, u32 type,
   1194                                     size_t offset, size_t size) {
   1195   PerfStringMetadata str_data;
   1196   str_data.type = type;
   1197 
   1198   size_t start_offset = offset;
   1199   // Skip the number of string data if it is present.
   1200   if (NeedsNumberOfStringData(type))
   1201     offset += sizeof(num_string_data_type) / sizeof(*data.ptr);
   1202 
   1203   while ((offset - start_offset) < size) {
   1204     CStringWithLength single_string;
   1205     if (!ReadStringFromBuffer(data, is_cross_endian_, &offset, &single_string))
   1206       return false;
   1207     str_data.data.push_back(single_string);
   1208   }
   1209 
   1210   string_metadata_.push_back(str_data);
   1211   return true;
   1212 }
   1213 
   1214 bool PerfReader::ReadUint32Metadata(const ConstBufferWithSize& data, u32 type,
   1215                                     size_t offset, size_t size) {
   1216   PerfUint32Metadata uint32_data;
   1217   uint32_data.type = type;
   1218 
   1219   size_t start_offset = offset;
   1220   while (size > offset - start_offset) {
   1221     uint32_t item;
   1222     if (!ReadDataFromBuffer(data, sizeof(item), "uint32_t data", &offset,
   1223                             &item))
   1224       return false;
   1225 
   1226     if (is_cross_endian_)
   1227       ByteSwap(&item);
   1228 
   1229     uint32_data.data.push_back(item);
   1230   }
   1231 
   1232   uint32_metadata_.push_back(uint32_data);
   1233   return true;
   1234 }
   1235 
   1236 bool PerfReader::ReadUint64Metadata(const ConstBufferWithSize& data, u32 type,
   1237                                     size_t offset, size_t size) {
   1238   PerfUint64Metadata uint64_data;
   1239   uint64_data.type = type;
   1240 
   1241   size_t start_offset = offset;
   1242   while (size > offset - start_offset) {
   1243     uint64_t item;
   1244     if (!ReadDataFromBuffer(data, sizeof(item), "uint64_t data", &offset,
   1245                             &item))
   1246       return false;
   1247 
   1248     if (is_cross_endian_)
   1249       ByteSwap(&item);
   1250 
   1251     uint64_data.data.push_back(item);
   1252   }
   1253 
   1254   uint64_metadata_.push_back(uint64_data);
   1255   return true;
   1256 }
   1257 
   1258 bool PerfReader::ReadCPUTopologyMetadata(
   1259     const ConstBufferWithSize& data, u32 /*type*/, size_t offset, size_t /*size*/) {
   1260   num_siblings_type num_core_siblings;
   1261   if (!ReadDataFromBuffer(data, sizeof(num_core_siblings), "num cores",
   1262                           &offset, &num_core_siblings)) {
   1263     return false;
   1264   }
   1265   if (is_cross_endian_)
   1266     ByteSwap(&num_core_siblings);
   1267 
   1268   cpu_topology_.core_siblings.resize(num_core_siblings);
   1269   for (size_t i = 0; i < num_core_siblings; ++i) {
   1270     if (!ReadStringFromBuffer(data, is_cross_endian_, &offset,
   1271                               &cpu_topology_.core_siblings[i])) {
   1272       return false;
   1273     }
   1274   }
   1275 
   1276   num_siblings_type num_thread_siblings;
   1277   if (!ReadDataFromBuffer(data, sizeof(num_thread_siblings), "num threads",
   1278                           &offset, &num_thread_siblings)) {
   1279     return false;
   1280   }
   1281   if (is_cross_endian_)
   1282     ByteSwap(&num_thread_siblings);
   1283 
   1284   cpu_topology_.thread_siblings.resize(num_thread_siblings);
   1285   for (size_t i = 0; i < num_thread_siblings; ++i) {
   1286     if (!ReadStringFromBuffer(data, is_cross_endian_, &offset,
   1287                               &cpu_topology_.thread_siblings[i])) {
   1288       return false;
   1289     }
   1290   }
   1291 
   1292   return true;
   1293 }
   1294 
   1295 bool PerfReader::ReadNUMATopologyMetadata(
   1296     const ConstBufferWithSize& data, u32 /*type*/, size_t offset, size_t /*size*/) {
   1297   numa_topology_num_nodes_type num_nodes;
   1298   if (!ReadDataFromBuffer(data, sizeof(num_nodes), "num nodes",
   1299                           &offset, &num_nodes)) {
   1300     return false;
   1301   }
   1302   if (is_cross_endian_)
   1303     ByteSwap(&num_nodes);
   1304 
   1305   for (size_t i = 0; i < num_nodes; ++i) {
   1306     PerfNodeTopologyMetadata node;
   1307     if (!ReadDataFromBuffer(data, sizeof(node.id), "node id",
   1308                             &offset, &node.id) ||
   1309         !ReadDataFromBuffer(data, sizeof(node.total_memory),
   1310                             "node total memory", &offset,
   1311                             &node.total_memory) ||
   1312         !ReadDataFromBuffer(data, sizeof(node.free_memory),
   1313                             "node free memory", &offset, &node.free_memory) ||
   1314         !ReadStringFromBuffer(data, is_cross_endian_, &offset,
   1315                               &node.cpu_list)) {
   1316       return false;
   1317     }
   1318     if (is_cross_endian_) {
   1319       ByteSwap(&node.id);
   1320       ByteSwap(&node.total_memory);
   1321       ByteSwap(&node.free_memory);
   1322     }
   1323     numa_topology_.push_back(node);
   1324   }
   1325   return true;
   1326 }
   1327 
   1328 bool PerfReader::ReadTracingMetadata(
   1329     const ConstBufferWithSize& data, size_t offset, size_t size) {
   1330   size_t tracing_data_offset = offset;
   1331   tracing_data_.resize(size);
   1332   return ReadDataFromBuffer(data, tracing_data_.size(), "tracing_data",
   1333                             &tracing_data_offset, tracing_data_.data());
   1334 }
   1335 
   1336 bool PerfReader::ReadTracingMetadataEvent(
   1337     const ConstBufferWithSize& data, size_t offset) {
   1338   // TRACING_DATA's header.size is a lie. It is the size of only the event
   1339   // struct. The size of the data is in the event struct, and followed
   1340   // immediately by the tracing header data.
   1341 
   1342   // Make a copy of the event (but not the tracing data)
   1343   tracing_data_event tracing_event =
   1344       *reinterpret_cast<const tracing_data_event*>(data.ptr + offset);
   1345 
   1346   if (is_cross_endian_) {
   1347     ByteSwap(&tracing_event.header.type);
   1348     ByteSwap(&tracing_event.header.misc);
   1349     ByteSwap(&tracing_event.header.size);
   1350     ByteSwap(&tracing_event.size);
   1351   }
   1352 
   1353   return ReadTracingMetadata(data, offset + tracing_event.header.size,
   1354                              tracing_event.size);
   1355 }
   1356 
   1357 bool PerfReader::ReadAttrEventBlock(const ConstBufferWithSize& data,
   1358                                     size_t offset, size_t size) {
   1359   const size_t initial_offset = offset;
   1360   PerfFileAttr attr;
   1361   if (!ReadEventAttr(data, &offset, &attr.attr))
   1362     return false;
   1363 
   1364   // attr.attr.size has been upgraded to the current size of perf_event_attr.
   1365   const size_t actual_attr_size = offset - initial_offset;
   1366 
   1367   const size_t num_ids =
   1368       (size - actual_attr_size) / sizeof(decltype(attr.ids)::value_type);
   1369   if (!ReadUniqueIDs(data, num_ids, &offset, &attr.ids))
   1370     return false;
   1371 
   1372   // Event types are found many times in the perf data file.
   1373   // Only add this event type if it is not already present.
   1374   for (size_t i = 0; i < attrs_.size(); ++i) {
   1375     if (attrs_[i].ids[0] == attr.ids[0])
   1376       return true;
   1377   }
   1378   attrs_.push_back(attr);
   1379   return true;
   1380 }
   1381 
   1382 // When this method is called, |event| is a reference to the bytes in the data
   1383 // vector that contains the entire perf.data file.  As a result, we need to be
   1384 // careful to only copy event.header.size bytes.
   1385 // In particular, something like
   1386 // event_t event_copy = event;
   1387 // would be bad, because it would read past the end of the event, and possibly
   1388 // pass the end of the data vector as well.
   1389 bool PerfReader::ReadPerfEventBlock(const event_t& event) {
   1390   u16 size = event.header.size;
   1391   if (is_cross_endian_)
   1392     ByteSwap(&size);
   1393 
   1394   if (size > sizeof(event_t)) {
   1395     LOG(INFO) << "Data size: " << size << " sizeof(event_t): "
   1396               << sizeof(event_t);
   1397     return false;
   1398   }
   1399 
   1400   // Copy only the part of the event that is needed.
   1401   malloced_unique_ptr<event_t> event_copy(CallocMemoryForEvent(size));
   1402   memcpy(event_copy.get(), &event, size);
   1403   if (is_cross_endian_) {
   1404     ByteSwap(&event_copy->header.type);
   1405     ByteSwap(&event_copy->header.misc);
   1406     ByteSwap(&event_copy->header.size);
   1407   }
   1408 
   1409   uint32_t type = event_copy->header.type;
   1410   if (is_cross_endian_) {
   1411     switch (type) {
   1412     case PERF_RECORD_SAMPLE:
   1413       break;
   1414     case PERF_RECORD_MMAP:
   1415       ByteSwap(&event_copy->mmap.pid);
   1416       ByteSwap(&event_copy->mmap.tid);
   1417       ByteSwap(&event_copy->mmap.start);
   1418       ByteSwap(&event_copy->mmap.len);
   1419       ByteSwap(&event_copy->mmap.pgoff);
   1420       break;
   1421     case PERF_RECORD_MMAP2:
   1422       ByteSwap(&event_copy->mmap2.pid);
   1423       ByteSwap(&event_copy->mmap2.tid);
   1424       ByteSwap(&event_copy->mmap2.start);
   1425       ByteSwap(&event_copy->mmap2.len);
   1426       ByteSwap(&event_copy->mmap2.pgoff);
   1427       ByteSwap(&event_copy->mmap2.maj);
   1428       ByteSwap(&event_copy->mmap2.min);
   1429       ByteSwap(&event_copy->mmap2.ino);
   1430       ByteSwap(&event_copy->mmap2.ino_generation);
   1431       break;
   1432     case PERF_RECORD_FORK:
   1433     case PERF_RECORD_EXIT:
   1434       ByteSwap(&event_copy->fork.pid);
   1435       ByteSwap(&event_copy->fork.tid);
   1436       ByteSwap(&event_copy->fork.ppid);
   1437       ByteSwap(&event_copy->fork.ptid);
   1438       break;
   1439     case PERF_RECORD_COMM:
   1440       ByteSwap(&event_copy->comm.pid);
   1441       ByteSwap(&event_copy->comm.tid);
   1442       break;
   1443     case PERF_RECORD_LOST:
   1444       ByteSwap(&event_copy->lost.id);
   1445       ByteSwap(&event_copy->lost.lost);
   1446       break;
   1447     case PERF_RECORD_READ:
   1448       ByteSwap(&event_copy->read.pid);
   1449       ByteSwap(&event_copy->read.tid);
   1450       ByteSwap(&event_copy->read.value);
   1451       ByteSwap(&event_copy->read.time_enabled);
   1452       ByteSwap(&event_copy->read.time_running);
   1453       ByteSwap(&event_copy->read.id);
   1454       break;
   1455     default:
   1456       LOG(FATAL) << "Unknown event type: " << type;
   1457     }
   1458   }
   1459 
   1460   events_.push_back(std::move(event_copy));
   1461 
   1462   return true;
   1463 }
   1464 
   1465 size_t PerfReader::GetNumMetadata() const {
   1466   // This is just the number of 1s in the binary representation of the metadata
   1467   // mask.  However, make sure to only use supported metadata, and don't include
   1468   // branch stack (since it doesn't have an entry in the metadata section).
   1469   uint64_t new_mask = metadata_mask_;
   1470   new_mask &= kSupportedMetadataMask & ~(1 << HEADER_BRANCH_STACK);
   1471   std::bitset<sizeof(new_mask) * CHAR_BIT> bits(new_mask);
   1472   return bits.count();
   1473 }
   1474 
   1475 size_t PerfReader::GetEventDescMetadataSize() const {
   1476   size_t size = 0;
   1477   if (event_types_.empty()) {
   1478     return size;
   1479   }
   1480   if (metadata_mask_ & (1 << HEADER_EVENT_DESC)) {
   1481     if (event_types_.size() > 0 && event_types_.size() != attrs_.size()) {
   1482       LOG(ERROR) << "Mismatch between number of event type events and attr "
   1483                  << "events: " << event_types_.size() << " vs "
   1484                  << attrs_.size();
   1485       return size;
   1486     }
   1487     size += sizeof(event_desc_num_events) + sizeof(event_desc_attr_size);
   1488     CStringWithLength dummy;
   1489     for (size_t i = 0; i < attrs_.size(); ++i) {
   1490       size += sizeof(perf_event_attr) + sizeof(dummy.len);
   1491       size += sizeof(event_desc_num_unique_ids);
   1492       size += GetUint64AlignedStringLength(event_types_[i].name) * sizeof(char);
   1493       size += attrs_[i].ids.size() * sizeof(attrs_[i].ids[0]);
   1494     }
   1495   }
   1496   return size;
   1497 }
   1498 
   1499 size_t PerfReader::GetBuildIDMetadataSize() const {
   1500   size_t size = 0;
   1501   for (size_t i = 0; i < build_id_events_.size(); ++i)
   1502     size += build_id_events_[i]->header.size;
   1503   return size;
   1504 }
   1505 
   1506 size_t PerfReader::GetStringMetadataSize() const {
   1507   size_t size = 0;
   1508   for (size_t i = 0; i < string_metadata_.size(); ++i) {
   1509     const PerfStringMetadata& metadata = string_metadata_[i];
   1510     if (NeedsNumberOfStringData(metadata.type))
   1511       size += sizeof(num_string_data_type);
   1512 
   1513     for (size_t j = 0; j < metadata.data.size(); ++j) {
   1514       const CStringWithLength& str = metadata.data[j];
   1515       size += sizeof(str.len) + (str.len * sizeof(char));
   1516     }
   1517   }
   1518   return size;
   1519 }
   1520 
   1521 size_t PerfReader::GetUint32MetadataSize() const {
   1522   size_t size = 0;
   1523   for (size_t i = 0; i < uint32_metadata_.size(); ++i) {
   1524     const PerfUint32Metadata& metadata = uint32_metadata_[i];
   1525     size += metadata.data.size() * sizeof(metadata.data[0]);
   1526   }
   1527   return size;
   1528 }
   1529 
   1530 size_t PerfReader::GetUint64MetadataSize() const {
   1531   size_t size = 0;
   1532   for (size_t i = 0; i < uint64_metadata_.size(); ++i) {
   1533     const PerfUint64Metadata& metadata = uint64_metadata_[i];
   1534     size += metadata.data.size() * sizeof(metadata.data[0]);
   1535   }
   1536   return size;
   1537 }
   1538 
   1539 size_t PerfReader::GetCPUTopologyMetadataSize() const {
   1540   // Core siblings.
   1541   size_t size = sizeof(num_siblings_type);
   1542   for (size_t i = 0; i < cpu_topology_.core_siblings.size(); ++i) {
   1543     const CStringWithLength& str = cpu_topology_.core_siblings[i];
   1544     size += sizeof(str.len) + (str.len * sizeof(char));
   1545   }
   1546 
   1547   // Thread siblings.
   1548   size += sizeof(num_siblings_type);
   1549   for (size_t i = 0; i < cpu_topology_.thread_siblings.size(); ++i) {
   1550     const CStringWithLength& str = cpu_topology_.thread_siblings[i];
   1551     size += sizeof(str.len) + (str.len * sizeof(char));
   1552   }
   1553 
   1554   return size;
   1555 }
   1556 
   1557 size_t PerfReader::GetNUMATopologyMetadataSize() const {
   1558   size_t size = sizeof(numa_topology_num_nodes_type);
   1559   for (size_t i = 0; i < numa_topology_.size(); ++i) {
   1560     const PerfNodeTopologyMetadata& node = numa_topology_[i];
   1561     size += sizeof(node.id);
   1562     size += sizeof(node.total_memory) + sizeof(node.free_memory);
   1563     size += sizeof(node.cpu_list.len) + node.cpu_list.len * sizeof(char);
   1564   }
   1565   return size;
   1566 }
   1567 
   1568 bool PerfReader::NeedsNumberOfStringData(u32 type) const {
   1569   return type == HEADER_CMDLINE;
   1570 }
   1571 
   1572 bool PerfReader::LocalizeMMapFilenames(
   1573     const std::map<string, string>& filename_map) {
   1574   // Search for mmap/mmap2 events for which the filename needs to be updated.
   1575   for (size_t i = 0; i < events_.size(); ++i) {
   1576     string filename;
   1577     size_t size_of_fixed_event_parts;
   1578     event_t* event = events_[i].get();
   1579     if (event->header.type == PERF_RECORD_MMAP) {
   1580       filename = string(event->mmap.filename);
   1581       size_of_fixed_event_parts =
   1582           sizeof(event->mmap) - sizeof(event->mmap.filename);
   1583     } else if (event->header.type == PERF_RECORD_MMAP2) {
   1584       filename = string(event->mmap2.filename);
   1585       size_of_fixed_event_parts =
   1586           sizeof(event->mmap2) - sizeof(event->mmap2.filename);
   1587     } else {
   1588       continue;
   1589     }
   1590 
   1591     const auto it = filename_map.find(filename);
   1592     if (it == filename_map.end())  // not found
   1593       continue;
   1594 
   1595     const string& new_filename = it->second;
   1596     size_t old_len = GetUint64AlignedStringLength(filename);
   1597     size_t new_len = GetUint64AlignedStringLength(new_filename);
   1598     size_t old_offset = GetPerfSampleDataOffset(*event);
   1599     size_t sample_size = event->header.size - old_offset;
   1600 
   1601     int size_change = new_len - old_len;
   1602     size_t new_size = event->header.size + size_change;
   1603     size_t new_offset = old_offset + size_change;
   1604 
   1605     if (size_change > 0) {
   1606       // Allocate memory for a new event.
   1607       event_t* old_event = event;
   1608       malloced_unique_ptr<event_t> new_event(CallocMemoryForEvent(new_size));
   1609 
   1610       // Copy over everything except filename and sample info.
   1611       memcpy(new_event.get(), old_event, size_of_fixed_event_parts);
   1612 
   1613       // Copy over the sample info to the correct location.
   1614       char* old_addr = reinterpret_cast<char*>(old_event);
   1615       char* new_addr = reinterpret_cast<char*>(new_event.get());
   1616       memcpy(new_addr + new_offset, old_addr + old_offset, sample_size);
   1617 
   1618       events_[i] = std::move(new_event);
   1619       event = events_[i].get();
   1620     } else if (size_change < 0) {
   1621       // Move the perf sample data to its new location.
   1622       // Since source and dest could overlap, use memmove instead of memcpy.
   1623       char* start_addr = reinterpret_cast<char*>(event);
   1624       memmove(start_addr + new_offset, start_addr + old_offset, sample_size);
   1625     }
   1626 
   1627     // Copy over the new filename and fix the size of the event.
   1628     char *event_filename = nullptr;
   1629     if (event->header.type == PERF_RECORD_MMAP) {
   1630       event_filename = event->mmap.filename;
   1631     } else if (event->header.type == PERF_RECORD_MMAP2) {
   1632       event_filename = event->mmap2.filename;
   1633     } else {
   1634       LOG(FATAL) << "Unexpected event type";  // Impossible
   1635     }
   1636     CHECK_GT(snprintf(event_filename, new_filename.size() + 1, "%s",
   1637                       new_filename.c_str()),
   1638              0);
   1639     event->header.size = new_size;
   1640   }
   1641 
   1642   return true;
   1643 }
   1644 
   1645 }  // namespace quipper
   1646