Home | History | Annotate | Download | only in cc
      1 /*
      2  * Copyright (c) 2015 PLUMgrid, Inc.
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  * http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <inttypes.h>
     18 #include <poll.h>
     19 #include <stdio.h>
     20 #include <stdint.h>
     21 #include <stdlib.h>
     22 #include <string.h>
     23 #include <syscall.h>
     24 #include <sys/ioctl.h>
     25 #include <sys/mman.h>
     26 #include <sys/types.h>
     27 #include <unistd.h>
     28 #include <linux/perf_event.h>
     29 
     30 #include "libbpf.h"
     31 #include "perf_reader.h"
     32 
// Ownership states for the mmap'ed ring buffer.  perf_reader_free() and
// perf_reader_event_read() race for the buffer via compare-and-swap on
// rb_use_state; only one of them may hold it at a time.
enum {
  RB_NOT_USED = 0, // ring buffer not used
  RB_USED_IN_MUNMAP = 1, // used in munmap
  RB_USED_IN_READ = 2, // used in read
};
     38 
// Per-ring state: one mmap'ed perf event buffer plus the callbacks that
// consume its records.
struct perf_reader {
  perf_reader_raw_cb raw_cb;   // invoked for each PERF_RECORD_SAMPLE
  perf_reader_lost_cb lost_cb; // invoked for each PERF_RECORD_LOST
  void *cb_cookie; // to be returned in the cb
  void *buf; // for keeping segmented data (records that wrap the ring)
  size_t buf_size; // NOTE(review): never written in this file — verify users
  void *base;      // mmap base: one metadata page followed by page_cnt data pages
  int rb_use_state;  // RB_* state, transitioned with __sync CAS
  pid_t rb_read_tid; // tid currently inside perf_reader_event_read(), else 0
  int page_size; // system page size from getpagesize()
  int page_cnt;  // number of data pages in the ring
  int fd;        // perf event fd; -1 until perf_reader_set_fd()
};
     52 
     53 struct perf_reader * perf_reader_new(perf_reader_raw_cb raw_cb,
     54                                      perf_reader_lost_cb lost_cb,
     55                                      void *cb_cookie, int page_cnt) {
     56   struct perf_reader *reader = calloc(1, sizeof(struct perf_reader));
     57   if (!reader)
     58     return NULL;
     59   reader->raw_cb = raw_cb;
     60   reader->lost_cb = lost_cb;
     61   reader->cb_cookie = cb_cookie;
     62   reader->fd = -1;
     63   reader->page_size = getpagesize();
     64   reader->page_cnt = page_cnt;
     65   return reader;
     66 }
     67 
     68 void perf_reader_free(void *ptr) {
     69   if (ptr) {
     70     struct perf_reader *reader = ptr;
     71     pid_t tid = syscall(__NR_gettid);
     72     while (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_MUNMAP)) {
     73       // If the same thread, it is called from call back handler, no locking needed
     74       if (tid == reader->rb_read_tid)
     75         break;
     76     }
     77     munmap(reader->base, reader->page_size * (reader->page_cnt + 1));
     78     if (reader->fd >= 0) {
     79       ioctl(reader->fd, PERF_EVENT_IOC_DISABLE, 0);
     80       close(reader->fd);
     81     }
     82     free(reader->buf);
     83     free(ptr);
     84   }
     85 }
     86 
     87 int perf_reader_mmap(struct perf_reader *reader) {
     88   int mmap_size = reader->page_size * (reader->page_cnt + 1);
     89 
     90   if (reader->fd < 0) {
     91     fprintf(stderr, "%s: reader fd is not set\n", __FUNCTION__);
     92     return -1;
     93   }
     94 
     95   reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, reader->fd, 0);
     96   if (reader->base == MAP_FAILED) {
     97     perror("mmap");
     98     return -1;
     99   }
    100 
    101   return 0;
    102 }
    103 
// Common prefix of a raw trace sample payload.
// NOTE(review): this must mirror the kernel's exported "common_*" field
// layout (see the tracing events format files) — confirm against the
// target kernel version.
struct perf_sample_trace_common {
  uint16_t id;           // event id
  uint8_t flags;         // context flags
  uint8_t preempt_count; // preemption depth at sample time
  int pid;               // pid of the sampled task
};
    110 
// Raw payload of a kprobe sample: the common trace header followed by the
// instruction pointer of the probed location.
struct perf_sample_trace_kprobe {
  struct perf_sample_trace_common common;
  uint64_t ip; // address where the probe fired
};
    115 
    116 static void parse_sw(struct perf_reader *reader, void *data, int size) {
    117   uint8_t *ptr = data;
    118   struct perf_event_header *header = (void *)data;
    119 
    120   struct {
    121       uint32_t size;
    122       char data[0];
    123   } *raw = NULL;
    124 
    125   ptr += sizeof(*header);
    126   if (ptr > (uint8_t *)data + size) {
    127     fprintf(stderr, "%s: corrupt sample header\n", __FUNCTION__);
    128     return;
    129   }
    130 
    131   raw = (void *)ptr;
    132   ptr += sizeof(raw->size) + raw->size;
    133   if (ptr > (uint8_t *)data + size) {
    134     fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__);
    135     return;
    136   }
    137 
    138   // sanity check
    139   if (ptr != (uint8_t *)data + size) {
    140     fprintf(stderr, "%s: extra data at end of sample\n", __FUNCTION__);
    141     return;
    142   }
    143 
    144   if (reader->raw_cb)
    145     reader->raw_cb(reader->cb_cookie, raw->data, raw->size);
    146 }
    147 
// Read the kernel-written producer head.  The compiler barrier after the
// load keeps subsequent reads of ring data from being hoisted above it.
// NOTE(review): this is a compiler barrier only, not a CPU fence — assumes
// the architecture's ordering is sufficient here; confirm for weakly
// ordered targets.
static uint64_t read_data_head(volatile struct perf_event_mmap_page *perf_header) {
  uint64_t data_head = perf_header->data_head;
  asm volatile("" ::: "memory");
  return data_head;
}
    153 
// Publish the consumer tail.  The compiler barrier before the store keeps
// our reads of consumed ring data from sinking below the tail update the
// kernel observes.  (Compiler barrier only — see note on read_data_head.)
static void write_data_tail(volatile struct perf_event_mmap_page *perf_header, uint64_t data_tail) {
  asm volatile("" ::: "memory");
  perf_header->data_tail = data_tail;
}
    158 
    159 void perf_reader_event_read(struct perf_reader *reader) {
    160   volatile struct perf_event_mmap_page *perf_header = reader->base;
    161   uint64_t buffer_size = (uint64_t)reader->page_size * reader->page_cnt;
    162   uint64_t data_head;
    163   uint8_t *base = (uint8_t *)reader->base + reader->page_size;
    164   uint8_t *sentinel = (uint8_t *)reader->base + buffer_size + reader->page_size;
    165   uint8_t *begin, *end;
    166 
    167   reader->rb_read_tid = syscall(__NR_gettid);
    168   if (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_READ))
    169     return;
    170 
    171   // Consume all the events on this ring, calling the cb function for each one.
    172   // The message may fall on the ring boundary, in which case copy the message
    173   // into a malloced buffer.
    174   for (data_head = read_data_head(perf_header); perf_header->data_tail != data_head;
    175       data_head = read_data_head(perf_header)) {
    176     uint64_t data_tail = perf_header->data_tail;
    177     uint8_t *ptr;
    178 
    179     begin = base + data_tail % buffer_size;
    180     // event header is u64, won't wrap
    181     struct perf_event_header *e = (void *)begin;
    182     ptr = begin;
    183     end = base + (data_tail + e->size) % buffer_size;
    184     if (end < begin) {
    185       // perf event wraps around the ring, make a contiguous copy
    186       reader->buf = realloc(reader->buf, e->size);
    187       size_t len = sentinel - begin;
    188       memcpy(reader->buf, begin, len);
    189       memcpy((void *)((unsigned long)reader->buf + len), base, e->size - len);
    190       ptr = reader->buf;
    191     }
    192 
    193     if (e->type == PERF_RECORD_LOST) {
    194       /*
    195        * struct {
    196        *    struct perf_event_header    header;
    197        *    u64                id;
    198        *    u64                lost;
    199        *    struct sample_id        sample_id;
    200        * };
    201        */
    202       uint64_t lost = *(uint64_t *)(ptr + sizeof(*e) + sizeof(uint64_t));
    203       if (reader->lost_cb) {
    204         reader->lost_cb(reader->cb_cookie, lost);
    205       } else {
    206         fprintf(stderr, "Possibly lost %" PRIu64 " samples\n", lost);
    207       }
    208     } else if (e->type == PERF_RECORD_SAMPLE) {
    209       parse_sw(reader, ptr, e->size);
    210     } else {
    211       fprintf(stderr, "%s: unknown sample type %d\n", __FUNCTION__, e->type);
    212     }
    213 
    214     write_data_tail(perf_header, perf_header->data_tail + e->size);
    215   }
    216   reader->rb_use_state = RB_NOT_USED;
    217   __sync_synchronize();
    218   reader->rb_read_tid = 0;
    219 }
    220 
    221 int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout) {
    222   struct pollfd pfds[num_readers];
    223   int i;
    224 
    225   for (i = 0; i <num_readers; ++i) {
    226     pfds[i].fd = readers[i]->fd;
    227     pfds[i].events = POLLIN;
    228   }
    229 
    230   if (poll(pfds, num_readers, timeout) > 0) {
    231     for (i = 0; i < num_readers; ++i) {
    232       if (pfds[i].revents & POLLIN)
    233         perf_reader_event_read(readers[i]);
    234     }
    235   }
    236   return 0;
    237 }
    238 
// Attach the perf event fd backing this reader.  perf_reader_free() will
// later disable and close it; perf_reader_mmap() requires it to be set.
void perf_reader_set_fd(struct perf_reader *reader, int fd) {
  reader->fd = fd;
}
    242 
// Return the perf event fd associated with this reader (-1 when unset).
int perf_reader_fd(struct perf_reader *reader) {
  return reader->fd;
}
    246