1 /* 2 * Copyright (c) 2015 PLUMgrid, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include <inttypes.h> 18 #include <poll.h> 19 #include <stdio.h> 20 #include <stdint.h> 21 #include <stdlib.h> 22 #include <string.h> 23 #include <syscall.h> 24 #include <sys/ioctl.h> 25 #include <sys/mman.h> 26 #include <sys/types.h> 27 #include <unistd.h> 28 #include <linux/perf_event.h> 29 30 #include "libbpf.h" 31 #include "perf_reader.h" 32 33 enum { 34 RB_NOT_USED = 0, // ring buffer not usd 35 RB_USED_IN_MUNMAP = 1, // used in munmap 36 RB_USED_IN_READ = 2, // used in read 37 }; 38 39 struct perf_reader { 40 perf_reader_raw_cb raw_cb; 41 perf_reader_lost_cb lost_cb; 42 void *cb_cookie; // to be returned in the cb 43 void *buf; // for keeping segmented data 44 size_t buf_size; 45 void *base; 46 int rb_use_state; 47 pid_t rb_read_tid; 48 int page_size; 49 int page_cnt; 50 int fd; 51 }; 52 53 struct perf_reader * perf_reader_new(perf_reader_raw_cb raw_cb, 54 perf_reader_lost_cb lost_cb, 55 void *cb_cookie, int page_cnt) { 56 struct perf_reader *reader = calloc(1, sizeof(struct perf_reader)); 57 if (!reader) 58 return NULL; 59 reader->raw_cb = raw_cb; 60 reader->lost_cb = lost_cb; 61 reader->cb_cookie = cb_cookie; 62 reader->fd = -1; 63 reader->page_size = getpagesize(); 64 reader->page_cnt = page_cnt; 65 return reader; 66 } 67 68 void perf_reader_free(void *ptr) { 69 if (ptr) { 70 struct perf_reader *reader = ptr; 71 pid_t tid = syscall(__NR_gettid); 72 while (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_MUNMAP)) { 73 // If the same thread, it is called from call back handler, no locking needed 74 if (tid == reader->rb_read_tid) 75 break; 76 } 77 munmap(reader->base, reader->page_size * (reader->page_cnt + 1)); 78 if (reader->fd >= 0) { 79 ioctl(reader->fd, PERF_EVENT_IOC_DISABLE, 0); 80 close(reader->fd); 81 } 82 free(reader->buf); 83 free(ptr); 84 } 85 } 86 87 int perf_reader_mmap(struct perf_reader *reader) { 88 int mmap_size = reader->page_size * (reader->page_cnt + 1); 89 90 if (reader->fd < 0) { 91 fprintf(stderr, "%s: reader fd is not set\n", __FUNCTION__); 92 return -1; 93 } 94 95 reader->base = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE , MAP_SHARED, reader->fd, 0); 96 if (reader->base == MAP_FAILED) { 97 perror("mmap"); 98 return -1; 99 } 100 101 return 0; 102 } 103 104 struct perf_sample_trace_common { 105 uint16_t id; 106 uint8_t flags; 107 uint8_t preempt_count; 108 int pid; 109 }; 110 111 struct perf_sample_trace_kprobe { 112 struct perf_sample_trace_common common; 113 uint64_t ip; 114 }; 115 116 static void parse_sw(struct perf_reader *reader, void *data, int size) { 117 uint8_t *ptr = data; 118 struct perf_event_header *header = (void *)data; 119 120 struct { 121 uint32_t size; 122 char data[0]; 123 } *raw = NULL; 124 125 ptr += sizeof(*header); 126 if (ptr > (uint8_t *)data + size) { 127 fprintf(stderr, "%s: corrupt sample header\n", __FUNCTION__); 128 return; 129 } 130 131 raw = (void *)ptr; 132 ptr += sizeof(raw->size) + raw->size; 133 if (ptr > (uint8_t *)data + size) { 134 fprintf(stderr, "%s: corrupt raw sample\n", __FUNCTION__); 135 return; 136 } 137 138 // sanity check 139 if (ptr != (uint8_t *)data + size) { 140 fprintf(stderr, "%s: extra data at end of sample\n", __FUNCTION__); 141 return; 142 } 143 144 if (reader->raw_cb) 145 reader->raw_cb(reader->cb_cookie, raw->data, raw->size); 146 } 147 148 static uint64_t read_data_head(volatile struct perf_event_mmap_page *perf_header) { 149 uint64_t data_head = perf_header->data_head; 150 asm volatile("" ::: "memory"); 151 return data_head; 152 } 153 154 static void write_data_tail(volatile struct perf_event_mmap_page *perf_header, uint64_t data_tail) { 155 asm volatile("" ::: "memory"); 156 perf_header->data_tail = data_tail; 157 } 158 159 void perf_reader_event_read(struct perf_reader *reader) { 160 volatile struct perf_event_mmap_page *perf_header = reader->base; 161 uint64_t buffer_size = (uint64_t)reader->page_size * reader->page_cnt; 162 uint64_t data_head; 163 uint8_t *base = (uint8_t *)reader->base + reader->page_size; 164 uint8_t *sentinel = (uint8_t *)reader->base + buffer_size + reader->page_size; 165 uint8_t *begin, *end; 166 167 reader->rb_read_tid = syscall(__NR_gettid); 168 if (!__sync_bool_compare_and_swap(&reader->rb_use_state, RB_NOT_USED, RB_USED_IN_READ)) 169 return; 170 171 // Consume all the events on this ring, calling the cb function for each one. 172 // The message may fall on the ring boundary, in which case copy the message 173 // into a malloced buffer. 174 for (data_head = read_data_head(perf_header); perf_header->data_tail != data_head; 175 data_head = read_data_head(perf_header)) { 176 uint64_t data_tail = perf_header->data_tail; 177 uint8_t *ptr; 178 179 begin = base + data_tail % buffer_size; 180 // event header is u64, won't wrap 181 struct perf_event_header *e = (void *)begin; 182 ptr = begin; 183 end = base + (data_tail + e->size) % buffer_size; 184 if (end < begin) { 185 // perf event wraps around the ring, make a contiguous copy 186 reader->buf = realloc(reader->buf, e->size); 187 size_t len = sentinel - begin; 188 memcpy(reader->buf, begin, len); 189 memcpy((void *)((unsigned long)reader->buf + len), base, e->size - len); 190 ptr = reader->buf; 191 } 192 193 if (e->type == PERF_RECORD_LOST) { 194 /* 195 * struct { 196 * struct perf_event_header header; 197 * u64 id; 198 * u64 lost; 199 * struct sample_id sample_id; 200 * }; 201 */ 202 uint64_t lost = *(uint64_t *)(ptr + sizeof(*e) + sizeof(uint64_t)); 203 if (reader->lost_cb) { 204 reader->lost_cb(reader->cb_cookie, lost); 205 } else { 206 fprintf(stderr, "Possibly lost %" PRIu64 " samples\n", lost); 207 } 208 } else if (e->type == PERF_RECORD_SAMPLE) { 209 parse_sw(reader, ptr, e->size); 210 } else { 211 fprintf(stderr, "%s: unknown sample type %d\n", __FUNCTION__, e->type); 212 } 213 214 write_data_tail(perf_header, perf_header->data_tail + e->size); 215 } 216 reader->rb_use_state = RB_NOT_USED; 217 __sync_synchronize(); 218 reader->rb_read_tid = 0; 219 } 220 221 int perf_reader_poll(int num_readers, struct perf_reader **readers, int timeout) { 222 struct pollfd pfds[num_readers]; 223 int i; 224 225 for (i = 0; i <num_readers; ++i) { 226 pfds[i].fd = readers[i]->fd; 227 pfds[i].events = POLLIN; 228 } 229 230 if (poll(pfds, num_readers, timeout) > 0) { 231 for (i = 0; i < num_readers; ++i) { 232 if (pfds[i].revents & POLLIN) 233 perf_reader_event_read(readers[i]); 234 } 235 } 236 return 0; 237 } 238 239 void perf_reader_set_fd(struct perf_reader *reader, int fd) { 240 reader->fd = fd; 241 } 242 243 int perf_reader_fd(struct perf_reader *reader) { 244 return reader->fd; 245 } 246