/*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/

/*
 * Copyright 2000-2002 Niels Provos <provos@citi.umich.edu>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#define _GNU_SOURCE 1

#include <sys/types.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_libevent_time.h>
#endif
#include <sys/queue.h>
#include <sys/event.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#ifdef HAVE_INTTYPES_H
#include <inttypes.h>
#endif

/* Some platforms apparently define the udata field of struct kevent as
 * intptr_t, whereas others define it as void*.  There doesn't seem to be an
 * easy way to tell them apart via autoconf, so we need to use OS macros.
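 * (NetBSD, for example, appears to use intptr_t, whereas the BSDs and
 * Darwin excluded below use void*.)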
 */
#if defined(HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
#define PTR_TO_UDATA(x)	((intptr_t)(x))
#else
#define PTR_TO_UDATA(x)	(x)
#endif

#include "event.h"
#include "event-internal.h"
#include "log.h"

#define EVLIST_X_KQINKERNEL	0x1000

#define NEVENT		64

struct kqop {
	struct kevent *changes;
	int nchanges;
	struct kevent *events;
	struct event_list evsigevents[NSIG];
	int nevents;
	int kq;
	pid_t pid;
};

static void *kq_init	(struct event_base *);
static int kq_add	(void *, struct event *);
static int kq_del	(void *, struct event *);
static int kq_dispatch	(struct event_base *, void *, struct timeval *);
static int kq_insert	(struct kqop *, struct kevent *);
static void kq_dealloc	(struct event_base *, void *);

const struct eventop kqops = {
	"kqueue",
	kq_init,
	kq_add,
	kq_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */
};

static void *
kq_init(struct event_base *base)
{
	int i, kq;
	struct kqop *kqueueop;

	/* Disable kqueue when this environment variable is set */
	if (evutil_getenv("EVENT_NOKQUEUE"))
		return (NULL);

	if (!(kqueueop = calloc(1, sizeof(struct kqop))))
		return (NULL);

	/* Initialize the kernel queue */

	if ((kq = kqueue()) == -1) {
		event_warn("kqueue");
		free (kqueueop);
		return (NULL);
	}

	kqueueop->kq = kq;

	kqueueop->pid = getpid();

	/* Initialize fields */
	kqueueop->changes = malloc(NEVENT * sizeof(struct kevent));
	if (kqueueop->changes == NULL) {
		free (kqueueop);
		return (NULL);
	}
	kqueueop->events = malloc(NEVENT * sizeof(struct kevent));
	if (kqueueop->events == NULL) {
		free (kqueueop->changes);
		free (kqueueop);
		return (NULL);
	}
	kqueueop->nevents = NEVENT;

	/* we need to keep track of multiple events per signal */
	for (i = 0; i < NSIG; ++i) {
		TAILQ_INIT(&kqueueop->evsigevents[i]);
	}

	/* Check for Mac OS X kqueue bug. */
	kqueueop->changes[0].ident = -1;
	kqueueop->changes[0].filter = EVFILT_READ;
	kqueueop->changes[0].flags = EV_ADD;
	/*
	 * If kqueue works, then kevent will succeed, and it will
	 * stick an error in events[0].  If kqueue is broken, then
	 * kevent will fail.
	 */
	if (kevent(kq,
		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
	    kqueueop->events[0].ident != -1 ||
	    kqueueop->events[0].flags != EV_ERROR) {
		event_warn("%s: detected broken kqueue; not using.", __func__);
		free(kqueueop->changes);
		free(kqueueop->events);
		free(kqueueop);
		close(kq);
		return (NULL);
	}

	return (kqueueop);
}

static int
kq_insert(struct kqop *kqop, struct kevent *kev)
{
	int nevents = kqop->nevents;

	if (kqop->nchanges == nevents) {
		struct kevent *newchange;
		struct kevent *newresult;

		nevents *= 2;

		newchange = realloc(kqop->changes,
		    nevents * sizeof(struct kevent));
		if (newchange == NULL) {
			event_warn("%s: realloc", __func__);
			return (-1);
		}
		kqop->changes = newchange;

		newresult = realloc(kqop->events,
		    nevents * sizeof(struct kevent));

		/*
		 * If we fail, we don't have to worry about freeing;
		 * the next realloc will pick it up.
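		 * (kqop->changes already points at the enlarged buffer, and
		 * kqop->nevents is left unchanged, so the next call will
		 * retry the events reallocation.)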
		 */
		if (newresult == NULL) {
			event_warn("%s: realloc", __func__);
			return (-1);
		}
		kqop->events = newresult;

		kqop->nevents = nevents;
	}

	memcpy(&kqop->changes[kqop->nchanges++], kev, sizeof(struct kevent));

	event_debug(("%s: fd %d %s%s",
		__func__, (int)kev->ident,
		kev->filter == EVFILT_READ ? "EVFILT_READ" : "EVFILT_WRITE",
		kev->flags == EV_DELETE ? " (del)" : ""));

	return (0);
}

static void
kq_sighandler(int sig)
{
	/* Do nothing here */
}

static int
kq_dispatch(struct event_base *base, void *arg, struct timeval *tv)
{
	struct kqop *kqop = arg;
	struct kevent *changes = kqop->changes;
	struct kevent *events = kqop->events;
	struct event *ev;
	struct timespec ts, *ts_p = NULL;
	int i, res;

	if (tv != NULL) {
		TIMEVAL_TO_TIMESPEC(tv, &ts);
		ts_p = &ts;
	}

	res = kevent(kqop->kq, changes, kqop->nchanges,
	    events, kqop->nevents, ts_p);
	kqop->nchanges = 0;
	if (res == -1) {
		if (errno != EINTR) {
			event_warn("kevent");
			return (-1);
		}

		return (0);
	}

	event_debug(("%s: kevent reports %d", __func__, res));

	for (i = 0; i < res; i++) {
		int which = 0;

		if (events[i].flags & EV_ERROR) {
			/*
			 * Errors that can happen when a delete fails:
			 * EBADF happens when the file descriptor has been
			 * closed,
			 * ENOENT when the file descriptor was closed and
			 * then reopened.
			 * EINVAL for some reason not understood; EINVAL
			 * should never be returned, but FreeBSD does :-\
			 * An error is also indicated when a callback deletes
			 * an event we are still processing.  In that case
			 * the data field is set to ENOENT.
			 */
			if (events[i].data == EBADF ||
			    events[i].data == EINVAL ||
			    events[i].data == ENOENT)
				continue;
			errno = events[i].data;
			return (-1);
		}

		if (events[i].filter == EVFILT_READ) {
			which |= EV_READ;
		} else if (events[i].filter == EVFILT_WRITE) {
			which |= EV_WRITE;
		} else if (events[i].filter == EVFILT_SIGNAL) {
			which |= EV_SIGNAL;
		}

		if (!which)
			continue;

		if (events[i].filter == EVFILT_SIGNAL) {
			struct event_list *head =
			    (struct event_list *)events[i].udata;
			TAILQ_FOREACH(ev, head, ev_signal_next) {
				event_active(ev, which, events[i].data);
			}
		} else {
			ev = (struct event *)events[i].udata;

			if (!(ev->ev_events & EV_PERSIST))
				ev->ev_flags &= ~EVLIST_X_KQINKERNEL;

			event_active(ev, which, 1);
		}
	}

	return (0);
}


static int
kq_add(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	struct kevent kev;

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);

		assert(nsignal >= 0 && nsignal < NSIG);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			struct timespec timeout = { 0, 0 };

			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_ADD;
			kev.udata = PTR_TO_UDATA(&kqop->evsigevents[nsignal]);

			/* Be ready for the signal if it is sent any
			 * time between now and the next call to
			 * kq_dispatch.
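			 * (Signal events are therefore registered with the
			 * kernel right away via a kevent call with a zero
			 * timeout, rather than queued in the changes array.)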
			 */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_set_handler(ev->ev_base, nsignal,
				kq_sighandler) == -1)
				return (-1);
		}

		TAILQ_INSERT_TAIL(&kqop->evsigevents[nsignal], ev,
		    ev_signal_next);
		ev->ev_flags |= EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
#ifdef NOTE_EOF
		/* Make it behave like select() and poll() */
		kev.fflags = NOTE_EOF;
#endif
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = PTR_TO_UDATA(ev);

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_ADD;
		if (!(ev->ev_events & EV_PERSIST))
			kev.flags |= EV_ONESHOT;
		kev.udata = PTR_TO_UDATA(ev);

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags |= EVLIST_X_KQINKERNEL;
	}

	return (0);
}

static int
kq_del(void *arg, struct event *ev)
{
	struct kqop *kqop = arg;
	struct kevent kev;

	if (!(ev->ev_flags & EVLIST_X_KQINKERNEL))
		return (0);

	if (ev->ev_events & EV_SIGNAL) {
		int nsignal = EVENT_SIGNAL(ev);
		struct timespec timeout = { 0, 0 };

		assert(nsignal >= 0 && nsignal < NSIG);
		TAILQ_REMOVE(&kqop->evsigevents[nsignal], ev, ev_signal_next);
		if (TAILQ_EMPTY(&kqop->evsigevents[nsignal])) {
			memset(&kev, 0, sizeof(kev));
			kev.ident = nsignal;
			kev.filter = EVFILT_SIGNAL;
			kev.flags = EV_DELETE;

			/* Because we insert signal events
			 * immediately, we need to delete them
			 * immediately, too */
			if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
				return (-1);

			if (_evsignal_restore_handler(ev->ev_base,
				nsignal) == -1)
				return (-1);
		}

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
		return (0);
	}

	if (ev->ev_events & EV_READ) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_READ;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	if (ev->ev_events & EV_WRITE) {
		memset(&kev, 0, sizeof(kev));
		kev.ident = ev->ev_fd;
		kev.filter = EVFILT_WRITE;
		kev.flags = EV_DELETE;

		if (kq_insert(kqop, &kev) == -1)
			return (-1);

		ev->ev_flags &= ~EVLIST_X_KQINKERNEL;
	}

	return (0);
}

static void
kq_dealloc(struct event_base *base, void *arg)
{
	struct kqop *kqop = arg;

	if (kqop->changes)
		free(kqop->changes);
	if (kqop->events)
		free(kqop->events);
	if (kqop->kq >= 0 && kqop->pid == getpid())
		close(kqop->kq);
	memset(kqop, 0, sizeof(struct kqop));
	free(kqop);
}