Home | History | Annotate | Download | only in libevent
      1 /*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
      2 
      3 /*
      4  * Copyright 2000-2007 Niels Provos <provos (at) citi.umich.edu>
      5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the author may not be used to endorse or promote products
     16  *    derived from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 #include "event2/event-config.h"
     30 #include "evconfig-private.h"
     31 
     32 #ifdef EVENT__HAVE_KQUEUE
     33 
#include <sys/types.h>
#ifdef EVENT__HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/queue.h>
#include <sys/event.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <errno.h>
#ifdef EVENT__HAVE_INTTYPES_H
#include <inttypes.h>
#endif
     49 
     50 /* Some platforms apparently define the udata field of struct kevent as
     51  * intptr_t, whereas others define it as void*.  There doesn't seem to be an
     52  * easy way to tell them apart via autoconf, so we need to use OS macros. */
     53 #if defined(EVENT__HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__) && !defined(__CloudABI__)
     54 #define PTR_TO_UDATA(x)	((intptr_t)(x))
     55 #define INT_TO_UDATA(x) ((intptr_t)(x))
     56 #else
     57 #define PTR_TO_UDATA(x)	(x)
     58 #define INT_TO_UDATA(x) ((void*)(x))
     59 #endif
     60 
     61 #include "event-internal.h"
     62 #include "log-internal.h"
     63 #include "evmap-internal.h"
     64 #include "event2/thread.h"
     65 #include "evthread-internal.h"
     66 #include "changelist-internal.h"
     67 
     68 #include "kqueue-internal.h"
     69 
/* Number of entries initially allocated by kq_init() for both the 'changes'
 * and the 'events' arrays of struct kqop. */
#define NEVENT		64

/* State of one kqueue backend instance; kq_init() returns it and the other
 * functions in this file fetch it from base->evbase. */
struct kqop {
	/* Array of kevents submitted to kevent() by kq_dispatch().  It is
	 * temporarily set to NULL while kq_dispatch() is inside kevent(). */
	struct kevent *changes;
	/* Allocated capacity of 'changes', in entries. */
	int changes_size;

	/* Array that kevent() fills with results in kq_dispatch(). */
	struct kevent *events;
	/* Allocated capacity of 'events', in entries. */
	int events_size;
	/* Descriptor returned by kqueue(). */
	int kq;
	/* Nonzero once event_kq_add_notify_event_() has registered the
	 * EVFILT_USER wake-up event. */
	int notify_event_added;
	/* Value of getpid() when 'kq' was created; kqop_free() closes 'kq'
	 * only when the calling process still has this pid. */
	pid_t pid;
};
     82 
     83 static void kqop_free(struct kqop *kqop);
     84 
     85 static void *kq_init(struct event_base *);
     86 static int kq_sig_add(struct event_base *, int, short, short, void *);
     87 static int kq_sig_del(struct event_base *, int, short, short, void *);
     88 static int kq_dispatch(struct event_base *, struct timeval *);
     89 static void kq_dealloc(struct event_base *);
     90 
/* Backend descriptor for kqueue-based I/O.  The initializer is positional,
 * so the meaning of each slot is fixed by struct eventop (declared outside
 * this file).  The add/del slots use the generic changelist helpers;
 * kq_dispatch() later reads the pending entries from base->changelist and
 * hands them to kevent(). */
const struct eventop kqops = {
	"kqueue",
	kq_init,
	event_changelist_add_,
	event_changelist_del_,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */,
    EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS,
	EVENT_CHANGELIST_FDINFO_SIZE
};
    102 
/* Descriptor for signal handling through kqueue; kq_init() installs it as
 * base->evsigsel.  Only the two slots holding kq_sig_add and kq_sig_del are
 * populated; every other function slot is NULL. */
static const struct eventop kqsigops = {
	"kqueue_signal",
	NULL,
	kq_sig_add,
	kq_sig_del,
	NULL,
	NULL,
	1 /* need reinit */,
	0,
	0
};
    114 
    115 static void *
    116 kq_init(struct event_base *base)
    117 {
    118 	int kq = -1;
    119 	struct kqop *kqueueop = NULL;
    120 
    121 	if (!(kqueueop = mm_calloc(1, sizeof(struct kqop))))
    122 		return (NULL);
    123 
    124 /* Initialize the kernel queue */
    125 
    126 	if ((kq = kqueue()) == -1) {
    127 		event_warn("kqueue");
    128 		goto err;
    129 	}
    130 
    131 	kqueueop->kq = kq;
    132 
    133 	kqueueop->pid = getpid();
    134 
    135 	/* Initialize fields */
    136 	kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent));
    137 	if (kqueueop->changes == NULL)
    138 		goto err;
    139 	kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent));
    140 	if (kqueueop->events == NULL)
    141 		goto err;
    142 	kqueueop->events_size = kqueueop->changes_size = NEVENT;
    143 
    144 	/* Check for Mac OS X kqueue bug. */
    145 	memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]);
    146 	kqueueop->changes[0].ident = -1;
    147 	kqueueop->changes[0].filter = EVFILT_READ;
    148 	kqueueop->changes[0].flags = EV_ADD;
    149 	/*
    150 	 * If kqueue works, then kevent will succeed, and it will
    151 	 * stick an error in events[0].  If kqueue is broken, then
    152 	 * kevent will fail.
    153 	 */
    154 	if (kevent(kq,
    155 		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
    156 	    (int)kqueueop->events[0].ident != -1 ||
    157 	    !(kqueueop->events[0].flags & EV_ERROR)) {
    158 		event_warn("%s: detected broken kqueue; not using.", __func__);
    159 		goto err;
    160 	}
    161 
    162 	base->evsigsel = &kqsigops;
    163 
    164 	return (kqueueop);
    165 err:
    166 	if (kqueueop)
    167 		kqop_free(kqueueop);
    168 
    169 	return (NULL);
    170 }
    171 
    172 #define ADD_UDATA 0x30303
    173 
    174 static void
    175 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change)
    176 {
    177 	memset(out, 0, sizeof(struct kevent));
    178 	out->ident = fd;
    179 	out->filter = filter;
    180 
    181 	if (change & EV_CHANGE_ADD) {
    182 		out->flags = EV_ADD;
    183 		/* We set a magic number here so that we can tell 'add'
    184 		 * errors from 'del' errors. */
    185 		out->udata = INT_TO_UDATA(ADD_UDATA);
    186 		if (change & EV_ET)
    187 			out->flags |= EV_CLEAR;
    188 #ifdef NOTE_EOF
    189 		/* Make it behave like select() and poll() */
    190 		if (filter == EVFILT_READ)
    191 			out->fflags = NOTE_EOF;
    192 #endif
    193 	} else {
    194 		EVUTIL_ASSERT(change & EV_CHANGE_DEL);
    195 		out->flags = EV_DELETE;
    196 	}
    197 }
    198 
    199 static int
    200 kq_build_changes_list(const struct event_changelist *changelist,
    201     struct kqop *kqop)
    202 {
    203 	int i;
    204 	int n_changes = 0;
    205 
    206 	for (i = 0; i < changelist->n_changes; ++i) {
    207 		struct event_change *in_ch = &changelist->changes[i];
    208 		struct kevent *out_ch;
    209 		if (n_changes >= kqop->changes_size - 1) {
    210 			int newsize = kqop->changes_size * 2;
    211 			struct kevent *newchanges;
    212 
    213 			newchanges = mm_realloc(kqop->changes,
    214 			    newsize * sizeof(struct kevent));
    215 			if (newchanges == NULL) {
    216 				event_warn("%s: realloc", __func__);
    217 				return (-1);
    218 			}
    219 			kqop->changes = newchanges;
    220 			kqop->changes_size = newsize;
    221 		}
    222 		if (in_ch->read_change) {
    223 			out_ch = &kqop->changes[n_changes++];
    224 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ,
    225 			    in_ch->read_change);
    226 		}
    227 		if (in_ch->write_change) {
    228 			out_ch = &kqop->changes[n_changes++];
    229 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE,
    230 			    in_ch->write_change);
    231 		}
    232 	}
    233 	return n_changes;
    234 }
    235 
    236 static int
    237 kq_grow_events(struct kqop *kqop, size_t new_size)
    238 {
    239 	struct kevent *newresult;
    240 
    241 	newresult = mm_realloc(kqop->events,
    242 	    new_size * sizeof(struct kevent));
    243 
    244 	if (newresult) {
    245 		kqop->events = newresult;
    246 		kqop->events_size = new_size;
    247 		return 0;
    248 	} else {
    249 		return -1;
    250 	}
    251 }
    252 
    253 static int
    254 kq_dispatch(struct event_base *base, struct timeval *tv)
    255 {
    256 	struct kqop *kqop = base->evbase;
    257 	struct kevent *events = kqop->events;
    258 	struct kevent *changes;
    259 	struct timespec ts, *ts_p = NULL;
    260 	int i, n_changes, res;
    261 
    262 	if (tv != NULL) {
    263 		ts.tv_sec = tv->tv_sec;
    264 		ts.tv_nsec = tv->tv_usec * 1000;
    265 		ts_p = &ts;
    266 	}
    267 
    268 	/* Build "changes" from "base->changes" */
    269 	EVUTIL_ASSERT(kqop->changes);
    270 	n_changes = kq_build_changes_list(&base->changelist, kqop);
    271 	if (n_changes < 0)
    272 		return -1;
    273 
    274 	event_changelist_remove_all_(&base->changelist, base);
    275 
    276 	/* steal the changes array in case some broken code tries to call
    277 	 * dispatch twice at once. */
    278 	changes = kqop->changes;
    279 	kqop->changes = NULL;
    280 
    281 	/* Make sure that 'events' is at least as long as the list of changes:
    282 	 * otherwise errors in the changes can get reported as a -1 return
    283 	 * value from kevent() rather than as EV_ERROR events in the events
    284 	 * array.
    285 	 *
    286 	 * (We could instead handle -1 return values from kevent() by
    287 	 * retrying with a smaller changes array or a larger events array,
    288 	 * but this approach seems less risky for now.)
    289 	 */
    290 	if (kqop->events_size < n_changes) {
    291 		int new_size = kqop->events_size;
    292 		do {
    293 			new_size *= 2;
    294 		} while (new_size < n_changes);
    295 
    296 		kq_grow_events(kqop, new_size);
    297 		events = kqop->events;
    298 	}
    299 
    300 	EVBASE_RELEASE_LOCK(base, th_base_lock);
    301 
    302 	res = kevent(kqop->kq, changes, n_changes,
    303 	    events, kqop->events_size, ts_p);
    304 
    305 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    306 
    307 	EVUTIL_ASSERT(kqop->changes == NULL);
    308 	kqop->changes = changes;
    309 
    310 	if (res == -1) {
    311 		if (errno != EINTR) {
    312 			event_warn("kevent");
    313 			return (-1);
    314 		}
    315 
    316 		return (0);
    317 	}
    318 
    319 	event_debug(("%s: kevent reports %d", __func__, res));
    320 
    321 	for (i = 0; i < res; i++) {
    322 		int which = 0;
    323 
    324 		if (events[i].flags & EV_ERROR) {
    325 			switch (events[i].data) {
    326 
    327 			/* Can occur on delete if we are not currently
    328 			 * watching any events on this fd.  That can
    329 			 * happen when the fd was closed and another
    330 			 * file was opened with that fd. */
    331 			case ENOENT:
    332 			/* Can occur for reasons not fully understood
    333 			 * on FreeBSD. */
    334 			case EINVAL:
    335 				continue;
    336 #if defined(__FreeBSD__)
    337 			/*
    338 			 * This currently occurs if an FD is closed
    339 			 * before the EV_DELETE makes it out via kevent().
    340 			 * The FreeBSD capabilities code sees the blank
    341 			 * capability set and rejects the request to
    342 			 * modify an event.
    343 			 *
    344 			 * To be strictly correct - when an FD is closed,
    345 			 * all the registered events are also removed.
    346 			 * Queuing EV_DELETE to a closed FD is wrong.
    347 			 * The event(s) should just be deleted from
    348 			 * the pending changelist.
    349 			 */
    350 			case ENOTCAPABLE:
    351 				continue;
    352 #endif
    353 
    354 			/* Can occur on a delete if the fd is closed. */
    355 			case EBADF:
    356 				/* XXXX On NetBSD, we can also get EBADF if we
    357 				 * try to add the write side of a pipe, but
    358 				 * the read side has already been closed.
    359 				 * Other BSDs call this situation 'EPIPE'. It
    360 				 * would be good if we had a way to report
    361 				 * this situation. */
    362 				continue;
    363 			/* These two can occur on an add if the fd was one side
    364 			 * of a pipe, and the other side was closed. */
    365 			case EPERM:
    366 			case EPIPE:
    367 				/* Report read events, if we're listening for
    368 				 * them, so that the user can learn about any
    369 				 * add errors.  (If the operation was a
    370 				 * delete, then udata should be cleared.) */
    371 				if (events[i].udata) {
    372 					/* The operation was an add:
    373 					 * report the error as a read. */
    374 					which |= EV_READ;
    375 					break;
    376 				} else {
    377 					/* The operation was a del:
    378 					 * report nothing. */
    379 					continue;
    380 				}
    381 
    382 			/* Other errors shouldn't occur. */
    383 			default:
    384 				errno = events[i].data;
    385 				return (-1);
    386 			}
    387 		} else if (events[i].filter == EVFILT_READ) {
    388 			which |= EV_READ;
    389 		} else if (events[i].filter == EVFILT_WRITE) {
    390 			which |= EV_WRITE;
    391 		} else if (events[i].filter == EVFILT_SIGNAL) {
    392 			which |= EV_SIGNAL;
    393 #ifdef EVFILT_USER
    394 		} else if (events[i].filter == EVFILT_USER) {
    395 			base->is_notify_pending = 0;
    396 #endif
    397 		}
    398 
    399 		if (!which)
    400 			continue;
    401 
    402 		if (events[i].filter == EVFILT_SIGNAL) {
    403 			evmap_signal_active_(base, events[i].ident, 1);
    404 		} else {
    405 			evmap_io_active_(base, events[i].ident, which | EV_ET);
    406 		}
    407 	}
    408 
    409 	if (res == kqop->events_size) {
    410 		/* We used all the events space that we have. Maybe we should
    411 		   make it bigger. */
    412 		kq_grow_events(kqop, kqop->events_size * 2);
    413 	}
    414 
    415 	return (0);
    416 }
    417 
    418 static void
    419 kqop_free(struct kqop *kqop)
    420 {
    421 	if (kqop->changes)
    422 		mm_free(kqop->changes);
    423 	if (kqop->events)
    424 		mm_free(kqop->events);
    425 	if (kqop->kq >= 0 && kqop->pid == getpid())
    426 		close(kqop->kq);
    427 	memset(kqop, 0, sizeof(struct kqop));
    428 	mm_free(kqop);
    429 }
    430 
    431 static void
    432 kq_dealloc(struct event_base *base)
    433 {
    434 	struct kqop *kqop = base->evbase;
    435 	evsig_dealloc_(base);
    436 	kqop_free(kqop);
    437 }
    438 
    439 /* signal handling */
    440 static int
    441 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p)
    442 {
    443 	struct kqop *kqop = base->evbase;
    444 	struct kevent kev;
    445 	struct timespec timeout = { 0, 0 };
    446 	(void)p;
    447 
    448 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    449 
    450 	memset(&kev, 0, sizeof(kev));
    451 	kev.ident = nsignal;
    452 	kev.filter = EVFILT_SIGNAL;
    453 	kev.flags = EV_ADD;
    454 
    455 	/* Be ready for the signal if it is sent any
    456 	 * time between now and the next call to
    457 	 * kq_dispatch. */
    458 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    459 		return (-1);
    460 
    461         /* We can set the handler for most signals to SIG_IGN and
    462          * still have them reported to us in the queue.  However,
    463          * if the handler for SIGCHLD is SIG_IGN, the system reaps
    464          * zombie processes for us, and we don't get any notification.
    465          * This appears to be the only signal with this quirk. */
    466 	if (evsig_set_handler_(base, nsignal,
    467                                nsignal == SIGCHLD ? SIG_DFL : SIG_IGN) == -1)
    468 		return (-1);
    469 
    470 	return (0);
    471 }
    472 
    473 static int
    474 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p)
    475 {
    476 	struct kqop *kqop = base->evbase;
    477 	struct kevent kev;
    478 
    479 	struct timespec timeout = { 0, 0 };
    480 	(void)p;
    481 
    482 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    483 
    484 	memset(&kev, 0, sizeof(kev));
    485 	kev.ident = nsignal;
    486 	kev.filter = EVFILT_SIGNAL;
    487 	kev.flags = EV_DELETE;
    488 
    489 	/* Because we insert signal events
    490 	 * immediately, we need to delete them
    491 	 * immediately, too */
    492 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    493 		return (-1);
    494 
    495 	if (evsig_restore_handler_(base, nsignal) == -1)
    496 		return (-1);
    497 
    498 	return (0);
    499 }
    500 
    501 
    502 /* OSX 10.6 and FreeBSD 8.1 add support for EVFILT_USER, which we can use
    503  * to wake up the event loop from another thread. */
    504 
    505 /* Magic number we use for our filter ID. */
    506 #define NOTIFY_IDENT 42
    507 
    508 int
    509 event_kq_add_notify_event_(struct event_base *base)
    510 {
    511 	struct kqop *kqop = base->evbase;
    512 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    513 	struct kevent kev;
    514 	struct timespec timeout = { 0, 0 };
    515 #endif
    516 
    517 	if (kqop->notify_event_added)
    518 		return 0;
    519 
    520 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    521 	memset(&kev, 0, sizeof(kev));
    522 	kev.ident = NOTIFY_IDENT;
    523 	kev.filter = EVFILT_USER;
    524 	kev.flags = EV_ADD | EV_CLEAR;
    525 
    526 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    527 		event_warn("kevent: adding EVFILT_USER event");
    528 		return -1;
    529 	}
    530 
    531 	kqop->notify_event_added = 1;
    532 
    533 	return 0;
    534 #else
    535 	return -1;
    536 #endif
    537 }
    538 
    539 int
    540 event_kq_notify_base_(struct event_base *base)
    541 {
    542 	struct kqop *kqop = base->evbase;
    543 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    544 	struct kevent kev;
    545 	struct timespec timeout = { 0, 0 };
    546 #endif
    547 	if (! kqop->notify_event_added)
    548 		return -1;
    549 
    550 #if defined(EVFILT_USER) && defined(NOTE_TRIGGER)
    551 	memset(&kev, 0, sizeof(kev));
    552 	kev.ident = NOTIFY_IDENT;
    553 	kev.filter = EVFILT_USER;
    554 	kev.fflags = NOTE_TRIGGER;
    555 
    556 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1) {
    557 		event_warn("kevent: triggering EVFILT_USER event");
    558 		return -1;
    559 	}
    560 
    561 	return 0;
    562 #else
    563 	return -1;
    564 #endif
    565 }
    566 
    567 #endif /* EVENT__HAVE_KQUEUE */
    568