Home | History | Annotate | Download | only in libevent
      1 /*	$OpenBSD: kqueue.c,v 1.5 2002/07/10 14:41:31 art Exp $	*/
      2 
      3 /*
      4  * Copyright 2000-2007 Niels Provos <provos (at) citi.umich.edu>
      5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the author may not be used to endorse or promote products
     16  *    derived from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 #include "event2/event-config.h"
     30 
     31 #define _GNU_SOURCE
     32 
#include <sys/types.h>
#ifdef _EVENT_HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include <sys/queue.h>
#include <sys/event.h>
#include <errno.h>
#include <limits.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#ifdef _EVENT_HAVE_INTTYPES_H
#include <inttypes.h>
#endif
     48 
/* Some platforms apparently define the udata field of struct kevent as
 * intptr_t, whereas others define it as void*.  There doesn't seem to be an
 * easy way to tell them apart via autoconf, so we need to use OS macros.
 *
 * PTR_TO_UDATA() converts a pointer, and INT_TO_UDATA() an integer, into
 * whichever of those two representations is selected below. */
#if defined(_EVENT_HAVE_INTTYPES_H) && !defined(__OpenBSD__) && !defined(__FreeBSD__) && !defined(__darwin__) && !defined(__APPLE__)
#define PTR_TO_UDATA(x)	((intptr_t)(x))
#define INT_TO_UDATA(x) ((intptr_t)(x))
#else
#define PTR_TO_UDATA(x)	(x)
#define INT_TO_UDATA(x) ((void*)(x))
#endif
     59 
     60 #include "event-internal.h"
     61 #include "log-internal.h"
     62 #include "evmap-internal.h"
     63 #include "event2/thread.h"
     64 #include "evthread-internal.h"
     65 #include "changelist-internal.h"
     66 
/* Initial number of elements allocated by kq_init() for both the changes
 * array and the events array. */
#define NEVENT		64
     68 
/* Per-event_base state of the kqueue backend. */
struct kqop {
	/* Array of changes passed to kevent().  kq_dispatch() sets this to
	 * NULL while its kevent() call is in flight and restores it
	 * afterwards. */
	struct kevent *changes;
	/* Number of elements allocated in 'changes'. */
	int changes_size;

	/* Array that kevent() fills in with results. */
	struct kevent *events;
	/* Number of elements allocated in 'events'. */
	int events_size;
	/* Descriptor returned by kqueue(). */
	int kq;
	/* Process that created 'kq'; kqop_free() closes 'kq' only when it
	 * runs in this same process. */
	pid_t pid;
};
     78 
/* Forward declarations: these functions are referenced (by the eventop
 * tables below and by kq_init()) before their definitions. */
static void kqop_free(struct kqop *kqop);

static void *kq_init(struct event_base *);
static int kq_sig_add(struct event_base *, int, short, short, void *);
static int kq_sig_del(struct event_base *, int, short, short, void *);
static int kq_dispatch(struct event_base *, struct timeval *);
static void kq_dealloc(struct event_base *);
     86 
/* Backend descriptor for kqueue.  Adds and deletes go through the shared
 * changelist helpers; kq_dispatch() later converts the accumulated
 * changelist into kevent() changes. */
const struct eventop kqops = {
	"kqueue",
	kq_init,
	event_changelist_add,
	event_changelist_del,
	kq_dispatch,
	kq_dealloc,
	1 /* need reinit */,
    EV_FEATURE_ET|EV_FEATURE_O1|EV_FEATURE_FDS,
	EVENT_CHANGELIST_FDINFO_SIZE
};
     98 
/* Signal-handling descriptor; kq_init() installs it as base->evsigsel.
 * Only kq_sig_add() and kq_sig_del() are provided; every other function
 * slot is NULL. */
static const struct eventop kqsigops = {
	"kqueue_signal",
	NULL,
	kq_sig_add,
	kq_sig_del,
	NULL,
	NULL,
	1 /* need reinit */,
	0,
	0
};
    110 
    111 static void *
    112 kq_init(struct event_base *base)
    113 {
    114 	int kq = -1;
    115 	struct kqop *kqueueop = NULL;
    116 
    117 	if (!(kqueueop = mm_calloc(1, sizeof(struct kqop))))
    118 		return (NULL);
    119 
    120 /* Initialize the kernel queue */
    121 
    122 	if ((kq = kqueue()) == -1) {
    123 		event_warn("kqueue");
    124 		goto err;
    125 	}
    126 
    127 	kqueueop->kq = kq;
    128 
    129 	kqueueop->pid = getpid();
    130 
    131 	/* Initialize fields */
    132 	kqueueop->changes = mm_calloc(NEVENT, sizeof(struct kevent));
    133 	if (kqueueop->changes == NULL)
    134 		goto err;
    135 	kqueueop->events = mm_calloc(NEVENT, sizeof(struct kevent));
    136 	if (kqueueop->events == NULL)
    137 		goto err;
    138 	kqueueop->events_size = kqueueop->changes_size = NEVENT;
    139 
    140 	/* Check for Mac OS X kqueue bug. */
    141 	memset(&kqueueop->changes[0], 0, sizeof kqueueop->changes[0]);
    142 	kqueueop->changes[0].ident = -1;
    143 	kqueueop->changes[0].filter = EVFILT_READ;
    144 	kqueueop->changes[0].flags = EV_ADD;
    145 	/*
    146 	 * If kqueue works, then kevent will succeed, and it will
    147 	 * stick an error in events[0].  If kqueue is broken, then
    148 	 * kevent will fail.
    149 	 */
    150 	if (kevent(kq,
    151 		kqueueop->changes, 1, kqueueop->events, NEVENT, NULL) != 1 ||
    152 	    (int)kqueueop->events[0].ident != -1 ||
    153 	    kqueueop->events[0].flags != EV_ERROR) {
    154 		event_warn("%s: detected broken kqueue; not using.", __func__);
    155 		goto err;
    156 	}
    157 
    158 	base->evsigsel = &kqsigops;
    159 
    160 	return (kqueueop);
    161 err:
    162 	if (kqueueop)
    163 		kqop_free(kqueueop);
    164 
    165 	return (NULL);
    166 }
    167 
    168 static void
    169 kq_sighandler(int sig)
    170 {
    171 	/* Do nothing here */
    172 }
    173 
/* Marker stored by kq_setup_kevent() in the udata of every 'add' change.
 * 'del' changes keep udata zeroed, and kq_dispatch() tests udata to tell
 * the two apart when an error comes back. */
#define ADD_UDATA 0x30303
    175 
    176 static void
    177 kq_setup_kevent(struct kevent *out, evutil_socket_t fd, int filter, short change)
    178 {
    179 	memset(out, 0, sizeof(struct kevent));
    180 	out->ident = fd;
    181 	out->filter = filter;
    182 
    183 	if (change & EV_CHANGE_ADD) {
    184 		out->flags = EV_ADD;
    185 		/* We set a magic number here so that we can tell 'add'
    186 		 * errors from 'del' errors. */
    187 		out->udata = INT_TO_UDATA(ADD_UDATA);
    188 		if (change & EV_ET)
    189 			out->flags |= EV_CLEAR;
    190 #ifdef NOTE_EOF
    191 		/* Make it behave like select() and poll() */
    192 		if (filter == EVFILT_READ)
    193 			out->fflags = NOTE_EOF;
    194 #endif
    195 	} else {
    196 		EVUTIL_ASSERT(change & EV_CHANGE_DEL);
    197 		out->flags = EV_DELETE;
    198 	}
    199 }
    200 
    201 static int
    202 kq_build_changes_list(const struct event_changelist *changelist,
    203     struct kqop *kqop)
    204 {
    205 	int i;
    206 	int n_changes = 0;
    207 
    208 	for (i = 0; i < changelist->n_changes; ++i) {
    209 		struct event_change *in_ch = &changelist->changes[i];
    210 		struct kevent *out_ch;
    211 		if (n_changes >= kqop->changes_size - 1) {
    212 			int newsize = kqop->changes_size * 2;
    213 			struct kevent *newchanges;
    214 
    215 			newchanges = mm_realloc(kqop->changes,
    216 			    newsize * sizeof(struct kevent));
    217 			if (newchanges == NULL) {
    218 				event_warn("%s: realloc", __func__);
    219 				return (-1);
    220 			}
    221 			kqop->changes = newchanges;
    222 			kqop->changes_size = newsize;
    223 		}
    224 		if (in_ch->read_change) {
    225 			out_ch = &kqop->changes[n_changes++];
    226 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_READ,
    227 			    in_ch->read_change);
    228 		}
    229 		if (in_ch->write_change) {
    230 			out_ch = &kqop->changes[n_changes++];
    231 			kq_setup_kevent(out_ch, in_ch->fd, EVFILT_WRITE,
    232 			    in_ch->write_change);
    233 		}
    234 	}
    235 	return n_changes;
    236 }
    237 
    238 static int
    239 kq_grow_events(struct kqop *kqop, size_t new_size)
    240 {
    241 	struct kevent *newresult;
    242 
    243 	newresult = mm_realloc(kqop->events,
    244 	    new_size * sizeof(struct kevent));
    245 
    246 	if (newresult) {
    247 		kqop->events = newresult;
    248 		kqop->events_size = new_size;
    249 		return 0;
    250 	} else {
    251 		return -1;
    252 	}
    253 }
    254 
    255 static int
    256 kq_dispatch(struct event_base *base, struct timeval *tv)
    257 {
    258 	struct kqop *kqop = base->evbase;
    259 	struct kevent *events = kqop->events;
    260 	struct kevent *changes;
    261 	struct timespec ts, *ts_p = NULL;
    262 	int i, n_changes, res;
    263 
    264 	if (tv != NULL) {
    265 		TIMEVAL_TO_TIMESPEC(tv, &ts);
    266 		ts_p = &ts;
    267 	}
    268 
    269 	/* Build "changes" from "base->changes" */
    270 	EVUTIL_ASSERT(kqop->changes);
    271 	n_changes = kq_build_changes_list(&base->changelist, kqop);
    272 	if (n_changes < 0)
    273 		return -1;
    274 
    275 	event_changelist_remove_all(&base->changelist, base);
    276 
    277 	/* steal the changes array in case some broken code tries to call
    278 	 * dispatch twice at once. */
    279 	changes = kqop->changes;
    280 	kqop->changes = NULL;
    281 
    282 	/* Make sure that 'events' is at least as long as the list of changes:
    283 	 * otherwise errors in the changes can get reported as a -1 return
    284 	 * value from kevent() rather than as EV_ERROR events in the events
    285 	 * array.
    286 	 *
    287 	 * (We could instead handle -1 return values from kevent() by
    288 	 * retrying with a smaller changes array or a larger events array,
    289 	 * but this approach seems less risky for now.)
    290 	 */
    291 	if (kqop->events_size < n_changes) {
    292 		int new_size = kqop->events_size;
    293 		do {
    294 			new_size *= 2;
    295 		} while (new_size < n_changes);
    296 
    297 		kq_grow_events(kqop, new_size);
    298 		events = kqop->events;
    299 	}
    300 
    301 	EVBASE_RELEASE_LOCK(base, th_base_lock);
    302 
    303 	res = kevent(kqop->kq, changes, n_changes,
    304 	    events, kqop->events_size, ts_p);
    305 
    306 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    307 
    308 	EVUTIL_ASSERT(kqop->changes == NULL);
    309 	kqop->changes = changes;
    310 
    311 	if (res == -1) {
    312 		if (errno != EINTR) {
    313 			event_warn("kevent");
    314 			return (-1);
    315 		}
    316 
    317 		return (0);
    318 	}
    319 
    320 	event_debug(("%s: kevent reports %d", __func__, res));
    321 
    322 	for (i = 0; i < res; i++) {
    323 		int which = 0;
    324 
    325 		if (events[i].flags & EV_ERROR) {
    326 			switch (events[i].data) {
    327 
    328 			/* Can occur on delete if we are not currently
    329 			 * watching any events on this fd.  That can
    330 			 * happen when the fd was closed and another
    331 			 * file was opened with that fd. */
    332 			case ENOENT:
    333 			/* Can occur for reasons not fully understood
    334 			 * on FreeBSD. */
    335 			case EINVAL:
    336 				continue;
    337 
    338 			/* Can occur on a delete if the fd is closed. */
    339 			case EBADF:
    340 				/* XXXX On NetBSD, we can also get EBADF if we
    341 				 * try to add the write side of a pipe, but
    342 				 * the read side has already been closed.
    343 				 * Other BSDs call this situation 'EPIPE'. It
    344 				 * would be good if we had a way to report
    345 				 * this situation. */
    346 				continue;
    347 			/* These two can occur on an add if the fd was one side
    348 			 * of a pipe, and the other side was closed. */
    349 			case EPERM:
    350 			case EPIPE:
    351 				/* Report read events, if we're listening for
    352 				 * them, so that the user can learn about any
    353 				 * add errors.  (If the operation was a
    354 				 * delete, then udata should be cleared.) */
    355 				if (events[i].udata) {
    356 					/* The operation was an add:
    357 					 * report the error as a read. */
    358 					which |= EV_READ;
    359 					break;
    360 				} else {
    361 					/* The operation was a del:
    362 					 * report nothing. */
    363 					continue;
    364 				}
    365 
    366 			/* Other errors shouldn't occur. */
    367 			default:
    368 				errno = events[i].data;
    369 				return (-1);
    370 			}
    371 		} else if (events[i].filter == EVFILT_READ) {
    372 			which |= EV_READ;
    373 		} else if (events[i].filter == EVFILT_WRITE) {
    374 			which |= EV_WRITE;
    375 		} else if (events[i].filter == EVFILT_SIGNAL) {
    376 			which |= EV_SIGNAL;
    377 		}
    378 
    379 		if (!which)
    380 			continue;
    381 
    382 		if (events[i].filter == EVFILT_SIGNAL) {
    383 			evmap_signal_active(base, events[i].ident, 1);
    384 		} else {
    385 			evmap_io_active(base, events[i].ident, which | EV_ET);
    386 		}
    387 	}
    388 
    389 	if (res == kqop->events_size) {
    390 		/* We used all the events space that we have. Maybe we should
    391 		   make it bigger. */
    392 		kq_grow_events(kqop, kqop->events_size * 2);
    393 	}
    394 
    395 	return (0);
    396 }
    397 
    398 static void
    399 kqop_free(struct kqop *kqop)
    400 {
    401 	if (kqop->changes)
    402 		mm_free(kqop->changes);
    403 	if (kqop->events)
    404 		mm_free(kqop->events);
    405 	if (kqop->kq >= 0 && kqop->pid == getpid())
    406 		close(kqop->kq);
    407 	memset(kqop, 0, sizeof(struct kqop));
    408 	mm_free(kqop);
    409 }
    410 
    411 static void
    412 kq_dealloc(struct event_base *base)
    413 {
    414 	struct kqop *kqop = base->evbase;
    415 	evsig_dealloc(base);
    416 	kqop_free(kqop);
    417 }
    418 
    419 /* signal handling */
    420 static int
    421 kq_sig_add(struct event_base *base, int nsignal, short old, short events, void *p)
    422 {
    423 	struct kqop *kqop = base->evbase;
    424 	struct kevent kev;
    425 	struct timespec timeout = { 0, 0 };
    426 	(void)p;
    427 
    428 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    429 
    430 	memset(&kev, 0, sizeof(kev));
    431 	kev.ident = nsignal;
    432 	kev.filter = EVFILT_SIGNAL;
    433 	kev.flags = EV_ADD;
    434 
    435 	/* Be ready for the signal if it is sent any
    436 	 * time between now and the next call to
    437 	 * kq_dispatch. */
    438 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    439 		return (-1);
    440 
    441 	/* XXXX The manpage suggest we could use SIG_IGN instead of a
    442 	 * do-nothing handler */
    443 	if (_evsig_set_handler(base, nsignal, kq_sighandler) == -1)
    444 		return (-1);
    445 
    446 	return (0);
    447 }
    448 
    449 static int
    450 kq_sig_del(struct event_base *base, int nsignal, short old, short events, void *p)
    451 {
    452 	struct kqop *kqop = base->evbase;
    453 	struct kevent kev;
    454 
    455 	struct timespec timeout = { 0, 0 };
    456 	(void)p;
    457 
    458 	EVUTIL_ASSERT(nsignal >= 0 && nsignal < NSIG);
    459 
    460 	memset(&kev, 0, sizeof(kev));
    461 	kev.ident = nsignal;
    462 	kev.filter = EVFILT_SIGNAL;
    463 	kev.flags = EV_DELETE;
    464 
    465 	/* Because we insert signal events
    466 	 * immediately, we need to delete them
    467 	 * immediately, too */
    468 	if (kevent(kqop->kq, &kev, 1, NULL, 0, &timeout) == -1)
    469 		return (-1);
    470 
    471 	if (_evsig_restore_handler(base, nsignal) == -1)
    472 		return (-1);
    473 
    474 	return (0);
    475 }
    476