Home | History | Annotate | Download | only in libevent
      1 /*
      2  * Submitted by David Pacheco (dp.spambait (at) gmail.com)
      3  *
      4  * Copyright 2006-2007 Niels Provos
      5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the author may not be used to endorse or promote products
     16  *    derived from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
     22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 /*
     31  * Copyright (c) 2007 Sun Microsystems. All rights reserved.
     32  * Use is subject to license terms.
     33  */
     34 
     35 /*
     36  * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
     37  * This implementation is loosely modeled after the one used for select(2) (in
     38  * select.c).
     39  *
 * Backend-wide state is kept in a structure called evport_data: the event
 * port itself, the buffer that port_getn fills in, and the list of file
 * descriptors that fired during the previous dispatch and still have to be
 * re-associated with the port. Per-descriptor state is kept in a struct
 * fd_info, which records whether the fd is subscribed for reading and/or
 * writing and where, if anywhere, it sits in that pending list.
 *
 * evport_add and evport_del update the fd_info and the port association.
 * evport_dispatch fetches events with port_getn and reports each of them
 * through evmap_io_active_.
 *
 * Helper functions are used: grow() enlarges the pending list and the
 * port_getn buffer together when a dispatch fills them. reassociate() takes
 * care of maintaining the proper file-descriptor/event-port associations.
     52  *
     53  * As in the select(2) implementation, signals are handled by evsignal.
     54  */
     55 
     56 #include "event2/event-config.h"
     57 #include "evconfig-private.h"
     58 
     59 #ifdef EVENT__HAVE_EVENT_PORTS
     60 
     61 #include <sys/time.h>
     62 #include <sys/queue.h>
     63 #include <errno.h>
     64 #include <poll.h>
     65 #include <port.h>
     66 #include <signal.h>
     67 #include <stdio.h>
     68 #include <stdlib.h>
     69 #include <string.h>
     70 #include <time.h>
     71 #include <unistd.h>
     72 
     73 #include "event2/thread.h"
     74 
     75 #include "evthread-internal.h"
     76 #include "event-internal.h"
     77 #include "log-internal.h"
     78 #include "evsignal-internal.h"
     79 #include "evmap-internal.h"
     80 
     81 #define INITIAL_EVENTS_PER_GETN 8
     82 #define MAX_EVENTS_PER_GETN 4096
     83 
/*
 * Per-file-descriptor information about what events we're subscribed to.
 * evportops advertises sizeof(struct fd_info) as its fdinfo length, and the
 * backend receives a pointer to this record as the last argument of
 * evport_add/evport_del and from evmap_io_get_fdinfo_.
 */

struct fd_info {
	/* Bitmask of subscribed events.  Only the EV_READ and EV_WRITE bits
	 * are ever tested in this file; zero means nothing is subscribed. */
	short fdi_what;
	/* Index of this fd within ed_pending, plus 1.  Zero if this fd is
	 * not in ed_pending.  (The +1 is a hack so that memset(0) will set
	 * it to a nil index.) */
	int pending_idx_plus_1;
};
     97 
     98 #define FDI_HAS_READ(fdi)  ((fdi)->fdi_what & EV_READ)
     99 #define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE)
    100 #define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
    101 #define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \
    102     (FDI_HAS_WRITE(fdi) ? POLLOUT : 0)
    103 
/*
 * Backend-wide state; allocated by evport_init and reached through
 * base->evbase by the other entry points.
 */
struct evport_data {
	int		ed_port;	/* event port for system events  */
	/* How many elements of ed_pending should we look at? */
	/* (set to the number of events returned by the last port_getn) */
	int ed_npending;
	/* How many elements are allocated in ed_pending and pevtlist? */
	int ed_maxevents;
	/* File descriptors whose fd_info we need to reassociate on the next
	 * dispatch; a slot holds -1 once evport_del has cancelled it. */
	int *ed_pending;
	/* storage space for incoming events. */
	port_event_t *ed_pevtlist;

};
    116 
    117 static void*	evport_init(struct event_base *);
    118 static int evport_add(struct event_base *, int fd, short old, short events, void *);
    119 static int evport_del(struct event_base *, int fd, short old, short events, void *);
    120 static int	evport_dispatch(struct event_base *, struct timeval *);
    121 static void	evport_dealloc(struct event_base *);
    122 static int	grow(struct evport_data *, int min_events);
    123 
/*
 * Backend descriptor for event ports.  The initializers are positional, so
 * their order has to match the declaration of struct eventop (declared
 * outside this file).
 */
const struct eventop evportops = {
	"evport",
	evport_init,
	evport_add,
	evport_del,
	evport_dispatch,
	evport_dealloc,
	1, /* need reinit */
	0, /* features */
	sizeof(struct fd_info), /* fdinfo length */
	/* NOTE(review): presumably the size of the per-fd record handed back as
	 * the last argument of evport_add/evport_del -- confirm against
	 * struct eventop. */
};
    135 
    136 /*
    137  * Initialize the event port implementation.
    138  */
    139 
    140 static void*
    141 evport_init(struct event_base *base)
    142 {
    143 	struct evport_data *evpd;
    144 
    145 	if (!(evpd = mm_calloc(1, sizeof(struct evport_data))))
    146 		return (NULL);
    147 
    148 	if ((evpd->ed_port = port_create()) == -1) {
    149 		mm_free(evpd);
    150 		return (NULL);
    151 	}
    152 
    153 	if (grow(evpd, INITIAL_EVENTS_PER_GETN) < 0) {
    154 		close(evpd->ed_port);
    155 		mm_free(evpd);
    156 		return NULL;
    157 	}
    158 
    159 	evpd->ed_npending = 0;
    160 
    161 	evsig_init_(base);
    162 
    163 	return (evpd);
    164 }
    165 
    166 static int
    167 grow(struct evport_data *data, int min_events)
    168 {
    169 	int newsize;
    170 	int *new_pending;
    171 	port_event_t *new_pevtlist;
    172 	if (data->ed_maxevents) {
    173 		newsize = data->ed_maxevents;
    174 		do {
    175 			newsize *= 2;
    176 		} while (newsize < min_events);
    177 	} else {
    178 		newsize = min_events;
    179 	}
    180 
    181 	new_pending = mm_realloc(data->ed_pending, sizeof(int)*newsize);
    182 	if (new_pending == NULL)
    183 		return -1;
    184 	data->ed_pending = new_pending;
    185 	new_pevtlist = mm_realloc(data->ed_pevtlist, sizeof(port_event_t)*newsize);
    186 	if (new_pevtlist == NULL)
    187 		return -1;
    188 	data->ed_pevtlist = new_pevtlist;
    189 
    190 	data->ed_maxevents = newsize;
    191 	return 0;
    192 }
    193 
    194 #ifdef CHECK_INVARIANTS
    195 /*
    196  * Checks some basic properties about the evport_data structure. Because it
    197  * checks all file descriptors, this function can be expensive when the maximum
    198  * file descriptor ever used is rather large.
    199  */
    200 
    201 static void
    202 check_evportop(struct evport_data *evpd)
    203 {
    204 	EVUTIL_ASSERT(evpd);
    205 	EVUTIL_ASSERT(evpd->ed_port > 0);
    206 }
    207 
    208 /*
    209  * Verifies very basic integrity of a given port_event.
    210  */
    211 static void
    212 check_event(port_event_t* pevt)
    213 {
    214 	/*
    215 	 * We've only registered for PORT_SOURCE_FD events. The only
    216 	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
    217 	 * but since we're not using port_alert either, we can assume
    218 	 * PORT_SOURCE_FD.
    219 	 */
    220 	EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD);
    221 }
    222 
    223 #else
    224 #define check_evportop(epop)
    225 #define check_event(pevt)
    226 #endif /* CHECK_INVARIANTS */
    227 
    228 /*
    229  * (Re)associates the given file descriptor with the event port. The OS events
    230  * are specified (implicitly) from the fd_info struct.
    231  */
    232 static int
    233 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
    234 {
    235 	int sysevents = FDI_TO_SYSEVENTS(fdip);
    236 
    237 	if (sysevents != 0) {
    238 		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
    239 				   fd, sysevents, fdip) == -1) {
    240 			event_warn("port_associate");
    241 			return (-1);
    242 		}
    243 	}
    244 
    245 	check_evportop(epdp);
    246 
    247 	return (0);
    248 }
    249 
    250 /*
    251  * Main event loop - polls port_getn for some number of events, and processes
    252  * them.
    253  */
    254 
    255 static int
    256 evport_dispatch(struct event_base *base, struct timeval *tv)
    257 {
    258 	int i, res;
    259 	struct evport_data *epdp = base->evbase;
    260 	port_event_t *pevtlist = epdp->ed_pevtlist;
    261 
    262 	/*
    263 	 * port_getn will block until it has at least nevents events. It will
    264 	 * also return how many it's given us (which may be more than we asked
    265 	 * for, as long as it's less than our maximum (ed_maxevents)) in
    266 	 * nevents.
    267 	 */
    268 	int nevents = 1;
    269 
    270 	/*
    271 	 * We have to convert a struct timeval to a struct timespec
    272 	 * (only difference is nanoseconds vs. microseconds). If no time-based
    273 	 * events are active, we should wait for I/O (and tv == NULL).
    274 	 */
    275 	struct timespec ts;
    276 	struct timespec *ts_p = NULL;
    277 	if (tv != NULL) {
    278 		ts.tv_sec = tv->tv_sec;
    279 		ts.tv_nsec = tv->tv_usec * 1000;
    280 		ts_p = &ts;
    281 	}
    282 
    283 	/*
    284 	 * Before doing anything else, we need to reassociate the events we hit
    285 	 * last time which need reassociation. See comment at the end of the
    286 	 * loop below.
    287 	 */
    288 	for (i = 0; i < epdp->ed_npending; ++i) {
    289 		struct fd_info *fdi = NULL;
    290 		const int fd = epdp->ed_pending[i];
    291 		if (fd != -1) {
    292 			/* We might have cleared out this event; we need
    293 			 * to be sure that it's still set. */
    294 			fdi = evmap_io_get_fdinfo_(&base->io, fd);
    295 		}
    296 
    297 		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
    298 			reassociate(epdp, fdi, fd);
    299 			/* epdp->ed_pending[i] = -1; */
    300 			fdi->pending_idx_plus_1 = 0;
    301 		}
    302 	}
    303 
    304 	EVBASE_RELEASE_LOCK(base, th_base_lock);
    305 
    306 	res = port_getn(epdp->ed_port, pevtlist, epdp->ed_maxevents,
    307 	    (unsigned int *) &nevents, ts_p);
    308 
    309 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    310 
    311 	if (res == -1) {
    312 		if (errno == EINTR || errno == EAGAIN) {
    313 			return (0);
    314 		} else if (errno == ETIME) {
    315 			if (nevents == 0)
    316 				return (0);
    317 		} else {
    318 			event_warn("port_getn");
    319 			return (-1);
    320 		}
    321 	}
    322 
    323 	event_debug(("%s: port_getn reports %d events", __func__, nevents));
    324 
    325 	for (i = 0; i < nevents; ++i) {
    326 		port_event_t *pevt = &pevtlist[i];
    327 		int fd = (int) pevt->portev_object;
    328 		struct fd_info *fdi = pevt->portev_user;
    329 		/*EVUTIL_ASSERT(evmap_io_get_fdinfo_(&base->io, fd) == fdi);*/
    330 
    331 		check_evportop(epdp);
    332 		check_event(pevt);
    333 		epdp->ed_pending[i] = fd;
    334 		fdi->pending_idx_plus_1 = i + 1;
    335 
    336 		/*
    337 		 * Figure out what kind of event it was
    338 		 * (because we have to pass this to the callback)
    339 		 */
    340 		res = 0;
    341 		if (pevt->portev_events & (POLLERR|POLLHUP)) {
    342 			res = EV_READ | EV_WRITE;
    343 		} else {
    344 			if (pevt->portev_events & POLLIN)
    345 				res |= EV_READ;
    346 			if (pevt->portev_events & POLLOUT)
    347 				res |= EV_WRITE;
    348 		}
    349 
    350 		/*
    351 		 * Check for the error situations or a hangup situation
    352 		 */
    353 		if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL))
    354 			res |= EV_READ|EV_WRITE;
    355 
    356 		evmap_io_active_(base, fd, res);
    357 	} /* end of all events gotten */
    358 	epdp->ed_npending = nevents;
    359 
    360 	if (nevents == epdp->ed_maxevents &&
    361 	    epdp->ed_maxevents < MAX_EVENTS_PER_GETN) {
    362 		/* we used all the space this time.  We should be ready
    363 		 * for more events next time around. */
    364 		grow(epdp, epdp->ed_maxevents * 2);
    365 	}
    366 
    367 	check_evportop(epdp);
    368 
    369 	return (0);
    370 }
    371 
    372 
    373 /*
    374  * Adds the given event (so that you will be notified when it happens via
    375  * the callback function).
    376  */
    377 
    378 static int
    379 evport_add(struct event_base *base, int fd, short old, short events, void *p)
    380 {
    381 	struct evport_data *evpd = base->evbase;
    382 	struct fd_info *fdi = p;
    383 
    384 	check_evportop(evpd);
    385 
    386 	fdi->fdi_what |= events;
    387 
    388 	return reassociate(evpd, fdi, fd);
    389 }
    390 
    391 /*
    392  * Removes the given event from the list of events to wait for.
    393  */
    394 
    395 static int
    396 evport_del(struct event_base *base, int fd, short old, short events, void *p)
    397 {
    398 	struct evport_data *evpd = base->evbase;
    399 	struct fd_info *fdi = p;
    400 	int associated = ! fdi->pending_idx_plus_1;
    401 
    402 	check_evportop(evpd);
    403 
    404 	fdi->fdi_what &= ~(events &(EV_READ|EV_WRITE));
    405 
    406 	if (associated) {
    407 		if (!FDI_HAS_EVENTS(fdi) &&
    408 		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) {
    409 			/*
    410 			 * Ignore EBADFD error the fd could have been closed
    411 			 * before event_del() was called.
    412 			 */
    413 			if (errno != EBADFD) {
    414 				event_warn("port_dissociate");
    415 				return (-1);
    416 			}
    417 		} else {
    418 			if (FDI_HAS_EVENTS(fdi)) {
    419 				return (reassociate(evpd, fdi, fd));
    420 			}
    421 		}
    422 	} else {
    423 		if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) {
    424 			const int i = fdi->pending_idx_plus_1 - 1;
    425 			EVUTIL_ASSERT(evpd->ed_pending[i] == fd);
    426 			evpd->ed_pending[i] = -1;
    427 			fdi->pending_idx_plus_1 = 0;
    428 		}
    429 	}
    430 	return 0;
    431 }
    432 
    433 
    434 static void
    435 evport_dealloc(struct event_base *base)
    436 {
    437 	struct evport_data *evpd = base->evbase;
    438 
    439 	evsig_dealloc_(base);
    440 
    441 	close(evpd->ed_port);
    442 
    443 	if (evpd->ed_pending)
    444 		mm_free(evpd->ed_pending);
    445 	if (evpd->ed_pevtlist)
    446 		mm_free(evpd->ed_pevtlist);
    447 
    448 	mm_free(evpd);
    449 }
    450 
    451 #endif /* EVENT__HAVE_EVENT_PORTS */
    452