Home | History | Annotate | Download | only in libevent
      1 /*
      2  * Submitted by David Pacheco (dp.spambait (at) gmail.com)
      3  *
      4  * Copyright 2006-2007 Niels Provos
      5  * Copyright 2007-2012 Niels Provos and Nick Mathewson
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. The name of the author may not be used to endorse or promote products
     16  *    derived from this software without specific prior written permission.
     17  *
     18  * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
     19  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
     20  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     21  * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
     22  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     23  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     24  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
     25  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     27  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     28  */
     29 
     30 /*
     31  * Copyright (c) 2007 Sun Microsystems. All rights reserved.
     32  * Use is subject to license terms.
     33  */
     34 
     35 /*
     36  * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
     37  * This implementation is loosely modeled after the one used for select(2) (in
     38  * select.c).
     39  *
 * The outstanding events are tracked in a data structure called evport_data.
 * Each entry in the ed_fds array corresponds to a file descriptor, and records
 * (as a bitmask of EV_READ and EV_WRITE) which events are currently subscribed
 * for that fd.
     44  *
     45  * evport_add and evport_del update this data structure. evport_dispatch uses it
     46  * to determine where to callback when an event occurs (which it gets from
     47  * port_getn).
     48  *
     49  * Helper functions are used: grow() grows the file descriptor array as
     50  * necessary when large fd's come in. reassociate() takes care of maintaining
     51  * the proper file-descriptor/event-port associations.
     52  *
     53  * As in the select(2) implementation, signals are handled by evsignal.
     54  */
     55 
     56 #include "event2/event-config.h"
     57 
     58 #include <sys/time.h>
     59 #include <sys/queue.h>
     60 #include <errno.h>
     61 #include <poll.h>
     62 #include <port.h>
     63 #include <signal.h>
     64 #include <stdio.h>
     65 #include <stdlib.h>
     66 #include <string.h>
     67 #include <time.h>
     68 #include <unistd.h>
     69 
     70 #include "event2/thread.h"
     71 
     72 #include "evthread-internal.h"
     73 #include "event-internal.h"
     74 #include "log-internal.h"
     75 #include "evsignal-internal.h"
     76 #include "evmap-internal.h"
     77 
/*
 * Default value for ed_nevents, the number of entries in the fd_info table
 * (so file descriptors 0 .. ed_nevents-1 can be tracked). If an event is
 * added for a file descriptor F >= ed_nevents, the table is grown by
 * repeatedly doubling its size until F fits.
 */
     83 #define DEFAULT_NFDS	16
     84 
     85 
     86 /*
     87  * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on
     88  * any particular call. You can speed things up by increasing this, but it will
     89  * (obviously) require more memory.
     90  */
     91 #define EVENTS_PER_GETN 8
     92 
/*
 * Per-file-descriptor information about what events we're subscribed to.
 * fdi_what is a bitmask; a value with neither EV_READ nor EV_WRITE set means
 * nothing is subscribed for that descriptor.
 */

struct fd_info {
	short fdi_what;		/* combinations of EV_READ and EV_WRITE */
};
    101 
    102 #define FDI_HAS_READ(fdi)  ((fdi)->fdi_what & EV_READ)
    103 #define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE)
    104 #define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi))
    105 #define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \
    106     (FDI_HAS_WRITE(fdi) ? POLLOUT : 0)
    107 
/*
 * Backend state for one event base: the event port itself, the growable
 * per-fd subscription table, and the descriptors from the most recent
 * dispatch that still have to be re-associated with the port.
 */
struct evport_data {
	int		ed_port;	/* event port for system events  */
	int		ed_nevents;	/* number of allocated fdi's	 */
	struct fd_info *ed_fds;		/* allocated fdi table		 */
	/* fdi's that we need to reassoc */
	/* each slot holds an fd, or -1 when the slot is unused */
	int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */
};
    115 
    116 static void*	evport_init(struct event_base *);
    117 static int evport_add(struct event_base *, int fd, short old, short events, void *);
    118 static int evport_del(struct event_base *, int fd, short old, short events, void *);
    119 static int	evport_dispatch(struct event_base *, struct timeval *);
    120 static void	evport_dealloc(struct event_base *);
    121 
/*
 * Operations table exporting this backend. The initializer is positional, so
 * the order of entries must match the member order of struct eventop (which
 * is declared outside this file).
 */
const struct eventop evportops = {
	"evport",		/* backend name string */
	evport_init,
	evport_add,
	evport_del,
	evport_dispatch,
	evport_dealloc,
	1, /* need reinit */
	0, /* features */
	0, /* fdinfo length */
};
    133 
    134 /*
    135  * Initialize the event port implementation.
    136  */
    137 
    138 static void*
    139 evport_init(struct event_base *base)
    140 {
    141 	struct evport_data *evpd;
    142 	int i;
    143 
    144 	if (!(evpd = mm_calloc(1, sizeof(struct evport_data))))
    145 		return (NULL);
    146 
    147 	if ((evpd->ed_port = port_create()) == -1) {
    148 		mm_free(evpd);
    149 		return (NULL);
    150 	}
    151 
    152 	/*
    153 	 * Initialize file descriptor structure
    154 	 */
    155 	evpd->ed_fds = mm_calloc(DEFAULT_NFDS, sizeof(struct fd_info));
    156 	if (evpd->ed_fds == NULL) {
    157 		close(evpd->ed_port);
    158 		mm_free(evpd);
    159 		return (NULL);
    160 	}
    161 	evpd->ed_nevents = DEFAULT_NFDS;
    162 	for (i = 0; i < EVENTS_PER_GETN; i++)
    163 		evpd->ed_pending[i] = -1;
    164 
    165 	evsig_init(base);
    166 
    167 	return (evpd);
    168 }
    169 
    170 #ifdef CHECK_INVARIANTS
    171 /*
    172  * Checks some basic properties about the evport_data structure. Because it
    173  * checks all file descriptors, this function can be expensive when the maximum
    174  * file descriptor ever used is rather large.
    175  */
    176 
    177 static void
    178 check_evportop(struct evport_data *evpd)
    179 {
    180 	EVUTIL_ASSERT(evpd);
    181 	EVUTIL_ASSERT(evpd->ed_nevents > 0);
    182 	EVUTIL_ASSERT(evpd->ed_port > 0);
    183 	EVUTIL_ASSERT(evpd->ed_fds > 0);
    184 }
    185 
    186 /*
    187  * Verifies very basic integrity of a given port_event.
    188  */
    189 static void
    190 check_event(port_event_t* pevt)
    191 {
    192 	/*
    193 	 * We've only registered for PORT_SOURCE_FD events. The only
    194 	 * other thing we can legitimately receive is PORT_SOURCE_ALERT,
    195 	 * but since we're not using port_alert either, we can assume
    196 	 * PORT_SOURCE_FD.
    197 	 */
    198 	EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD);
    199 	EVUTIL_ASSERT(pevt->portev_user == NULL);
    200 }
    201 
    202 #else
    203 #define check_evportop(epop)
    204 #define check_event(pevt)
    205 #endif /* CHECK_INVARIANTS */
    206 
    207 /*
    208  * Doubles the size of the allocated file descriptor array.
    209  */
    210 static int
    211 grow(struct evport_data *epdp, int factor)
    212 {
    213 	struct fd_info *tmp;
    214 	int oldsize = epdp->ed_nevents;
    215 	int newsize = factor * oldsize;
    216 	EVUTIL_ASSERT(factor > 1);
    217 
    218 	check_evportop(epdp);
    219 
    220 	tmp = mm_realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize);
    221 	if (NULL == tmp)
    222 		return -1;
    223 	epdp->ed_fds = tmp;
    224 	memset((char*) (epdp->ed_fds + oldsize), 0,
    225 	    (newsize - oldsize)*sizeof(struct fd_info));
    226 	epdp->ed_nevents = newsize;
    227 
    228 	check_evportop(epdp);
    229 
    230 	return 0;
    231 }
    232 
    233 
    234 /*
    235  * (Re)associates the given file descriptor with the event port. The OS events
    236  * are specified (implicitly) from the fd_info struct.
    237  */
    238 static int
    239 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd)
    240 {
    241 	int sysevents = FDI_TO_SYSEVENTS(fdip);
    242 
    243 	if (sysevents != 0) {
    244 		if (port_associate(epdp->ed_port, PORT_SOURCE_FD,
    245 				   fd, sysevents, NULL) == -1) {
    246 			event_warn("port_associate");
    247 			return (-1);
    248 		}
    249 	}
    250 
    251 	check_evportop(epdp);
    252 
    253 	return (0);
    254 }
    255 
    256 /*
    257  * Main event loop - polls port_getn for some number of events, and processes
    258  * them.
    259  */
    260 
    261 static int
    262 evport_dispatch(struct event_base *base, struct timeval *tv)
    263 {
    264 	int i, res;
    265 	struct evport_data *epdp = base->evbase;
    266 	port_event_t pevtlist[EVENTS_PER_GETN];
    267 
    268 	/*
    269 	 * port_getn will block until it has at least nevents events. It will
    270 	 * also return how many it's given us (which may be more than we asked
    271 	 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in
    272 	 * nevents.
    273 	 */
    274 	int nevents = 1;
    275 
    276 	/*
    277 	 * We have to convert a struct timeval to a struct timespec
    278 	 * (only difference is nanoseconds vs. microseconds). If no time-based
    279 	 * events are active, we should wait for I/O (and tv == NULL).
    280 	 */
    281 	struct timespec ts;
    282 	struct timespec *ts_p = NULL;
    283 	if (tv != NULL) {
    284 		ts.tv_sec = tv->tv_sec;
    285 		ts.tv_nsec = tv->tv_usec * 1000;
    286 		ts_p = &ts;
    287 	}
    288 
    289 	/*
    290 	 * Before doing anything else, we need to reassociate the events we hit
    291 	 * last time which need reassociation. See comment at the end of the
    292 	 * loop below.
    293 	 */
    294 	for (i = 0; i < EVENTS_PER_GETN; ++i) {
    295 		struct fd_info *fdi = NULL;
    296 		if (epdp->ed_pending[i] != -1) {
    297 			fdi = &(epdp->ed_fds[epdp->ed_pending[i]]);
    298 		}
    299 
    300 		if (fdi != NULL && FDI_HAS_EVENTS(fdi)) {
    301 			int fd = epdp->ed_pending[i];
    302 			reassociate(epdp, fdi, fd);
    303 			epdp->ed_pending[i] = -1;
    304 		}
    305 	}
    306 
    307 	EVBASE_RELEASE_LOCK(base, th_base_lock);
    308 
    309 	res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN,
    310 	    (unsigned int *) &nevents, ts_p);
    311 
    312 	EVBASE_ACQUIRE_LOCK(base, th_base_lock);
    313 
    314 	if (res == -1) {
    315 		if (errno == EINTR || errno == EAGAIN) {
    316 			return (0);
    317 		} else if (errno == ETIME) {
    318 			if (nevents == 0)
    319 				return (0);
    320 		} else {
    321 			event_warn("port_getn");
    322 			return (-1);
    323 		}
    324 	}
    325 
    326 	event_debug(("%s: port_getn reports %d events", __func__, nevents));
    327 
    328 	for (i = 0; i < nevents; ++i) {
    329 		struct fd_info *fdi;
    330 		port_event_t *pevt = &pevtlist[i];
    331 		int fd = (int) pevt->portev_object;
    332 
    333 		check_evportop(epdp);
    334 		check_event(pevt);
    335 		epdp->ed_pending[i] = fd;
    336 
    337 		/*
    338 		 * Figure out what kind of event it was
    339 		 * (because we have to pass this to the callback)
    340 		 */
    341 		res = 0;
    342 		if (pevt->portev_events & (POLLERR|POLLHUP)) {
    343 			res = EV_READ | EV_WRITE;
    344 		} else {
    345 			if (pevt->portev_events & POLLIN)
    346 				res |= EV_READ;
    347 			if (pevt->portev_events & POLLOUT)
    348 				res |= EV_WRITE;
    349 		}
    350 
    351 		/*
    352 		 * Check for the error situations or a hangup situation
    353 		 */
    354 		if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL))
    355 			res |= EV_READ|EV_WRITE;
    356 
    357 		EVUTIL_ASSERT(epdp->ed_nevents > fd);
    358 		fdi = &(epdp->ed_fds[fd]);
    359 
    360 		evmap_io_active(base, fd, res);
    361 	} /* end of all events gotten */
    362 
    363 	check_evportop(epdp);
    364 
    365 	return (0);
    366 }
    367 
    368 
    369 /*
    370  * Adds the given event (so that you will be notified when it happens via
    371  * the callback function).
    372  */
    373 
    374 static int
    375 evport_add(struct event_base *base, int fd, short old, short events, void *p)
    376 {
    377 	struct evport_data *evpd = base->evbase;
    378 	struct fd_info *fdi;
    379 	int factor;
    380 	(void)p;
    381 
    382 	check_evportop(evpd);
    383 
    384 	/*
    385 	 * If necessary, grow the file descriptor info table
    386 	 */
    387 
    388 	factor = 1;
    389 	while (fd >= factor * evpd->ed_nevents)
    390 		factor *= 2;
    391 
    392 	if (factor > 1) {
    393 		if (-1 == grow(evpd, factor)) {
    394 			return (-1);
    395 		}
    396 	}
    397 
    398 	fdi = &evpd->ed_fds[fd];
    399 	fdi->fdi_what |= events;
    400 
    401 	return reassociate(evpd, fdi, fd);
    402 }
    403 
    404 /*
    405  * Removes the given event from the list of events to wait for.
    406  */
    407 
    408 static int
    409 evport_del(struct event_base *base, int fd, short old, short events, void *p)
    410 {
    411 	struct evport_data *evpd = base->evbase;
    412 	struct fd_info *fdi;
    413 	int i;
    414 	int associated = 1;
    415 	(void)p;
    416 
    417 	check_evportop(evpd);
    418 
    419 	if (evpd->ed_nevents < fd) {
    420 		return (-1);
    421 	}
    422 
    423 	for (i = 0; i < EVENTS_PER_GETN; ++i) {
    424 		if (evpd->ed_pending[i] == fd) {
    425 			associated = 0;
    426 			break;
    427 		}
    428 	}
    429 
    430 	fdi = &evpd->ed_fds[fd];
    431 	if (events & EV_READ)
    432 		fdi->fdi_what &= ~EV_READ;
    433 	if (events & EV_WRITE)
    434 		fdi->fdi_what &= ~EV_WRITE;
    435 
    436 	if (associated) {
    437 		if (!FDI_HAS_EVENTS(fdi) &&
    438 		    port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) {
    439 			/*
    440 			 * Ignore EBADFD error the fd could have been closed
    441 			 * before event_del() was called.
    442 			 */
    443 			if (errno != EBADFD) {
    444 				event_warn("port_dissociate");
    445 				return (-1);
    446 			}
    447 		} else {
    448 			if (FDI_HAS_EVENTS(fdi)) {
    449 				return (reassociate(evpd, fdi, fd));
    450 			}
    451 		}
    452 	} else {
    453 		if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) {
    454 			evpd->ed_pending[i] = -1;
    455 		}
    456 	}
    457 	return 0;
    458 }
    459 
    460 
    461 static void
    462 evport_dealloc(struct event_base *base)
    463 {
    464 	struct evport_data *evpd = base->evbase;
    465 
    466 	evsig_dealloc(base);
    467 
    468 	close(evpd->ed_port);
    469 
    470 	if (evpd->ed_fds)
    471 		mm_free(evpd->ed_fds);
    472 	mm_free(evpd);
    473 }
    474