Home | History | Annotate | Download | only in libevent
      1 /*
      2  * Copyright 2000-2004 Niels Provos <provos (at) citi.umich.edu>
      3  * All rights reserved.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions
      7  * are met:
      8  * 1. Redistributions of source code must retain the above copyright
      9  *    notice, this list of conditions and the following disclaimer.
     10  * 2. Redistributions in binary form must reproduce the above copyright
     11  *    notice, this list of conditions and the following disclaimer in the
     12  *    documentation and/or other materials provided with the distribution.
     13  * 3. The name of the author may not be used to endorse or promote products
     14  *    derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     17  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
     19  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
     20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     21  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
     22  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
     23  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
     24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
     25  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 #ifdef HAVE_CONFIG_H
     28 #include "config.h"
     29 #endif
     30 
#include <sys/types.h>
#include <sys/resource.h>
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#else
#include <sys/_libevent_time.h>
#endif
#include <sys/queue.h>
#include <sys/devpoll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <limits.h>
#include <assert.h>
     48 
     49 #include "event.h"
     50 #include "event-internal.h"
     51 #include "evsignal.h"
     52 #include "log.h"
     53 
/* Due to limitations in the devpoll interface, we need to keep track of
 * all file descriptors ourselves.
 */
/*
 * Bookkeeping slot for one file descriptor; the slots live in an array
 * indexed by the descriptor number.
 */
struct evdevpoll {
	struct event *evread;	/* event registered for reading, or NULL */
	struct event *evwrite;	/* event registered for writing, or NULL */
};
     61 
/* State of one /dev/poll backend instance. */
struct devpollop {
	struct evdevpoll *fds;	/* per-descriptor slots, indexed by fd */
	int nfds;		/* number of slots allocated in fds */
	struct pollfd *events;	/* result buffer handed to DP_POLL */
	int nevents;		/* entries allocated in events (and in changes) */
	int dpfd;		/* descriptor of the opened /dev/poll device */
	struct pollfd *changes;	/* updates queued but not yet written to dpfd */
	int nchanges;		/* number of entries currently used in changes */
};
     71 
     72 static void *devpoll_init	(struct event_base *);
     73 static int devpoll_add	(void *, struct event *);
     74 static int devpoll_del	(void *, struct event *);
     75 static int devpoll_dispatch	(struct event_base *, void *, struct timeval *);
     76 static void devpoll_dealloc	(struct event_base *, void *);
     77 
/*
 * Method table describing the /dev/poll backend: its name followed by the
 * init, add, del, dispatch and dealloc entry points defined in this file.
 */
const struct eventop devpollops = {
	"devpoll",
	devpoll_init,
	devpoll_add,
	devpoll_del,
	devpoll_dispatch,
	devpoll_dealloc,
	1 /* need reinit */
};
     87 
/*
 * Array size devpoll_init() falls back to when RLIMIT_NOFILE cannot be
 * read or is reported as RLIM_INFINITY.
 */
#define NEVENT	32000
     89 
     90 static int
     91 devpoll_commit(struct devpollop *devpollop)
     92 {
     93 	/*
     94 	 * Due to a bug in Solaris, we have to use pwrite with an offset of 0.
     95 	 * Write is limited to 2GB of data, until it will fail.
     96 	 */
     97 	if (pwrite(devpollop->dpfd, devpollop->changes,
     98 		sizeof(struct pollfd) * devpollop->nchanges, 0) == -1)
     99 		return(-1);
    100 
    101 	devpollop->nchanges = 0;
    102 	return(0);
    103 }
    104 
    105 static int
    106 devpoll_queue(struct devpollop *devpollop, int fd, int events) {
    107 	struct pollfd *pfd;
    108 
    109 	if (devpollop->nchanges >= devpollop->nevents) {
    110 		/*
    111 		 * Change buffer is full, must commit it to /dev/poll before
    112 		 * adding more
    113 		 */
    114 		if (devpoll_commit(devpollop) != 0)
    115 			return(-1);
    116 	}
    117 
    118 	pfd = &devpollop->changes[devpollop->nchanges++];
    119 	pfd->fd = fd;
    120 	pfd->events = events;
    121 	pfd->revents = 0;
    122 
    123 	return(0);
    124 }
    125 
    126 static void *
    127 devpoll_init(struct event_base *base)
    128 {
    129 	int dpfd, nfiles = NEVENT;
    130 	struct rlimit rl;
    131 	struct devpollop *devpollop;
    132 
    133 	/* Disable devpoll when this environment variable is set */
    134 	if (evutil_getenv("EVENT_NODEVPOLL"))
    135 		return (NULL);
    136 
    137 	if (!(devpollop = calloc(1, sizeof(struct devpollop))))
    138 		return (NULL);
    139 
    140 	if (getrlimit(RLIMIT_NOFILE, &rl) == 0 &&
    141 	    rl.rlim_cur != RLIM_INFINITY)
    142 		nfiles = rl.rlim_cur;
    143 
    144 	/* Initialize the kernel queue */
    145 	if ((dpfd = open("/dev/poll", O_RDWR)) == -1) {
    146                 event_warn("open: /dev/poll");
    147 		free(devpollop);
    148 		return (NULL);
    149 	}
    150 
    151 	devpollop->dpfd = dpfd;
    152 
    153 	/* Initialize fields */
    154 	devpollop->events = calloc(nfiles, sizeof(struct pollfd));
    155 	if (devpollop->events == NULL) {
    156 		free(devpollop);
    157 		close(dpfd);
    158 		return (NULL);
    159 	}
    160 	devpollop->nevents = nfiles;
    161 
    162 	devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll));
    163 	if (devpollop->fds == NULL) {
    164 		free(devpollop->events);
    165 		free(devpollop);
    166 		close(dpfd);
    167 		return (NULL);
    168 	}
    169 	devpollop->nfds = nfiles;
    170 
    171 	devpollop->changes = calloc(nfiles, sizeof(struct pollfd));
    172 	if (devpollop->changes == NULL) {
    173 		free(devpollop->fds);
    174 		free(devpollop->events);
    175 		free(devpollop);
    176 		close(dpfd);
    177 		return (NULL);
    178 	}
    179 
    180 	evsignal_init(base);
    181 
    182 	return (devpollop);
    183 }
    184 
    185 static int
    186 devpoll_recalc(struct event_base *base, void *arg, int max)
    187 {
    188 	struct devpollop *devpollop = arg;
    189 
    190 	if (max >= devpollop->nfds) {
    191 		struct evdevpoll *fds;
    192 		int nfds;
    193 
    194 		nfds = devpollop->nfds;
    195 		while (nfds <= max)
    196 			nfds <<= 1;
    197 
    198 		fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll));
    199 		if (fds == NULL) {
    200 			event_warn("realloc");
    201 			return (-1);
    202 		}
    203 		devpollop->fds = fds;
    204 		memset(fds + devpollop->nfds, 0,
    205 		    (nfds - devpollop->nfds) * sizeof(struct evdevpoll));
    206 		devpollop->nfds = nfds;
    207 	}
    208 
    209 	return (0);
    210 }
    211 
    212 static int
    213 devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv)
    214 {
    215 	struct devpollop *devpollop = arg;
    216 	struct pollfd *events = devpollop->events;
    217 	struct dvpoll dvp;
    218 	struct evdevpoll *evdp;
    219 	int i, res, timeout = -1;
    220 
    221 	if (devpollop->nchanges)
    222 		devpoll_commit(devpollop);
    223 
    224 	if (tv != NULL)
    225 		timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000;
    226 
    227 	dvp.dp_fds = devpollop->events;
    228 	dvp.dp_nfds = devpollop->nevents;
    229 	dvp.dp_timeout = timeout;
    230 
    231 	res = ioctl(devpollop->dpfd, DP_POLL, &dvp);
    232 
    233 	if (res == -1) {
    234 		if (errno != EINTR) {
    235 			event_warn("ioctl: DP_POLL");
    236 			return (-1);
    237 		}
    238 
    239 		evsignal_process(base);
    240 		return (0);
    241 	} else if (base->sig.evsignal_caught) {
    242 		evsignal_process(base);
    243 	}
    244 
    245 	event_debug(("%s: devpoll_wait reports %d", __func__, res));
    246 
    247 	for (i = 0; i < res; i++) {
    248 		int which = 0;
    249 		int what = events[i].revents;
    250 		struct event *evread = NULL, *evwrite = NULL;
    251 
    252 		assert(events[i].fd < devpollop->nfds);
    253 		evdp = &devpollop->fds[events[i].fd];
    254 
    255                 if (what & POLLHUP)
    256                         what |= POLLIN | POLLOUT;
    257                 else if (what & POLLERR)
    258                         what |= POLLIN | POLLOUT;
    259 
    260 		if (what & POLLIN) {
    261 			evread = evdp->evread;
    262 			which |= EV_READ;
    263 		}
    264 
    265 		if (what & POLLOUT) {
    266 			evwrite = evdp->evwrite;
    267 			which |= EV_WRITE;
    268 		}
    269 
    270 		if (!which)
    271 			continue;
    272 
    273 		if (evread != NULL && !(evread->ev_events & EV_PERSIST))
    274 			event_del(evread);
    275 		if (evwrite != NULL && evwrite != evread &&
    276 		    !(evwrite->ev_events & EV_PERSIST))
    277 			event_del(evwrite);
    278 
    279 		if (evread != NULL)
    280 			event_active(evread, EV_READ, 1);
    281 		if (evwrite != NULL)
    282 			event_active(evwrite, EV_WRITE, 1);
    283 	}
    284 
    285 	return (0);
    286 }
    287 
    288 
    289 static int
    290 devpoll_add(void *arg, struct event *ev)
    291 {
    292 	struct devpollop *devpollop = arg;
    293 	struct evdevpoll *evdp;
    294 	int fd, events;
    295 
    296 	if (ev->ev_events & EV_SIGNAL)
    297 		return (evsignal_add(ev));
    298 
    299 	fd = ev->ev_fd;
    300 	if (fd >= devpollop->nfds) {
    301 		/* Extend the file descriptor array as necessary */
    302 		if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1)
    303 			return (-1);
    304 	}
    305 	evdp = &devpollop->fds[fd];
    306 
    307 	/*
    308 	 * It's not necessary to OR the existing read/write events that we
    309 	 * are currently interested in with the new event we are adding.
    310 	 * The /dev/poll driver ORs any new events with the existing events
    311 	 * that it has cached for the fd.
    312 	 */
    313 
    314 	events = 0;
    315 	if (ev->ev_events & EV_READ) {
    316 		if (evdp->evread && evdp->evread != ev) {
    317 		   /* There is already a different read event registered */
    318 		   return(-1);
    319 		}
    320 		events |= POLLIN;
    321 	}
    322 
    323 	if (ev->ev_events & EV_WRITE) {
    324 		if (evdp->evwrite && evdp->evwrite != ev) {
    325 		   /* There is already a different write event registered */
    326 		   return(-1);
    327 		}
    328 		events |= POLLOUT;
    329 	}
    330 
    331 	if (devpoll_queue(devpollop, fd, events) != 0)
    332 		return(-1);
    333 
    334 	/* Update events responsible */
    335 	if (ev->ev_events & EV_READ)
    336 		evdp->evread = ev;
    337 	if (ev->ev_events & EV_WRITE)
    338 		evdp->evwrite = ev;
    339 
    340 	return (0);
    341 }
    342 
    343 static int
    344 devpoll_del(void *arg, struct event *ev)
    345 {
    346 	struct devpollop *devpollop = arg;
    347 	struct evdevpoll *evdp;
    348 	int fd, events;
    349 	int needwritedelete = 1, needreaddelete = 1;
    350 
    351 	if (ev->ev_events & EV_SIGNAL)
    352 		return (evsignal_del(ev));
    353 
    354 	fd = ev->ev_fd;
    355 	if (fd >= devpollop->nfds)
    356 		return (0);
    357 	evdp = &devpollop->fds[fd];
    358 
    359 	events = 0;
    360 	if (ev->ev_events & EV_READ)
    361 		events |= POLLIN;
    362 	if (ev->ev_events & EV_WRITE)
    363 		events |= POLLOUT;
    364 
    365 	/*
    366 	 * The only way to remove an fd from the /dev/poll monitored set is
    367 	 * to use POLLREMOVE by itself.  This removes ALL events for the fd
    368 	 * provided so if we care about two events and are only removing one
    369 	 * we must re-add the other event after POLLREMOVE.
    370 	 */
    371 
    372 	if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0)
    373 		return(-1);
    374 
    375 	if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) {
    376 		/*
    377 		 * We're not deleting all events, so we must resubmit the
    378 		 * event that we are still interested in if one exists.
    379 		 */
    380 
    381 		if ((events & POLLIN) && evdp->evwrite != NULL) {
    382 			/* Deleting read, still care about write */
    383 			devpoll_queue(devpollop, fd, POLLOUT);
    384 			needwritedelete = 0;
    385 		} else if ((events & POLLOUT) && evdp->evread != NULL) {
    386 			/* Deleting write, still care about read */
    387 			devpoll_queue(devpollop, fd, POLLIN);
    388 			needreaddelete = 0;
    389 		}
    390 	}
    391 
    392 	if (needreaddelete)
    393 		evdp->evread = NULL;
    394 	if (needwritedelete)
    395 		evdp->evwrite = NULL;
    396 
    397 	return (0);
    398 }
    399 
    400 static void
    401 devpoll_dealloc(struct event_base *base, void *arg)
    402 {
    403 	struct devpollop *devpollop = arg;
    404 
    405 	evsignal_dealloc(base);
    406 	if (devpollop->fds)
    407 		free(devpollop->fds);
    408 	if (devpollop->events)
    409 		free(devpollop->events);
    410 	if (devpollop->changes)
    411 		free(devpollop->changes);
    412 	if (devpollop->dpfd >= 0)
    413 		close(devpollop->dpfd);
    414 
    415 	memset(devpollop, 0, sizeof(struct devpollop));
    416 	free(devpollop);
    417 }
    418