1 /* 2 * Copyright 2000-2004 Niels Provos <provos (at) citi.umich.edu> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 21 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 22 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 23 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 24 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 25 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 #ifdef HAVE_CONFIG_H 28 #include "config.h" 29 #endif 30 31 #include <sys/types.h> 32 #include <sys/resource.h> 33 #ifdef HAVE_SYS_TIME_H 34 #include <sys/time.h> 35 #else 36 #include <sys/_libevent_time.h> 37 #endif 38 #include <sys/queue.h> 39 #include <sys/devpoll.h> 40 #include <signal.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <string.h> 44 #include <unistd.h> 45 #include <fcntl.h> 46 #include <errno.h> 47 #include <assert.h> 48 49 #include "event.h" 50 #include "event-internal.h" 51 #include "evsignal.h" 52 #include "log.h" 53 54 /* due to limitations in the devpoll interface, we need to keep track of 55 * all file descriptors outself. 56 */ 57 struct evdevpoll { 58 struct event *evread; 59 struct event *evwrite; 60 }; 61 62 struct devpollop { 63 struct evdevpoll *fds; 64 int nfds; 65 struct pollfd *events; 66 int nevents; 67 int dpfd; 68 struct pollfd *changes; 69 int nchanges; 70 }; 71 72 static void *devpoll_init (struct event_base *); 73 static int devpoll_add (void *, struct event *); 74 static int devpoll_del (void *, struct event *); 75 static int devpoll_dispatch (struct event_base *, void *, struct timeval *); 76 static void devpoll_dealloc (struct event_base *, void *); 77 78 const struct eventop devpollops = { 79 "devpoll", 80 devpoll_init, 81 devpoll_add, 82 devpoll_del, 83 devpoll_dispatch, 84 devpoll_dealloc, 85 1 /* need reinit */ 86 }; 87 88 #define NEVENT 32000 89 90 static int 91 devpoll_commit(struct devpollop *devpollop) 92 { 93 /* 94 * Due to a bug in Solaris, we have to use pwrite with an offset of 0. 95 * Write is limited to 2GB of data, until it will fail. 96 */ 97 if (pwrite(devpollop->dpfd, devpollop->changes, 98 sizeof(struct pollfd) * devpollop->nchanges, 0) == -1) 99 return(-1); 100 101 devpollop->nchanges = 0; 102 return(0); 103 } 104 105 static int 106 devpoll_queue(struct devpollop *devpollop, int fd, int events) { 107 struct pollfd *pfd; 108 109 if (devpollop->nchanges >= devpollop->nevents) { 110 /* 111 * Change buffer is full, must commit it to /dev/poll before 112 * adding more 113 */ 114 if (devpoll_commit(devpollop) != 0) 115 return(-1); 116 } 117 118 pfd = &devpollop->changes[devpollop->nchanges++]; 119 pfd->fd = fd; 120 pfd->events = events; 121 pfd->revents = 0; 122 123 return(0); 124 } 125 126 static void * 127 devpoll_init(struct event_base *base) 128 { 129 int dpfd, nfiles = NEVENT; 130 struct rlimit rl; 131 struct devpollop *devpollop; 132 133 /* Disable devpoll when this environment variable is set */ 134 if (evutil_getenv("EVENT_NODEVPOLL")) 135 return (NULL); 136 137 if (!(devpollop = calloc(1, sizeof(struct devpollop)))) 138 return (NULL); 139 140 if (getrlimit(RLIMIT_NOFILE, &rl) == 0 && 141 rl.rlim_cur != RLIM_INFINITY) 142 nfiles = rl.rlim_cur; 143 144 /* Initialize the kernel queue */ 145 if ((dpfd = open("/dev/poll", O_RDWR)) == -1) { 146 event_warn("open: /dev/poll"); 147 free(devpollop); 148 return (NULL); 149 } 150 151 devpollop->dpfd = dpfd; 152 153 /* Initialize fields */ 154 devpollop->events = calloc(nfiles, sizeof(struct pollfd)); 155 if (devpollop->events == NULL) { 156 free(devpollop); 157 close(dpfd); 158 return (NULL); 159 } 160 devpollop->nevents = nfiles; 161 162 devpollop->fds = calloc(nfiles, sizeof(struct evdevpoll)); 163 if (devpollop->fds == NULL) { 164 free(devpollop->events); 165 free(devpollop); 166 close(dpfd); 167 return (NULL); 168 } 169 devpollop->nfds = nfiles; 170 171 devpollop->changes = calloc(nfiles, sizeof(struct pollfd)); 172 if (devpollop->changes == NULL) { 173 free(devpollop->fds); 174 free(devpollop->events); 175 free(devpollop); 176 close(dpfd); 177 return (NULL); 178 } 179 180 evsignal_init(base); 181 182 return (devpollop); 183 } 184 185 static int 186 devpoll_recalc(struct event_base *base, void *arg, int max) 187 { 188 struct devpollop *devpollop = arg; 189 190 if (max >= devpollop->nfds) { 191 struct evdevpoll *fds; 192 int nfds; 193 194 nfds = devpollop->nfds; 195 while (nfds <= max) 196 nfds <<= 1; 197 198 fds = realloc(devpollop->fds, nfds * sizeof(struct evdevpoll)); 199 if (fds == NULL) { 200 event_warn("realloc"); 201 return (-1); 202 } 203 devpollop->fds = fds; 204 memset(fds + devpollop->nfds, 0, 205 (nfds - devpollop->nfds) * sizeof(struct evdevpoll)); 206 devpollop->nfds = nfds; 207 } 208 209 return (0); 210 } 211 212 static int 213 devpoll_dispatch(struct event_base *base, void *arg, struct timeval *tv) 214 { 215 struct devpollop *devpollop = arg; 216 struct pollfd *events = devpollop->events; 217 struct dvpoll dvp; 218 struct evdevpoll *evdp; 219 int i, res, timeout = -1; 220 221 if (devpollop->nchanges) 222 devpoll_commit(devpollop); 223 224 if (tv != NULL) 225 timeout = tv->tv_sec * 1000 + (tv->tv_usec + 999) / 1000; 226 227 dvp.dp_fds = devpollop->events; 228 dvp.dp_nfds = devpollop->nevents; 229 dvp.dp_timeout = timeout; 230 231 res = ioctl(devpollop->dpfd, DP_POLL, &dvp); 232 233 if (res == -1) { 234 if (errno != EINTR) { 235 event_warn("ioctl: DP_POLL"); 236 return (-1); 237 } 238 239 evsignal_process(base); 240 return (0); 241 } else if (base->sig.evsignal_caught) { 242 evsignal_process(base); 243 } 244 245 event_debug(("%s: devpoll_wait reports %d", __func__, res)); 246 247 for (i = 0; i < res; i++) { 248 int which = 0; 249 int what = events[i].revents; 250 struct event *evread = NULL, *evwrite = NULL; 251 252 assert(events[i].fd < devpollop->nfds); 253 evdp = &devpollop->fds[events[i].fd]; 254 255 if (what & POLLHUP) 256 what |= POLLIN | POLLOUT; 257 else if (what & POLLERR) 258 what |= POLLIN | POLLOUT; 259 260 if (what & POLLIN) { 261 evread = evdp->evread; 262 which |= EV_READ; 263 } 264 265 if (what & POLLOUT) { 266 evwrite = evdp->evwrite; 267 which |= EV_WRITE; 268 } 269 270 if (!which) 271 continue; 272 273 if (evread != NULL && !(evread->ev_events & EV_PERSIST)) 274 event_del(evread); 275 if (evwrite != NULL && evwrite != evread && 276 !(evwrite->ev_events & EV_PERSIST)) 277 event_del(evwrite); 278 279 if (evread != NULL) 280 event_active(evread, EV_READ, 1); 281 if (evwrite != NULL) 282 event_active(evwrite, EV_WRITE, 1); 283 } 284 285 return (0); 286 } 287 288 289 static int 290 devpoll_add(void *arg, struct event *ev) 291 { 292 struct devpollop *devpollop = arg; 293 struct evdevpoll *evdp; 294 int fd, events; 295 296 if (ev->ev_events & EV_SIGNAL) 297 return (evsignal_add(ev)); 298 299 fd = ev->ev_fd; 300 if (fd >= devpollop->nfds) { 301 /* Extend the file descriptor array as necessary */ 302 if (devpoll_recalc(ev->ev_base, devpollop, fd) == -1) 303 return (-1); 304 } 305 evdp = &devpollop->fds[fd]; 306 307 /* 308 * It's not necessary to OR the existing read/write events that we 309 * are currently interested in with the new event we are adding. 310 * The /dev/poll driver ORs any new events with the existing events 311 * that it has cached for the fd. 312 */ 313 314 events = 0; 315 if (ev->ev_events & EV_READ) { 316 if (evdp->evread && evdp->evread != ev) { 317 /* There is already a different read event registered */ 318 return(-1); 319 } 320 events |= POLLIN; 321 } 322 323 if (ev->ev_events & EV_WRITE) { 324 if (evdp->evwrite && evdp->evwrite != ev) { 325 /* There is already a different write event registered */ 326 return(-1); 327 } 328 events |= POLLOUT; 329 } 330 331 if (devpoll_queue(devpollop, fd, events) != 0) 332 return(-1); 333 334 /* Update events responsible */ 335 if (ev->ev_events & EV_READ) 336 evdp->evread = ev; 337 if (ev->ev_events & EV_WRITE) 338 evdp->evwrite = ev; 339 340 return (0); 341 } 342 343 static int 344 devpoll_del(void *arg, struct event *ev) 345 { 346 struct devpollop *devpollop = arg; 347 struct evdevpoll *evdp; 348 int fd, events; 349 int needwritedelete = 1, needreaddelete = 1; 350 351 if (ev->ev_events & EV_SIGNAL) 352 return (evsignal_del(ev)); 353 354 fd = ev->ev_fd; 355 if (fd >= devpollop->nfds) 356 return (0); 357 evdp = &devpollop->fds[fd]; 358 359 events = 0; 360 if (ev->ev_events & EV_READ) 361 events |= POLLIN; 362 if (ev->ev_events & EV_WRITE) 363 events |= POLLOUT; 364 365 /* 366 * The only way to remove an fd from the /dev/poll monitored set is 367 * to use POLLREMOVE by itself. This removes ALL events for the fd 368 * provided so if we care about two events and are only removing one 369 * we must re-add the other event after POLLREMOVE. 370 */ 371 372 if (devpoll_queue(devpollop, fd, POLLREMOVE) != 0) 373 return(-1); 374 375 if ((events & (POLLIN|POLLOUT)) != (POLLIN|POLLOUT)) { 376 /* 377 * We're not deleting all events, so we must resubmit the 378 * event that we are still interested in if one exists. 379 */ 380 381 if ((events & POLLIN) && evdp->evwrite != NULL) { 382 /* Deleting read, still care about write */ 383 devpoll_queue(devpollop, fd, POLLOUT); 384 needwritedelete = 0; 385 } else if ((events & POLLOUT) && evdp->evread != NULL) { 386 /* Deleting write, still care about read */ 387 devpoll_queue(devpollop, fd, POLLIN); 388 needreaddelete = 0; 389 } 390 } 391 392 if (needreaddelete) 393 evdp->evread = NULL; 394 if (needwritedelete) 395 evdp->evwrite = NULL; 396 397 return (0); 398 } 399 400 static void 401 devpoll_dealloc(struct event_base *base, void *arg) 402 { 403 struct devpollop *devpollop = arg; 404 405 evsignal_dealloc(base); 406 if (devpollop->fds) 407 free(devpollop->fds); 408 if (devpollop->events) 409 free(devpollop->events); 410 if (devpollop->changes) 411 free(devpollop->changes); 412 if (devpollop->dpfd >= 0) 413 close(devpollop->dpfd); 414 415 memset(devpollop, 0, sizeof(struct devpollop)); 416 free(devpollop); 417 } 418