/*
 * Submitted by David Pacheco (dp.spambait (at) gmail.com)
 *
 * Copyright 2006-2007 Niels Provos
 * Copyright 2007-2012 Niels Provos and Nick Mathewson
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY SUN MICROSYSTEMS, INC. ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL SUN MICROSYSTEMS, INC. BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 2007 Sun Microsystems. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * evport.c: event backend using Solaris 10 event ports. See port_create(3C).
 * This implementation is loosely modeled after the one used for select(2) (in
 * select.c).
39 * 40 * The outstanding events are tracked in a data structure called evport_data. 41 * Each entry in the ed_fds array corresponds to a file descriptor, and contains 42 * pointers to the read and write events that correspond to that fd. (That is, 43 * when the file is readable, the "read" event should handle it, etc.) 44 * 45 * evport_add and evport_del update this data structure. evport_dispatch uses it 46 * to determine where to callback when an event occurs (which it gets from 47 * port_getn). 48 * 49 * Helper functions are used: grow() grows the file descriptor array as 50 * necessary when large fd's come in. reassociate() takes care of maintaining 51 * the proper file-descriptor/event-port associations. 52 * 53 * As in the select(2) implementation, signals are handled by evsignal. 54 */ 55 56 #include "event2/event-config.h" 57 58 #include <sys/time.h> 59 #include <sys/queue.h> 60 #include <errno.h> 61 #include <poll.h> 62 #include <port.h> 63 #include <signal.h> 64 #include <stdio.h> 65 #include <stdlib.h> 66 #include <string.h> 67 #include <time.h> 68 #include <unistd.h> 69 70 #include "event2/thread.h" 71 72 #include "evthread-internal.h" 73 #include "event-internal.h" 74 #include "log-internal.h" 75 #include "evsignal-internal.h" 76 #include "evmap-internal.h" 77 78 /* 79 * Default value for ed_nevents, which is the maximum file descriptor number we 80 * can handle. If an event comes in for a file descriptor F > nevents, we will 81 * grow the array of file descriptors, doubling its size. 82 */ 83 #define DEFAULT_NFDS 16 84 85 86 /* 87 * EVENTS_PER_GETN is the maximum number of events to retrieve from port_getn on 88 * any particular call. You can speed things up by increasing this, but it will 89 * (obviously) require more memory. 90 */ 91 #define EVENTS_PER_GETN 8 92 93 /* 94 * Per-file-descriptor information about what events we're subscribed to. These 95 * fields are NULL if no event is subscribed to either of them. 
96 */ 97 98 struct fd_info { 99 short fdi_what; /* combinations of EV_READ and EV_WRITE */ 100 }; 101 102 #define FDI_HAS_READ(fdi) ((fdi)->fdi_what & EV_READ) 103 #define FDI_HAS_WRITE(fdi) ((fdi)->fdi_what & EV_WRITE) 104 #define FDI_HAS_EVENTS(fdi) (FDI_HAS_READ(fdi) || FDI_HAS_WRITE(fdi)) 105 #define FDI_TO_SYSEVENTS(fdi) (FDI_HAS_READ(fdi) ? POLLIN : 0) | \ 106 (FDI_HAS_WRITE(fdi) ? POLLOUT : 0) 107 108 struct evport_data { 109 int ed_port; /* event port for system events */ 110 int ed_nevents; /* number of allocated fdi's */ 111 struct fd_info *ed_fds; /* allocated fdi table */ 112 /* fdi's that we need to reassoc */ 113 int ed_pending[EVENTS_PER_GETN]; /* fd's with pending events */ 114 }; 115 116 static void* evport_init(struct event_base *); 117 static int evport_add(struct event_base *, int fd, short old, short events, void *); 118 static int evport_del(struct event_base *, int fd, short old, short events, void *); 119 static int evport_dispatch(struct event_base *, struct timeval *); 120 static void evport_dealloc(struct event_base *); 121 122 const struct eventop evportops = { 123 "evport", 124 evport_init, 125 evport_add, 126 evport_del, 127 evport_dispatch, 128 evport_dealloc, 129 1, /* need reinit */ 130 0, /* features */ 131 0, /* fdinfo length */ 132 }; 133 134 /* 135 * Initialize the event port implementation. 
136 */ 137 138 static void* 139 evport_init(struct event_base *base) 140 { 141 struct evport_data *evpd; 142 int i; 143 144 if (!(evpd = mm_calloc(1, sizeof(struct evport_data)))) 145 return (NULL); 146 147 if ((evpd->ed_port = port_create()) == -1) { 148 mm_free(evpd); 149 return (NULL); 150 } 151 152 /* 153 * Initialize file descriptor structure 154 */ 155 evpd->ed_fds = mm_calloc(DEFAULT_NFDS, sizeof(struct fd_info)); 156 if (evpd->ed_fds == NULL) { 157 close(evpd->ed_port); 158 mm_free(evpd); 159 return (NULL); 160 } 161 evpd->ed_nevents = DEFAULT_NFDS; 162 for (i = 0; i < EVENTS_PER_GETN; i++) 163 evpd->ed_pending[i] = -1; 164 165 evsig_init(base); 166 167 return (evpd); 168 } 169 170 #ifdef CHECK_INVARIANTS 171 /* 172 * Checks some basic properties about the evport_data structure. Because it 173 * checks all file descriptors, this function can be expensive when the maximum 174 * file descriptor ever used is rather large. 175 */ 176 177 static void 178 check_evportop(struct evport_data *evpd) 179 { 180 EVUTIL_ASSERT(evpd); 181 EVUTIL_ASSERT(evpd->ed_nevents > 0); 182 EVUTIL_ASSERT(evpd->ed_port > 0); 183 EVUTIL_ASSERT(evpd->ed_fds > 0); 184 } 185 186 /* 187 * Verifies very basic integrity of a given port_event. 188 */ 189 static void 190 check_event(port_event_t* pevt) 191 { 192 /* 193 * We've only registered for PORT_SOURCE_FD events. The only 194 * other thing we can legitimately receive is PORT_SOURCE_ALERT, 195 * but since we're not using port_alert either, we can assume 196 * PORT_SOURCE_FD. 197 */ 198 EVUTIL_ASSERT(pevt->portev_source == PORT_SOURCE_FD); 199 EVUTIL_ASSERT(pevt->portev_user == NULL); 200 } 201 202 #else 203 #define check_evportop(epop) 204 #define check_event(pevt) 205 #endif /* CHECK_INVARIANTS */ 206 207 /* 208 * Doubles the size of the allocated file descriptor array. 
209 */ 210 static int 211 grow(struct evport_data *epdp, int factor) 212 { 213 struct fd_info *tmp; 214 int oldsize = epdp->ed_nevents; 215 int newsize = factor * oldsize; 216 EVUTIL_ASSERT(factor > 1); 217 218 check_evportop(epdp); 219 220 tmp = mm_realloc(epdp->ed_fds, sizeof(struct fd_info) * newsize); 221 if (NULL == tmp) 222 return -1; 223 epdp->ed_fds = tmp; 224 memset((char*) (epdp->ed_fds + oldsize), 0, 225 (newsize - oldsize)*sizeof(struct fd_info)); 226 epdp->ed_nevents = newsize; 227 228 check_evportop(epdp); 229 230 return 0; 231 } 232 233 234 /* 235 * (Re)associates the given file descriptor with the event port. The OS events 236 * are specified (implicitly) from the fd_info struct. 237 */ 238 static int 239 reassociate(struct evport_data *epdp, struct fd_info *fdip, int fd) 240 { 241 int sysevents = FDI_TO_SYSEVENTS(fdip); 242 243 if (sysevents != 0) { 244 if (port_associate(epdp->ed_port, PORT_SOURCE_FD, 245 fd, sysevents, NULL) == -1) { 246 event_warn("port_associate"); 247 return (-1); 248 } 249 } 250 251 check_evportop(epdp); 252 253 return (0); 254 } 255 256 /* 257 * Main event loop - polls port_getn for some number of events, and processes 258 * them. 259 */ 260 261 static int 262 evport_dispatch(struct event_base *base, struct timeval *tv) 263 { 264 int i, res; 265 struct evport_data *epdp = base->evbase; 266 port_event_t pevtlist[EVENTS_PER_GETN]; 267 268 /* 269 * port_getn will block until it has at least nevents events. It will 270 * also return how many it's given us (which may be more than we asked 271 * for, as long as it's less than our maximum (EVENTS_PER_GETN)) in 272 * nevents. 273 */ 274 int nevents = 1; 275 276 /* 277 * We have to convert a struct timeval to a struct timespec 278 * (only difference is nanoseconds vs. microseconds). If no time-based 279 * events are active, we should wait for I/O (and tv == NULL). 
280 */ 281 struct timespec ts; 282 struct timespec *ts_p = NULL; 283 if (tv != NULL) { 284 ts.tv_sec = tv->tv_sec; 285 ts.tv_nsec = tv->tv_usec * 1000; 286 ts_p = &ts; 287 } 288 289 /* 290 * Before doing anything else, we need to reassociate the events we hit 291 * last time which need reassociation. See comment at the end of the 292 * loop below. 293 */ 294 for (i = 0; i < EVENTS_PER_GETN; ++i) { 295 struct fd_info *fdi = NULL; 296 if (epdp->ed_pending[i] != -1) { 297 fdi = &(epdp->ed_fds[epdp->ed_pending[i]]); 298 } 299 300 if (fdi != NULL && FDI_HAS_EVENTS(fdi)) { 301 int fd = epdp->ed_pending[i]; 302 reassociate(epdp, fdi, fd); 303 epdp->ed_pending[i] = -1; 304 } 305 } 306 307 EVBASE_RELEASE_LOCK(base, th_base_lock); 308 309 res = port_getn(epdp->ed_port, pevtlist, EVENTS_PER_GETN, 310 (unsigned int *) &nevents, ts_p); 311 312 EVBASE_ACQUIRE_LOCK(base, th_base_lock); 313 314 if (res == -1) { 315 if (errno == EINTR || errno == EAGAIN) { 316 return (0); 317 } else if (errno == ETIME) { 318 if (nevents == 0) 319 return (0); 320 } else { 321 event_warn("port_getn"); 322 return (-1); 323 } 324 } 325 326 event_debug(("%s: port_getn reports %d events", __func__, nevents)); 327 328 for (i = 0; i < nevents; ++i) { 329 struct fd_info *fdi; 330 port_event_t *pevt = &pevtlist[i]; 331 int fd = (int) pevt->portev_object; 332 333 check_evportop(epdp); 334 check_event(pevt); 335 epdp->ed_pending[i] = fd; 336 337 /* 338 * Figure out what kind of event it was 339 * (because we have to pass this to the callback) 340 */ 341 res = 0; 342 if (pevt->portev_events & (POLLERR|POLLHUP)) { 343 res = EV_READ | EV_WRITE; 344 } else { 345 if (pevt->portev_events & POLLIN) 346 res |= EV_READ; 347 if (pevt->portev_events & POLLOUT) 348 res |= EV_WRITE; 349 } 350 351 /* 352 * Check for the error situations or a hangup situation 353 */ 354 if (pevt->portev_events & (POLLERR|POLLHUP|POLLNVAL)) 355 res |= EV_READ|EV_WRITE; 356 357 EVUTIL_ASSERT(epdp->ed_nevents > fd); 358 fdi = 
&(epdp->ed_fds[fd]); 359 360 evmap_io_active(base, fd, res); 361 } /* end of all events gotten */ 362 363 check_evportop(epdp); 364 365 return (0); 366 } 367 368 369 /* 370 * Adds the given event (so that you will be notified when it happens via 371 * the callback function). 372 */ 373 374 static int 375 evport_add(struct event_base *base, int fd, short old, short events, void *p) 376 { 377 struct evport_data *evpd = base->evbase; 378 struct fd_info *fdi; 379 int factor; 380 (void)p; 381 382 check_evportop(evpd); 383 384 /* 385 * If necessary, grow the file descriptor info table 386 */ 387 388 factor = 1; 389 while (fd >= factor * evpd->ed_nevents) 390 factor *= 2; 391 392 if (factor > 1) { 393 if (-1 == grow(evpd, factor)) { 394 return (-1); 395 } 396 } 397 398 fdi = &evpd->ed_fds[fd]; 399 fdi->fdi_what |= events; 400 401 return reassociate(evpd, fdi, fd); 402 } 403 404 /* 405 * Removes the given event from the list of events to wait for. 406 */ 407 408 static int 409 evport_del(struct event_base *base, int fd, short old, short events, void *p) 410 { 411 struct evport_data *evpd = base->evbase; 412 struct fd_info *fdi; 413 int i; 414 int associated = 1; 415 (void)p; 416 417 check_evportop(evpd); 418 419 if (evpd->ed_nevents < fd) { 420 return (-1); 421 } 422 423 for (i = 0; i < EVENTS_PER_GETN; ++i) { 424 if (evpd->ed_pending[i] == fd) { 425 associated = 0; 426 break; 427 } 428 } 429 430 fdi = &evpd->ed_fds[fd]; 431 if (events & EV_READ) 432 fdi->fdi_what &= ~EV_READ; 433 if (events & EV_WRITE) 434 fdi->fdi_what &= ~EV_WRITE; 435 436 if (associated) { 437 if (!FDI_HAS_EVENTS(fdi) && 438 port_dissociate(evpd->ed_port, PORT_SOURCE_FD, fd) == -1) { 439 /* 440 * Ignore EBADFD error the fd could have been closed 441 * before event_del() was called. 
442 */ 443 if (errno != EBADFD) { 444 event_warn("port_dissociate"); 445 return (-1); 446 } 447 } else { 448 if (FDI_HAS_EVENTS(fdi)) { 449 return (reassociate(evpd, fdi, fd)); 450 } 451 } 452 } else { 453 if ((fdi->fdi_what & (EV_READ|EV_WRITE)) == 0) { 454 evpd->ed_pending[i] = -1; 455 } 456 } 457 return 0; 458 } 459 460 461 static void 462 evport_dealloc(struct event_base *base) 463 { 464 struct evport_data *evpd = base->evbase; 465 466 evsig_dealloc(base); 467 468 close(evpd->ed_port); 469 470 if (evpd->ed_fds) 471 mm_free(evpd->ed_fds); 472 mm_free(evpd); 473 } 474