1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */ 2 /* dbus-socket-set-epoll.c - a socket set implemented via Linux epoll(4) 3 * 4 * Copyright 2011 Nokia Corporation 5 * 6 * Licensed under the Academic Free License version 2.1 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 * 13 * This program is distributed in the hope that it will be useful, 14 * but WITHOUT ANY WARRANTY; without even the implied warranty of 15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 16 * GNU General Public License for more details. 17 * 18 * You should have received a copy of the GNU General Public License 19 * along with this program; if not, write to the Free Software 20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, 21 * MA 02110-1301 USA 22 * 23 */ 24 25 #include <config.h> 26 #include "dbus-socket-set.h" 27 28 #include <dbus/dbus-internals.h> 29 #include <dbus/dbus-sysdeps.h> 30 31 #ifndef __linux__ 32 # error This file is for Linux epoll(4) 33 #endif 34 35 #include <errno.h> 36 #include <fcntl.h> 37 #include <sys/epoll.h> 38 #include <unistd.h> 39 40 #ifndef DOXYGEN_SHOULD_SKIP_THIS 41 42 typedef struct { 43 DBusSocketSet parent; 44 int epfd; 45 } DBusSocketSetEpoll; 46 47 static inline DBusSocketSetEpoll * 48 socket_set_epoll_cast (DBusSocketSet *set) 49 { 50 _dbus_assert (set->cls == &_dbus_socket_set_epoll_class); 51 return (DBusSocketSetEpoll *) set; 52 } 53 54 /* this is safe to call on a partially-allocated socket set */ 55 static void 56 socket_set_epoll_free (DBusSocketSet *set) 57 { 58 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 59 60 if (self == NULL) 61 return; 62 63 if (self->epfd != -1) 64 close (self->epfd); 65 66 dbus_free (self); 67 } 68 69 DBusSocketSet * 70 _dbus_socket_set_epoll_new (void) 71 { 72 DBusSocketSetEpoll *self; 73 74 self = dbus_new0 (DBusSocketSetEpoll, 1); 75 76 if (self == NULL) 77 return NULL; 78 79 self->parent.cls = &_dbus_socket_set_epoll_class; 80 81 self->epfd = epoll_create1 (EPOLL_CLOEXEC); 82 83 if (self->epfd == -1) 84 { 85 int flags; 86 87 /* the size hint is ignored unless you have a rather old kernel, 88 * but must be positive on some versions, so just pick something 89 * arbitrary; it's a hint, not a limit */ 90 self->epfd = epoll_create (42); 91 92 flags = fcntl (self->epfd, F_GETFD, 0); 93 94 if (flags != -1) 95 fcntl (self->epfd, F_SETFD, flags | FD_CLOEXEC); 96 } 97 98 if (self->epfd == -1) 99 { 100 socket_set_epoll_free ((DBusSocketSet *) self); 101 return NULL; 102 } 103 104 return (DBusSocketSet *) self; 105 } 106 107 static uint32_t 108 watch_flags_to_epoll_events (unsigned int flags) 109 { 110 uint32_t events = 0; 111 112 if (flags & DBUS_WATCH_READABLE) 113 events |= EPOLLIN; 114 if (flags & DBUS_WATCH_WRITABLE) 115 events |= EPOLLOUT; 116 117 return events; 118 } 119 120 static unsigned int 121 epoll_events_to_watch_flags (uint32_t events) 122 { 123 short flags = 0; 124 125 if (events & EPOLLIN) 126 flags |= DBUS_WATCH_READABLE; 127 if (events & EPOLLOUT) 128 flags |= DBUS_WATCH_WRITABLE; 129 if (events & EPOLLHUP) 130 flags |= DBUS_WATCH_HANGUP; 131 if (events & EPOLLERR) 132 flags |= DBUS_WATCH_ERROR; 133 134 return flags; 135 } 136 137 static dbus_bool_t 138 socket_set_epoll_add (DBusSocketSet *set, 139 int fd, 140 unsigned int flags, 141 dbus_bool_t enabled) 142 { 143 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 144 struct epoll_event event; 145 int err; 146 147 event.data.fd = fd; 148 149 if (enabled) 150 { 151 event.events = watch_flags_to_epoll_events (flags); 152 } 153 else 154 { 155 /* We need to add *something* to reserve space in the kernel's data 156 * structures: see socket_set_epoll_disable for more details */ 157 event.events = EPOLLET; 158 } 159 160 if (epoll_ctl (self->epfd, EPOLL_CTL_ADD, fd, &event) == 0) 161 return TRUE; 162 163 /* Anything except ENOMEM, ENOSPC means we have an internal error. */ 164 err = errno; 165 switch (err) 166 { 167 case ENOMEM: 168 case ENOSPC: 169 /* be silent: this is basically OOM, which our callers are expected 170 * to cope with */ 171 break; 172 173 case EBADF: 174 _dbus_warn ("Bad fd %d\n", fd); 175 break; 176 177 case EEXIST: 178 _dbus_warn ("fd %d added and then added again\n", fd); 179 break; 180 181 default: 182 _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd, 183 strerror (err)); 184 break; 185 } 186 187 return FALSE; 188 } 189 190 static void 191 socket_set_epoll_enable (DBusSocketSet *set, 192 int fd, 193 unsigned int flags) 194 { 195 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 196 struct epoll_event event; 197 int err; 198 199 event.data.fd = fd; 200 event.events = watch_flags_to_epoll_events (flags); 201 202 if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0) 203 return; 204 205 err = errno; 206 207 /* Enabling a file descriptor isn't allowed to fail, even for OOM, so we 208 * do our best to avoid all of these. */ 209 switch (err) 210 { 211 case EBADF: 212 _dbus_warn ("Bad fd %d\n", fd); 213 break; 214 215 case ENOENT: 216 _dbus_warn ("fd %d enabled before it was added\n", fd); 217 break; 218 219 case ENOMEM: 220 _dbus_warn ("Insufficient memory to change watch for fd %d\n", fd); 221 break; 222 223 default: 224 _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd, 225 strerror (err)); 226 break; 227 } 228 } 229 230 static void 231 socket_set_epoll_disable (DBusSocketSet *set, 232 int fd) 233 { 234 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 235 struct epoll_event event; 236 int err; 237 238 /* The naive thing to do would be EPOLL_CTL_DEL, but that'll probably 239 * free resources in the kernel. When we come to do socket_set_epoll_enable, 240 * there might not be enough resources to bring it back! 241 * 242 * The next idea you might have is to set the flags to 0. However, events 243 * always trigger on EPOLLERR and EPOLLHUP, even if libdbus isn't actually 244 * delivering them to a DBusWatch. Because epoll is level-triggered by 245 * default, we'll busy-loop on an unhandled error or hangup; not good. 246 * 247 * So, let's set it to be edge-triggered: then the worst case is that 248 * we return from poll immediately on one iteration, ignore it because no 249 * watch is enabled, then go back to normal. When we re-enable a watch 250 * we'll switch back to level-triggered and be notified again (verified to 251 * work on 2.6.32). Compile this file with -DTEST_BEHAVIOUR_OF_EPOLLET for 252 * test code. 253 */ 254 event.data.fd = fd; 255 event.events = EPOLLET; 256 257 if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0) 258 return; 259 260 err = errno; 261 _dbus_warn ("Error when trying to watch fd %d: %s\n", fd, 262 strerror (err)); 263 } 264 265 static void 266 socket_set_epoll_remove (DBusSocketSet *set, 267 int fd) 268 { 269 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 270 int err; 271 /* Kernels < 2.6.9 require a non-NULL struct pointer, even though its 272 * contents are ignored */ 273 struct epoll_event dummy = { 0 }; 274 275 if (epoll_ctl (self->epfd, EPOLL_CTL_DEL, fd, &dummy) == 0) 276 return; 277 278 err = errno; 279 _dbus_warn ("Error when trying to remove fd %d: %s\n", fd, strerror (err)); 280 } 281 282 /* Optimally, this should be the same as in DBusLoop: we use it to translate 283 * between struct epoll_event and DBusSocketEvent without allocating heap 284 * memory. */ 285 #define N_STACK_DESCRIPTORS 64 286 287 static int 288 socket_set_epoll_poll (DBusSocketSet *set, 289 DBusSocketEvent *revents, 290 int max_events, 291 int timeout_ms) 292 { 293 DBusSocketSetEpoll *self = socket_set_epoll_cast (set); 294 struct epoll_event events[N_STACK_DESCRIPTORS]; 295 int n_ready; 296 int i; 297 298 _dbus_assert (max_events > 0); 299 300 n_ready = epoll_wait (self->epfd, events, 301 MIN (_DBUS_N_ELEMENTS (events), max_events), 302 timeout_ms); 303 304 if (n_ready <= 0) 305 return n_ready; 306 307 for (i = 0; i < n_ready; i++) 308 { 309 revents[i].fd = events[i].data.fd; 310 revents[i].flags = epoll_events_to_watch_flags (events[i].events); 311 } 312 313 return n_ready; 314 } 315 316 DBusSocketSetClass _dbus_socket_set_epoll_class = { 317 socket_set_epoll_free, 318 socket_set_epoll_add, 319 socket_set_epoll_remove, 320 socket_set_epoll_enable, 321 socket_set_epoll_disable, 322 socket_set_epoll_poll 323 }; 324 325 #ifdef TEST_BEHAVIOUR_OF_EPOLLET 326 /* usage: cat /dev/null | ./epoll 327 * 328 * desired output: 329 * ctl ADD: 0 330 * wait for HUP, edge-triggered: 1 331 * wait for HUP again: 0 332 * ctl MOD: 0 333 * wait for HUP: 1 334 */ 335 336 #include <sys/epoll.h> 337 338 #include <stdio.h> 339 340 int 341 main (void) 342 { 343 struct epoll_event input; 344 struct epoll_event output; 345 int epfd = epoll_create1 (EPOLL_CLOEXEC); 346 int fd = 0; /* stdin */ 347 int ret; 348 349 input.events = EPOLLHUP | EPOLLET; 350 ret = epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &input); 351 printf ("ctl ADD: %d\n", ret); 352 353 ret = epoll_wait (epfd, &output, 1, -1); 354 printf ("wait for HUP, edge-triggered: %d\n", ret); 355 356 ret = epoll_wait (epfd, &output, 1, 1); 357 printf ("wait for HUP again: %d\n", ret); 358 359 input.events = EPOLLHUP; 360 ret = epoll_ctl (epfd, EPOLL_CTL_MOD, fd, &input); 361 printf ("ctl MOD: %d\n", ret); 362 363 ret = epoll_wait (epfd, &output, 1, -1); 364 printf ("wait for HUP: %d\n", ret); 365 366 return 0; 367 } 368 369 #endif /* TEST_BEHAVIOUR_OF_EPOLLET */ 370 371 #endif /* !DOXYGEN_SHOULD_SKIP_THIS */ 372