Home | History | Annotate | Download | only in dbus
      1 /* -*- mode: C; c-file-style: "gnu"; indent-tabs-mode: nil; -*- */
      2 /* dbus-socket-set-epoll.c - a socket set implemented via Linux epoll(4)
      3  *
      4  * Copyright © 2011 Nokia Corporation
      5  *
      6  * Licensed under the Academic Free License version 2.1
      7  *
      8  * This program is free software; you can redistribute it and/or modify
      9  * it under the terms of the GNU General Public License as published by
     10  * the Free Software Foundation; either version 2 of the License, or
     11  * (at your option) any later version.
     12  *
     13  * This program is distributed in the hope that it will be useful,
     14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
     15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16  * GNU General Public License for more details.
     17  *
     18  * You should have received a copy of the GNU General Public License
     19  * along with this program; if not, write to the Free Software
     20  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
     21  * MA  02110-1301  USA
     22  *
     23  */
     24 
     25 #include <config.h>
     26 #include "dbus-socket-set.h"
     27 
     28 #include <dbus/dbus-internals.h>
     29 #include <dbus/dbus-sysdeps.h>
     30 
     31 #ifndef __linux__
     32 # error This file is for Linux epoll(4)
     33 #endif
     34 
     35 #include <errno.h>
     36 #include <fcntl.h>
     37 #include <sys/epoll.h>
     38 #include <unistd.h>
     39 
     40 #ifndef DOXYGEN_SHOULD_SKIP_THIS
     41 
/* Socket set implemented on top of a Linux epoll instance.
 *
 * The generic DBusSocketSet header is the first member, and code in this
 * file casts between DBusSocketSet * and DBusSocketSetEpoll *. */
typedef struct {
    DBusSocketSet parent;
    /* epoll file descriptor; -1 is treated as "not open" when freeing */
    int epfd;
} DBusSocketSetEpoll;
     46 
     47 static inline DBusSocketSetEpoll *
     48 socket_set_epoll_cast (DBusSocketSet *set)
     49 {
     50   _dbus_assert (set->cls == &_dbus_socket_set_epoll_class);
     51   return (DBusSocketSetEpoll *) set;
     52 }
     53 
     54 /* this is safe to call on a partially-allocated socket set */
     55 static void
     56 socket_set_epoll_free (DBusSocketSet *set)
     57 {
     58   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
     59 
     60   if (self == NULL)
     61     return;
     62 
     63   if (self->epfd != -1)
     64     close (self->epfd);
     65 
     66   dbus_free (self);
     67 }
     68 
     69 DBusSocketSet *
     70 _dbus_socket_set_epoll_new (void)
     71 {
     72   DBusSocketSetEpoll *self;
     73 
     74   self = dbus_new0 (DBusSocketSetEpoll, 1);
     75 
     76   if (self == NULL)
     77     return NULL;
     78 
     79   self->parent.cls = &_dbus_socket_set_epoll_class;
     80 
     81   self->epfd = epoll_create1 (EPOLL_CLOEXEC);
     82 
     83   if (self->epfd == -1)
     84     {
     85       int flags;
     86 
     87       /* the size hint is ignored unless you have a rather old kernel,
     88        * but must be positive on some versions, so just pick something
     89        * arbitrary; it's a hint, not a limit */
     90       self->epfd = epoll_create (42);
     91 
     92       flags = fcntl (self->epfd, F_GETFD, 0);
     93 
     94       if (flags != -1)
     95         fcntl (self->epfd, F_SETFD, flags | FD_CLOEXEC);
     96     }
     97 
     98   if (self->epfd == -1)
     99     {
    100       socket_set_epoll_free ((DBusSocketSet *) self);
    101       return NULL;
    102     }
    103 
    104   return (DBusSocketSet *) self;
    105 }
    106 
    107 static uint32_t
    108 watch_flags_to_epoll_events (unsigned int flags)
    109 {
    110   uint32_t events = 0;
    111 
    112   if (flags & DBUS_WATCH_READABLE)
    113     events |= EPOLLIN;
    114   if (flags & DBUS_WATCH_WRITABLE)
    115     events |= EPOLLOUT;
    116 
    117   return events;
    118 }
    119 
    120 static unsigned int
    121 epoll_events_to_watch_flags (uint32_t events)
    122 {
    123   short flags = 0;
    124 
    125   if (events & EPOLLIN)
    126     flags |= DBUS_WATCH_READABLE;
    127   if (events & EPOLLOUT)
    128     flags |= DBUS_WATCH_WRITABLE;
    129   if (events & EPOLLHUP)
    130     flags |= DBUS_WATCH_HANGUP;
    131   if (events & EPOLLERR)
    132     flags |= DBUS_WATCH_ERROR;
    133 
    134   return flags;
    135 }
    136 
    137 static dbus_bool_t
    138 socket_set_epoll_add (DBusSocketSet  *set,
    139                       int             fd,
    140                       unsigned int    flags,
    141                       dbus_bool_t     enabled)
    142 {
    143   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
    144   struct epoll_event event;
    145   int err;
    146 
    147   event.data.fd = fd;
    148 
    149   if (enabled)
    150     {
    151       event.events = watch_flags_to_epoll_events (flags);
    152     }
    153   else
    154     {
    155       /* We need to add *something* to reserve space in the kernel's data
    156        * structures: see socket_set_epoll_disable for more details */
    157       event.events = EPOLLET;
    158     }
    159 
    160   if (epoll_ctl (self->epfd, EPOLL_CTL_ADD, fd, &event) == 0)
    161     return TRUE;
    162 
    163   /* Anything except ENOMEM, ENOSPC means we have an internal error. */
    164   err = errno;
    165   switch (err)
    166     {
    167       case ENOMEM:
    168       case ENOSPC:
    169         /* be silent: this is basically OOM, which our callers are expected
    170          * to cope with */
    171         break;
    172 
    173       case EBADF:
    174         _dbus_warn ("Bad fd %d\n", fd);
    175         break;
    176 
    177       case EEXIST:
    178         _dbus_warn ("fd %d added and then added again\n", fd);
    179         break;
    180 
    181       default:
    182         _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd,
    183                     strerror (err));
    184         break;
    185     }
    186 
    187   return FALSE;
    188 }
    189 
    190 static void
    191 socket_set_epoll_enable (DBusSocketSet  *set,
    192                          int             fd,
    193                          unsigned int    flags)
    194 {
    195   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
    196   struct epoll_event event;
    197   int err;
    198 
    199   event.data.fd = fd;
    200   event.events = watch_flags_to_epoll_events (flags);
    201 
    202   if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
    203     return;
    204 
    205   err = errno;
    206 
    207   /* Enabling a file descriptor isn't allowed to fail, even for OOM, so we
    208    * do our best to avoid all of these. */
    209   switch (err)
    210     {
    211       case EBADF:
    212         _dbus_warn ("Bad fd %d\n", fd);
    213         break;
    214 
    215       case ENOENT:
    216         _dbus_warn ("fd %d enabled before it was added\n", fd);
    217         break;
    218 
    219       case ENOMEM:
    220         _dbus_warn ("Insufficient memory to change watch for fd %d\n", fd);
    221         break;
    222 
    223       default:
    224         _dbus_warn ("Misc error when trying to watch fd %d: %s\n", fd,
    225                     strerror (err));
    226         break;
    227     }
    228 }
    229 
    230 static void
    231 socket_set_epoll_disable (DBusSocketSet  *set,
    232                           int             fd)
    233 {
    234   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
    235   struct epoll_event event;
    236   int err;
    237 
    238   /* The naive thing to do would be EPOLL_CTL_DEL, but that'll probably
    239    * free resources in the kernel. When we come to do socket_set_epoll_enable,
    240    * there might not be enough resources to bring it back!
    241    *
    242    * The next idea you might have is to set the flags to 0. However, events
    243    * always trigger on EPOLLERR and EPOLLHUP, even if libdbus isn't actually
    244    * delivering them to a DBusWatch. Because epoll is level-triggered by
    245    * default, we'll busy-loop on an unhandled error or hangup; not good.
    246    *
    247    * So, let's set it to be edge-triggered: then the worst case is that
    248    * we return from poll immediately on one iteration, ignore it because no
    249    * watch is enabled, then go back to normal. When we re-enable a watch
    250    * we'll switch back to level-triggered and be notified again (verified to
    251    * work on 2.6.32). Compile this file with -DTEST_BEHAVIOUR_OF_EPOLLET for
    252    * test code.
    253    */
    254   event.data.fd = fd;
    255   event.events = EPOLLET;
    256 
    257   if (epoll_ctl (self->epfd, EPOLL_CTL_MOD, fd, &event) == 0)
    258     return;
    259 
    260   err = errno;
    261   _dbus_warn ("Error when trying to watch fd %d: %s\n", fd,
    262               strerror (err));
    263 }
    264 
    265 static void
    266 socket_set_epoll_remove (DBusSocketSet  *set,
    267                          int             fd)
    268 {
    269   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
    270   int err;
    271   /* Kernels < 2.6.9 require a non-NULL struct pointer, even though its
    272    * contents are ignored */
    273   struct epoll_event dummy = { 0 };
    274 
    275   if (epoll_ctl (self->epfd, EPOLL_CTL_DEL, fd, &dummy) == 0)
    276     return;
    277 
    278   err = errno;
    279   _dbus_warn ("Error when trying to remove fd %d: %s\n", fd, strerror (err));
    280 }
    281 
    282 /* Optimally, this should be the same as in DBusLoop: we use it to translate
    283  * between struct epoll_event and DBusSocketEvent without allocating heap
    284  * memory. */
    285 #define N_STACK_DESCRIPTORS 64
    286 
    287 static int
    288 socket_set_epoll_poll (DBusSocketSet   *set,
    289                        DBusSocketEvent *revents,
    290                        int              max_events,
    291                        int              timeout_ms)
    292 {
    293   DBusSocketSetEpoll *self = socket_set_epoll_cast (set);
    294   struct epoll_event events[N_STACK_DESCRIPTORS];
    295   int n_ready;
    296   int i;
    297 
    298   _dbus_assert (max_events > 0);
    299 
    300   n_ready = epoll_wait (self->epfd, events,
    301                         MIN (_DBUS_N_ELEMENTS (events), max_events),
    302                         timeout_ms);
    303 
    304   if (n_ready <= 0)
    305     return n_ready;
    306 
    307   for (i = 0; i < n_ready; i++)
    308     {
    309       revents[i].fd = events[i].data.fd;
    310       revents[i].flags = epoll_events_to_watch_flags (events[i].events);
    311     }
    312 
    313   return n_ready;
    314 }
    315 
/* Function table that plugs the epoll implementation into the generic
 * DBusSocketSet interface. Every set created by
 * _dbus_socket_set_epoll_new() points at this table, and
 * socket_set_epoll_cast() uses its address to recognise epoll sets.
 *
 * The initializer is positional, so the entries must stay in the member
 * order of DBusSocketSetClass (declared outside this file - presumably in
 * dbus-socket-set.h; confirm before reordering). */
DBusSocketSetClass _dbus_socket_set_epoll_class = {
    socket_set_epoll_free,
    socket_set_epoll_add,
    socket_set_epoll_remove,
    socket_set_epoll_enable,
    socket_set_epoll_disable,
    socket_set_epoll_poll
};
    324 
    325 #ifdef TEST_BEHAVIOUR_OF_EPOLLET
    326 /* usage: cat /dev/null | ./epoll
    327  *
    328  * desired output:
    329  * ctl ADD: 0
    330  * wait for HUP, edge-triggered: 1
    331  * wait for HUP again: 0
    332  * ctl MOD: 0
    333  * wait for HUP: 1
    334  */
    335 
    336 #include <sys/epoll.h>
    337 
    338 #include <stdio.h>
    339 
    340 int
    341 main (void)
    342 {
    343   struct epoll_event input;
    344   struct epoll_event output;
    345   int epfd = epoll_create1 (EPOLL_CLOEXEC);
    346   int fd = 0; /* stdin */
    347   int ret;
    348 
    349   input.events = EPOLLHUP | EPOLLET;
    350   ret = epoll_ctl (epfd, EPOLL_CTL_ADD, fd, &input);
    351   printf ("ctl ADD: %d\n", ret);
    352 
    353   ret = epoll_wait (epfd, &output, 1, -1);
    354   printf ("wait for HUP, edge-triggered: %d\n", ret);
    355 
    356   ret = epoll_wait (epfd, &output, 1, 1);
    357   printf ("wait for HUP again: %d\n", ret);
    358 
    359   input.events = EPOLLHUP;
    360   ret = epoll_ctl (epfd, EPOLL_CTL_MOD, fd, &input);
    361   printf ("ctl MOD: %d\n", ret);
    362 
    363   ret = epoll_wait (epfd, &output, 1, -1);
    364   printf ("wait for HUP: %d\n", ret);
    365 
    366   return 0;
    367 }
    368 
    369 #endif /* TEST_BEHAVIOUR_OF_EPOLLET */
    370 
    371 #endif /* !DOXYGEN_SHOULD_SKIP_THIS */
    372