Home | History | Annotate | Download | only in flip_server
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
      6 #define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
      7 
      8 #include <fcntl.h>
      9 #include <sys/queue.h>
     10 #include <ext/hash_map>  // it is annoying that gcc does this. oh well.
     11 #include <ext/hash_set>
     12 #include <map>
     13 #include <string>
     14 #include <utility>
     15 #include <set>
     16 #include <vector>
     17 
     18 // #define EPOLL_SERVER_EVENT_TRACING 1
     19 //
     20 // Defining EPOLL_SERVER_EVENT_TRACING
     21 // causes code to exist which didn't before.
     22 // This code tracks each event generated by the epollserver,
     23 // as well as providing a per-fd-registered summary of
     24 // events. Note that enabling this code vastly slows
     25 // down operations, and uses substantially more
     26 // memory. For these reasons, it should only be enabled when doing
     27 // developer debugging at his/her workstation.
     28 //
     29 // A structure called 'EventRecorder' will exist when
     30 // the macro is defined. See the EventRecorder class interface
     31 // within the EpollServer class for more details.
     32 #ifdef EPOLL_SERVER_EVENT_TRACING
     33 #include <iostream>
     34 #include "base/logging.h"
     35 #endif
     36 
     37 #include "base/basictypes.h"
     38 #include "base/scoped_ptr.h"
     39 #include <sys/epoll.h>
     40 
     41 namespace net {
     42 
     43 class EpollServer;
     44 class EpollAlarmCallbackInterface;
     45 class ReadPipeCallback;
     46 class WatchDogToken;
     47 
     48 struct EpollEvent {
     49   // Note: |is_epoll_wait| is accepted but is not stored by this constructor.
     50   EpollEvent(int events, bool is_epoll_wait)
     51       : in_events(events), out_ready_mask(0) {}
     52 
     53   // Incoming events.
     54   int in_events;
     55   // The new event mask for the ready list (0 means don't get on the ready
     56   // list). This field is always initialized to 0 when the event is passed
     57   // to OnEvent.
     58   int out_ready_mask;
     59 };
     60 
     61 // Callbacks which go into EpollServers are expected to derive from this class.
     62 class EpollCallbackInterface {
     63  public:
     64   // Summary:
     65   //   Called when the callback is registered into an EpollServer.
     66   // Args:
     67   //   eps - the poll server into which this callback was registered
     68   //   fd - the file descriptor which was registered
     69   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
     70   //                which was registered (and will initially be used
     71   //                in the epoll() calls)
     72   virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0;
     73 
     74   // Summary:
     75   //   Called when the event_mask is modified (for a file-descriptor)
     76   // Args:
     77   //   fd - the file descriptor which was registered
     78   //   event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc)
     79   //                which is now current (and will be used
     80   //                in subsequent epoll() calls)
     81   virtual void OnModification(int fd, int event_mask) = 0;
     82 
     83   // Summary:
     84   //   Called whenever an event occurs on the file-descriptor.
     85   //   This is where the bulk of processing is expected to occur.
     86   // Args:
     87   //   fd - the file descriptor which was registered
     88   //   event - a struct that carries the incoming event mask in in_events
     89   //           (composed of EPOLLIN, EPOLLOUT, etc) and an output field,
     90   //           out_ready_mask, through which OnEvent informs the EpollServer
     91   //           whether to put this fd on the ready list (0 means don't; the
     92   //           field is always 0 when the event is passed in).
     93   virtual void OnEvent(int fd, EpollEvent* event) = 0;
     94 
     95   // Summary:
     96   //   Called when the file-descriptor is unregistered from the poll-server.
     97   // Args:
     98   //   fd - the file descriptor which was registered and, as of this call, is
     99   //        now unregistered.
    100   //   replaced - If true, this callback is being replaced by another, otherwise
    101   //              it is simply being removed.
    102   virtual void OnUnregistration(int fd, bool replaced) = 0;
    103 
    104   // Summary:
    105   //   Called when the epoll server is shutting down.  This is different from
    106   //   OnUnregistration because the subclass may want to clean up memory.
    107   //   This is called in lieu of OnUnregistration.
    108   // Args:
    109   //   fd - the file descriptor which was registered.
    110   virtual void OnShutdown(EpollServer* eps, int fd) = 0;
    111 
    112   virtual ~EpollCallbackInterface() {}
    113 
    114  protected:  // Construction is restricted to subclasses.
    115   EpollCallbackInterface() {}
    116 };
    117 
    118 ////////////////////////////////////////////////////////////////////////////////
    119 ////////////////////////////////////////////////////////////////////////////////
    120 
    121 class EpollServer {
    122  public:
    123   typedef EpollAlarmCallbackInterface AlarmCB;
    124   typedef EpollCallbackInterface CB;
    125 
    126   typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap;
    127   typedef TimeToAlarmCBMap::iterator AlarmRegToken;
    128 
    129   // Summary:
    130   //   Constructor:
    131   //    By default, we don't wait any amount of time for events, and
    132   //    we suggest to the epoll-system that we're going to use on-the-order
    133   //    of 1024 FDs.
    134   EpollServer();
    135 
    136   ////////////////////////////////////////
    137 
    138   // Destructor
    139   virtual ~EpollServer();
    140 
    141   ////////////////////////////////////////
    142 
    143   // Summary
    144   //   Register a callback to be called whenever an event contained
    145   //   in the set of events included in event_mask occurs on the
    146   //   file-descriptor 'fd'
    147   //
    148   //   Note that only one callback is allowed to be registered for
    149   //   any specific file-descriptor.
    150   //
    151   //   If a callback is registered for a file-descriptor which has already
    152   //   been registered, then the previous callback is unregistered with
    153   //   the 'replaced' flag set to true. I.e. the previous callback's
    154   //   OnUnregistration() function is called like so:
    155   //      OnUnregistration(fd, true);
    156   //
    157   //  The epoll server does NOT take on ownership of the callback: the callback
    158   //  creator is responsible for managing that memory.
    159   //
    160   // Args:
    161   //   fd - a valid file-descriptor
    162   //   cb - an instance of a subclass of EpollCallbackInterface
    163   //   event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating
    164   //                the events for which the callback would like to be
    165   //                called.
    166   virtual void RegisterFD(int fd, CB* cb, int event_mask);
    167 
    168   ////////////////////////////////////////
    169 
    170   // Summary:
    171   //   A shortcut for RegisterFD which sets things up such that the
    172   //   callback is called when 'fd' is available for writing.
    173   // Args:
    174   //   fd - a valid file-descriptor
    175   //   cb - an instance of a subclass of EpollCallbackInterface
    176   virtual void RegisterFDForWrite(int fd, CB* cb);
    177 
    178   ////////////////////////////////////////
    179 
    180   // Summary:
    181   //   A shortcut for RegisterFD which sets things up such that the
    182   //   callback is called when 'fd' is available for reading or writing.
    183   // Args:
    184   //   fd - a valid file-descriptor
    185   //   cb - an instance of a subclass of EpollCallbackInterface
    186   virtual void RegisterFDForReadWrite(int fd, CB* cb);
    187 
    188   ////////////////////////////////////////
    189 
    190   // Summary:
    191   //   A shortcut for RegisterFD which sets things up such that the
    192   //   callback is called when 'fd' is available for reading.
    193   // Args:
    194   //   fd - a valid file-descriptor
    195   //   cb - an instance of a subclass of EpollCallbackInterface
    196   virtual void RegisterFDForRead(int fd, CB* cb);
    197 
    198   ////////////////////////////////////////
    199 
    200   // Summary:
    201   //   Removes the FD and the associated callback from the pollserver.
    202   //   If the callback is registered with other FDs, they will continue
    203   //   to be processed using the callback without modification.
    204   //   If the file-descriptor specified is not registered in the
    205   //   epoll_server, then nothing happens as a result of this call.
    206   // Args:
    207   //   fd - the file-descriptor which should no-longer be monitored.
    208   virtual void UnregisterFD(int fd);
    209 
    210   ////////////////////////////////////////
    211 
    212   // Summary:
    213   //   Modifies the event mask for the file-descriptor, replacing
    214   //   the old event_mask with the new one specified here.
    215   //   If the file-descriptor specified is not registered in the
    216   //   epoll_server, then nothing happens as a result of this call.
    217   // Args:
    218   //   fd - the fd whose event mask should be modified.
    219   //   event_mask - the new event mask.
    220   virtual void ModifyCallback(int fd, int event_mask);
    221 
    222   ////////////////////////////////////////
    223 
    224   // Summary:
    225   //   Modifies the event mask for the file-descriptor such that we
    226   //   no longer request events when 'fd' is readable.
    227   //   If the file-descriptor specified is not registered in the
    228   //   epoll_server, then nothing happens as a result of this call.
    229   // Args:
    230   //   fd - the fd whose event mask should be modified.
    231   virtual void StopRead(int fd);
    232 
    233   ////////////////////////////////////////
    234 
    235   // Summary:
    236   //   Modifies the event mask for the file-descriptor such that we
    237   //   request events when 'fd' is readable.
    238   //   If the file-descriptor specified is not registered in the
    239   //   epoll_server, then nothing happens as a result of this call.
    240   // Args:
    241   //   fd - the fd whose event mask should be modified.
    242   virtual void StartRead(int fd);
    243 
    244   ////////////////////////////////////////
    245 
    246   // Summary:
    247   //   Modifies the event mask for the file-descriptor such that we
    248   //   no longer request events when 'fd' is writable.
    249   //   If the file-descriptor specified is not registered in the
    250   //   epoll_server, then nothing happens as a result of this call.
    251   // Args:
    252   //   fd - the fd whose event mask should be modified.
    253   virtual void StopWrite(int fd);
    254 
    255   ////////////////////////////////////////
    256 
    257   // Summary:
    258   //   Modifies the event mask for the file-descriptor such that we
    259   //   request events when 'fd' is writable.
    260   //   If the file-descriptor specified is not registered in the
    261   //   epoll_server, then nothing happens as a result of this call.
    262   // Args:
    263   //   fd - the fd whose event mask should be modified.
    264   virtual void StartWrite(int fd);
    265 
    266   ////////////////////////////////////////
    267 
    268   // Summary:
    269   //   Looks up the callback associated with the file-descriptor 'fd'.
    270   //   If a callback is associated with this file-descriptor, then
    271   //   its OnEvent() method is called with the file-descriptor 'fd',
    272   //   and event_mask 'event_mask'
    273   //
    274   //   If no callback is registered for this file-descriptor, nothing
    275   //   will happen as a result of this call.
    276   //
    277   //   This function is used internally by the EpollServer, but is
    278   //   available publicly so that events might be 'faked'. Calling
    279   //   this function with an fd and event_mask is equivalent (as far
    280   //   as the callback is concerned) to having a real event generated
    281   //   by epoll (except, of course, that read(), etc won't necessarily
    282   //   be able to read anything)
    283   // Args:
    284   //   fd - the file-descriptor on which an event has occurred.
    285   //   event_mask - a bitmask representing the events which have occurred
    286   //                on/for this fd. This bitmask is composed of
    287   //                POLLIN, POLLOUT, etc.
    288   //
    289   void HandleEvent(int fd, int event_mask);
    290 
    291   // Summary:
    292   //   Call this when you want the pollserver to
    293   //   wait for events and execute the callbacks associated with
    294   //   the file-descriptors on which those events have occurred.
    295   //   Depending on the value of timeout_in_us_, this may or may
    296   //   not return immediately. Please reference the set_timeout_in_us()
    297   //   function for the specific behaviour.
    298   virtual void WaitForEventsAndExecuteCallbacks();
    299 
    300   // Summary:
    301   //   When an fd is registered to use edge trigger notification, the ready
    302   //   list can be used to simulate level trigger semantics. Edge trigger
    303   //   registration doesn't send an initial event, and only rising edge (going
    304   //   from blocked to unblocked) events are sent. A callback can put itself on
    305   //   the ready list by calling SetFDReady() after calling RegisterFD(). The
    306   //   OnEvent method of all callbacks associated with the fds on the ready
    307   //   list will be called immediately after processing the events returned by
    308   //   epoll_wait(). The fd is removed from the ready list before the
    309   //   callback's OnEvent() method is invoked. To stay on the ready list, the
    310   //   OnEvent() (or some function in that call chain) must call SetFDReady
    311   //   again. When a fd is unregistered using UnregisterFD(), the fd is
    312   //   automatically removed from the ready list.
    313   //
    314   //   When the callback for a edge triggered fd hits the falling edge (about
    315   //   to block, either because of it got an EAGAIN, or had a short read/write
    316   //   operation), it should remove itself from the ready list using
    317   //   SetFDNotReady() (since OnEvent cannot distinguish between invocation
    318   //   from the ready list vs from a normal epoll event). All four ready list
    319   //   methods are safe to be called  within the context of the callbacks.
    320   //
    321   //   Since the ready list invokes EpollCallbackInterface::OnEvent, only fds
    322   //   that are registered with the EpollServer will be put on the ready list.
    323   //   SetFDReady() and SetFDNotReady() will do nothing if the EpollServer
    324   //   doesn't know about the fd passed in.
    325   //
    326   //   Since the ready list cannot reliably determine proper set of events
    327   //   which should be sent to the callback, SetFDReady() requests the caller
    328   //   to provide the ready list with the event mask, which will be used later
    329   //   when OnEvent() is invoked by the ready list. Hence, the event_mask
    330   //   passed to SetFDReady() does not affect the actual epoll registration of
    331   //   the fd with the kernel. If a fd is already put on the ready list, and
    332   //   SetFDReady() is called again for that fd with a different event_mask,
    333   //   the event_mask will be updated.
    334   virtual void SetFDReady(int fd, int events_to_fake);
    335 
    336   virtual void SetFDNotReady(int fd);
    337 
    338   // Summary:
    339   //   IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as
    340   //   debugging tools and for writing unit tests.
    341   //   IsFDReady() returns whether a fd is in the ready list.
    342   //   ReadyListSize() returns the number of fds on the ready list.
    343   //   VerifyReadyList() checks the consistency of internal data structure. It
    344   //   will CHECK if it finds an error.
    345   virtual bool IsFDReady(int fd) const;
    346 
    347   size_t ReadyListSize() const { return this->ready_list_size_; }
    348 
    349   void VerifyReadyList() const;
    350 
    351   ////////////////////////////////////////
    352 
    353   // Summary:
    354   //   Registers an alarm 'ac' to go off at time 'timeout_time_in_us'.
    355   //   If the callback returns a positive number from its OnAlarm() function,
    356   //   then the callback will be re-registered at that time, else the alarm
    357   //   owner is responsible for freeing up memory.
    358   //
    359   //   Important: A given AlarmCB* can not be registered again if it is already
    360   //    registered. If a user wants to register a callback again it should first
    361   //    unregister the previous callback before calling RegisterAlarm again.
    362   // Args:
    363   //   timeout_time_in_us - the absolute time at which the alarm should go off
    364   //   ac - the alarm which will be called.
    365   virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac);
    366 
    367   // Summary:
    368   //   Registers an alarm 'ac' to go off at time: (ApproximateNowInUsec() +
    369   //   delta_in_us). While this is somewhat less accurate (see the description
    370   //   for ApproximateNowInUsec() to see how 'approximate'), the error is never
    371   //   worse than the amount of time it takes to process all events in one
    372   //   WaitForEvents.  As with 'RegisterAlarm()', if the callback returns a
    373   //   positive number from its OnAlarm() function, then the callback will be
    374   //   re-registered at that time, else the alarm owner is responsible for
    375   //   freeing up memory.
    376   //   Note that this function is purely a convenience. The
    377   //   same thing may be accomplished by using RegisterAlarm with
    378   //   ApproximateNowInUsec() directly.
    379   //
    380   //   Important: A given AlarmCB* can not be registered again if it is already
    381   //    registered. If a user wants to register a callback again it should first
    382   //    unregister the previous callback before calling RegisterAlarm again.
    383   // Args:
    384   //   delta_in_us - the delta in microseconds from ApproximateNowInUsec() at
    385   //                 which point the alarm should go off.
    386   //   ac - the alarm which will be called.
    387   void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) {
    388     this->RegisterAlarm(delta_in_us + this->ApproximateNowInUsec(), ac);
    389   }
    390 
    391   ////////////////////////////////////////
    392 
    393   // Summary:
    394   //   Unregister  the alarm referred to by iterator_token; Callers should
    395   //   be warned that a token may have become already invalid when OnAlarm()
    396   //   is called, was unregistered, or OnShutdown was called on that alarm.
    397   // Args:
    398   //    iterator_token - iterator to the alarm callback to unregister.
    399   virtual void UnregisterAlarm(
    400       const EpollServer::AlarmRegToken& iterator_token);
    401 
    402   ////////////////////////////////////////
    403 
    404   // Summary:
    405   //   returns the number of file-descriptors registered in this EpollServer.
    406   // Returns:
    407   //   number of FDs registered (discounting the internal pipe used for Wake)
    408   virtual int NumFDsRegistered() const;
    409 
    410   // Summary:
    411   //   Force the epoll server to wake up (by writing to an internal pipe).
    412   virtual void Wake();
    413 
    414   // Summary:
    415   //   Wrapper around WallTimer's NowInUsec.  We do this so that we can test
    416   //   EpollServer without using the system clock (and can avoid the flakiness
    417   //   that would ensue)
    418   // Returns:
    419   //   the current time as number of microseconds since the Unix epoch.
    420   virtual int64 NowInUsec() const;
    421 
    422   // Summary:
    423   //   Since calling NowInUsec() many thousands of times per
    424   //   WaitForEventsAndExecuteCallbacks function call is, to say the least,
    425   //   inefficient, we allow users to use an approximate time instead. The
    426   //   time returned from this function is as accurate as NowInUsec() when
    427   //   WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's
    428   //   callstack.
    429   //   However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then
    430   //   this function returns the time at which the
    431   //   WaitForEventsAndExecuteCallbacks function started to process events or
    432   //   alarms.
    433   //
    434   //   Essentially, this function makes available a fast and mostly accurate
    435   //   mechanism for getting the time for any function handling an event or
    436   //   alarm. When functions which are not handling callbacks or alarms call
    437   //   this function, they get the slow and "absolutely" accurate time.
    438   //
    439   //   Users should be encouraged to use this function.
    440   // Returns:
    441   //   the "approximate" current time as number of microseconds since the Unix
    442   //   epoch.
    443   virtual int64 ApproximateNowInUsec() const {
    444     if (recorded_now_in_us_ == 0) {
    445       return this->NowInUsec();
    446     }
    447     return recorded_now_in_us_;
    448   }
    449 
    450   static std::string EventMaskToString(int event_mask);
    451 
    452   // Summary:
    453   //   Logs the state of the epoll server with LOG(ERROR).
    454   void LogStateOnCrash();
    455 
    456   // Summary:
    457   //   Set the timeout to the value specified.
    458   //   If the timeout is set to a negative number,
    459   //      WaitForEventsAndExecuteCallbacks() will only return when an event has
    460   //      occurred
    461   //   If the timeout is set to zero,
    462   //      WaitForEventsAndExecuteCallbacks() will return immediately
    463   //   If the timeout is set to a positive number,
    464   //      WaitForEventsAndExecuteCallbacks() will return when an event has
    465   //      occurred, or when timeout_in_us microseconds has elapsed, whichever
    466   //      is first.
    467   //  Args:
    468   //    timeout_in_us - value specified depending on behaviour desired.
    469   //                    See above.
    470   void set_timeout_in_us(int64 timeout_in_us) {
    471     this->timeout_in_us_ = timeout_in_us;
    472   }
    473 
    474   ////////////////////////////////////////
    475 
    476   // Summary:
    477   //   Returns timeout_in_us_ at full int64 width (an int would truncate it).
    478   int64 timeout_in_us() const { return timeout_in_us_; }
    479 
    480   // Summary:
    481   // Returns true when the EpollServer() is being destroyed.
    482   bool in_shutdown() const { return this->in_shutdown_; }
    483 
    484   bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const {
    485     return all_alarms_.count(alarm) != 0;  // true iff |alarm| is in the set
    486   }
    487 
    488   // Summary:
    489   //   A function for implementing the ready list. It invokes OnEvent for each
    490   //   of the fd in the ready list, and takes care of adding them back to the
    491   //   ready list if the callback requests it (by checking that out_ready_mask
    492   //   is non-zero).
    493   void CallReadyListCallbacks();
    494 
    495   // Granularity at which time moves when considering what alarms are on.
    496   // See function: DoRoundingOnNow() on exact usage.
    497   static const int kMinimumEffectiveAlarmQuantum;
    498  protected:
    499 
    500   // These have to be in the .h file so that we can override them in tests.
    501   virtual int GetFlags(int fd) { return fcntl(fd, F_GETFL, 0); }
    502   int SetFlags(int fd, int flags) {
    503     return fcntl(fd, F_SETFL, O_NONBLOCK | flags);  // O_NONBLOCK always set
    504   }
    505 
    506   virtual void SetNonblocking (int fd);
    507 
    508   // This exists here so that we can override this function in unittests
    509   // in order to make effective mock EpollServer objects.
    510   virtual int epoll_wait_impl(int epfd, struct epoll_event* events,
    511                               int max_events, int timeout_in_ms) {
    512     // Plain pass-through to epoll_wait(); the result is returned untouched.
    513     const int rv = epoll_wait(epfd, events, max_events, timeout_in_ms);
    514     return rv;
    515   }
    516 
    517   // this struct is used internally, and is never used by anything external
    518   // to this class. Some of its members are declared mutable to get around the
    519   // restriction imposed by hash_set. Since hash_set knows nothing about the
    520   // objects it stores, it has to assume that every bit of the object is used
    521   // in the hash function and equal_to comparison. Thus hash_set::iterator is a
    522   // const iterator. In this case, the only thing that must stay constant is
    523   // fd. Everything else are just along for the ride and changing them doesn't
    524   // compromise the hash_set integrity.
    525   struct CBAndEventMask {
    526     // Default entry: no callback, fd of -1, every mask cleared, not in use,
    527     // and both list links nulled out.
    528     CBAndEventMask()
    529         : cb(NULL), fd(-1), event_mask(0), events_asserted(0),
    530           events_to_fake(0), in_use(false) {
    531       entry.le_prev = NULL;
    532       entry.le_next = NULL;
    533     }
    534 
    535     // Entry binding |cb| and |event_mask| to |fd|; the other masks start
    536     // cleared, in_use starts false, and both list links are nulled out.
    537     CBAndEventMask(EpollCallbackInterface* cb, int event_mask, int fd)
    538         : cb(cb), fd(fd), event_mask(event_mask),
    539           events_asserted(0), events_to_fake(0), in_use(false) {
    540       entry.le_prev = NULL;
    541       entry.le_next = NULL;
    542     }
    543 
    544     // Equality operator required by hash_set; entries are equal when their
    545     // fds are equal. It would normally be a free function, but since this is
    546     // a protected type that is never a base class, a member works as well.
    547     bool operator==(const CBAndEventMask& cb_and_mask) const {
    548       return cb_and_mask.fd == fd;
    549     }
    550 
    551     // The callback. It is set to NULL if the fd is unregistered inside the
    552     // callchain of OnEvent.
    553     mutable EpollCallbackInterface* cb;
    554 
    555     // List linkage (LIST_ENTRY comes from <sys/queue.h>).
    556     mutable LIST_ENTRY(CBAndEventMask) entry;
    557     // The file descriptor registered with the epoll server.
    558     int fd;
    559     // The event_mask currently registered for this callback.
    560     mutable int event_mask;
    561     // The event_mask that was returned by epoll.
    562     mutable int events_asserted;
    563     // The event_mask the ready list uses when it calls OnEvent.
    564     mutable int events_to_fake;
    565     // Toggled around calls to OnEvent so that UnregisterFD does not erase the
    566     // iterator while HandleEvent is still using it.
    567     mutable bool in_use;
    568   };
    569 
    570   // Hash functor for the FDToCBMap hash_set: an entry hashes to its fd.
    571   struct CBAndEventMaskHash {
    572     size_t operator()(const CBAndEventMask& cb_and_eventmask) const {
    573       return static_cast<size_t>(cb_and_eventmask.fd);
    574     }
    575   };
    576 
    577   typedef __gnu_cxx::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap;
    578 
    579   // the following four functions are OS-specific, and are likely
    580   // to be changed in a subclass if the poll/select method is changed
    581   // from epoll.
    582 
    583   // Summary:
    584   //   Deletes a file-descriptor from the set of FDs that should be
    585   //   monitored with epoll.
    586   //   Note that this only deals with modifying data relating -directly-
    587   //   with the epoll call-- it does not modify any data within the
    588   //   epoll_server.
    589   // Args:
    590   //   fd - the file descriptor to-be-removed from the monitoring set
    591   virtual void DelFD(int fd) const;
    592 
    593   ////////////////////////////////////////
    594 
    595   // Summary:
    596   //   Adds a file-descriptor to the set of FDs that should be
    597   //   monitored with epoll.
    598   //   Note that this only deals with modifying data relating -directly-
    599   //   with the epoll call.
    600   // Args:
    601   //   fd - the file descriptor to-be-added to the monitoring set
    602   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
    603   //                 OR'd together) which will be associated with this
    604   //                 FD initially.
    605   virtual void AddFD(int fd, int event_mask) const;
    606 
    607   ////////////////////////////////////////
    608 
    609   // Summary:
    610   //   Modifies a file-descriptor in the set of FDs that should be
    611   //   monitored with epoll.
    612   //   Note that this only deals with modifying data relating -directly-
    613   //   with the epoll call.
    614   // Args:
    615   //   fd - the file descriptor to-be-added to the monitoring set
    616   //   event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc
    617   //                 OR'd together) which will be associated with this
    618   //                 FD after this call.
    619   virtual void ModFD(int fd, int event_mask) const;
    620 
    621   ////////////////////////////////////////
    622 
    623   // Summary:
    624   //   Modifies the event mask associated with an FD in the set of
    625   //   data needed by epoll.
    626   //   Events are removed before they are added, thus, if ~0 is put
    627   //   in 'remove_event', whatever is put in 'add_event' will be
    628   //   the new event mask.
    629   //   If the file-descriptor specified is not registered in the
    630   //   epoll_server, then nothing happens as a result of this call.
    631   // Args:
    632   //   fd - the file descriptor whose event mask is to be modified
    633   //   remove_event - the events which are to be removed from the current
    634   //                  event_mask
    635   //   add_event - the events which are to be added to the current event_mask
    636   //
    637   //
    638   virtual void ModifyFD(int fd, int remove_event, int add_event);
    639 
    640   ////////////////////////////////////////
    641 
    642   // Summary:
    643   //   Waits for events, and calls HandleEvent() for each
    644   //   fd, event pair discovered to possibly have an event.
    645   //   Note that a callback (B) may get a spurious event if
    646   //   another callback (A) has closed a file-descriptor N, and
    647   //   the callback (B) has a newly opened file-descriptor, which
    648   //   also happens to be N.
    649   virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
    650                                                 struct epoll_event events[],
    651                                                 int events_size);
    652 
    653 
    654 
    655   // Summary:
    656   //   An internal function for implementing the ready list. It adds a fd's
    657   //   CBAndEventMask to the ready list. If the fd is already on the ready
    658   //   list, it is a no-op.
    659   void AddToReadyList(CBAndEventMask* cb_and_mask);
    660 
    661   // Summary:
    662   //   An internal function for implementing the ready list. It removes an
    663   //   fd's CBAndEventMask from the ready list. If the fd is not on the ready
    664   //   list, it is a no-op.
    665   void RemoveFromReadyList(const CBAndEventMask& cb_and_mask);
    666 
    667   // Summary:
    668   // Calls any pending alarms that should go off and reregisters them if they
    669   // were recurring.
    670   virtual void CallAndReregisterAlarmEvents();
    671 
    672   // The file-descriptor created for epolling
    673   int epoll_fd_;
    674 
    675   // The mapping of file-descriptor to CBAndEventMasks
    676   FDToCBMap cb_map_;
    677 
    678   // Hash functor handed to hash_set: an alarm pointer hashes to its address.
    679   struct AlarmCBHash {
    680     size_t operator()(AlarmCB* const& alarm) const {
              // The pointer value itself, reinterpreted as an integer, is the hash.
    681       const size_t address_bits = reinterpret_cast<size_t>(alarm);
              return address_bits;
    682     }
    683   };
    684 
    685 
    686   // TODO(sushantj): Having this hash_set is avoidable. We currently have it
    687   // only so that we can enforce stringent checks that a caller can not register
    688   // the same alarm twice. One option is to have an implementation in which
    689   // this hash_set is used only in the debug mode.
    690   typedef __gnu_cxx::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap;
    691   AlarmCBMap all_alarms_;
    692 
    693   TimeToAlarmCBMap alarm_map_;
    694 
    695   // The amount of time in microseconds that we'll wait before returning
    696   // from the WaitForEventsAndExecuteCallbacks() function.
    697   // If this is positive, wait that many microseconds.
    698   // If this is negative, wait forever, or for the first event that occurs
    699   // If this is zero, never wait for an event.
    700   int64 timeout_in_us_;
    701 
    702   // This is nonzero only after the invocation of epoll_wait_impl within
    703   // WaitForEventsAndCallHandleEvents and before the function
    704   // WaitForEventsAndExecuteCallbacks returns.  At all other times, this is
    705   // zero. This enables us to have relatively accurate time returned from the
    706   // ApproximateNowInUs() function. See that function for more details.
    707   int64 recorded_now_in_us_;
    708 
    709   // This is used to implement CallAndReregisterAlarmEvents. This stores
    710   // all alarms that were reregistered because OnAlarm() returned a
    711   // value > 0 and the time at which they should be executed is less than
    712   // the current time.  By storing such alarms in this map we ensure
    713   // that while calling CallAndReregisterAlarmEvents we do not call
    714   // OnAlarm on any alarm in this set. This ensures that we do not
    715   // go in an infinite loop.
    716   AlarmCBMap alarms_reregistered_and_should_be_skipped_;
    717 
    718   LIST_HEAD(ReadyList, CBAndEventMask) ready_list_;
    719   LIST_HEAD(TmpList, CBAndEventMask) tmp_list_;
    720   int ready_list_size_;
    721   // TODO(alyssar): make this into something that scales up.
    722   static const int events_size_ = 256;
    723   struct epoll_event events_[256];
    724 
    725   // This controls the granularity for alarms.
    726   // See function CallAndReregisterAlarmEvents()
          // Takes a time value in microseconds ('now_in_us') and returns an int64;
          // NOTE(review): presumably the rounded time, also in microseconds --
          // confirm against the implementation.
    727   // TODO(sushantj): Add test for this.
    728   int64 DoRoundingOnNow(int64 now_in_us) const;
    729 
    730 #ifdef EPOLL_SERVER_EVENT_TRACING
    731   struct EventRecorder {
            // Debug-only bookkeeping, compiled only when EPOLL_SERVER_EVENT_TRACING
            // is defined. It keeps a log of individual events (debug_events_),
            // per-fd counts of the epoll flags seen (event_counts_), and the counts
            // of fds that have since been unregistered (unregistered_fds_).
    732    public:
    733     EventRecorder() : num_records_(0), record_threshold_(10000) {}
    734
    735     ~EventRecorder() {
    736       Clear();
    737     }
    738
    739     // When the number of events exceeds the record threshold,
    740     // the collected data summary for all FDs will be written
    741     // to LOG(INFO). Note that this does not include the
    742     // individual events (if you're interested in those, you'll
    743     // have to get at them programmatically).
    744     // After any such flushing to LOG(INFO) all events will
    745     // be cleared.
    746     // Note that the definition of an 'event' is a bit 'hazy',
    747     // as it includes the 'Unregistration' event, and perhaps
    748     // others.
            // A threshold <= 0 disables the flush (see MaybeRecordAndClear()).
    749     void set_record_threshold(int64 new_threshold) {
    750       record_threshold_ = new_threshold;
    751     }
    752
            // Deletes every recorded debug event and empties all three containers.
            // num_records_ is not reset here.
    753     void Clear() {
    754       for (size_t i = 0; i < debug_events_.size(); ++i) {
    755         delete debug_events_[i];
    756       }
    757       debug_events_.clear();
    758       unregistered_fds_.clear();
    759       event_counts_.clear();
    760     }
    761
            // Counts one more record. Once the count exceeds a positive
            // record_threshold_, writes the summary to LOG(INFO), resets the count
            // and clears everything collected so far.
    762     void MaybeRecordAndClear() {
    763       ++num_records_;
    764       if ((num_records_ > record_threshold_) &&
    765           (record_threshold_ > 0)) {
    766         LOG(INFO) << "\n" << *this;
    767         num_records_ = 0;
    768         Clear();
    769       }
    770     }
    771
            // Logs one (fd, mask, function) entry. 'function' is stored as a raw
            // pointer (not copied), so it must stay valid while the entry exists.
    772     void RecordFDMaskEvent(int fd, int mask, const char* function) {
    773       FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function);
    774       debug_events_.push_back(fdmo);
    775       MaybeRecordAndClear();
    776     }
    777
            // Logs one entry holding the given timeout and number of events.
    778     void RecordEpollWaitEvent(int timeout_in_ms,
    779                               int num_events_generated) {
    780       EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms,
    781                                                   num_events_generated);
    782       debug_events_.push_back(ewo);
    783       MaybeRecordAndClear();
    784     }
    785
            // Bumps the per-fd counter of every epoll flag set in 'event_mask',
            // creating the fd's entry on first use.
    786     void RecordEpollEvent(int fd, int event_mask) {
    787       Events& events_for_fd = event_counts_[fd];
    788       events_for_fd.AssignFromMask(event_mask);
    789       MaybeRecordAndClear();
    790     }
    791
            // Dumps, in order: counters of unregistered fds, counters of currently
            // tracked fds, then every individual debug event.
    792     friend std::ostream& operator<<(std::ostream& os, const EventRecorder& er) {
    793       for (size_t i = 0; i < er.unregistered_fds_.size(); ++i) {
                // Print the fd number once, then its counters, matching the format
                // used for event_counts_ below.
    794         os << "fd: " << er.unregistered_fds_[i].first << "\n";
    795         os << er.unregistered_fds_[i].second;
    796       }
    797       for (EventCountsMap::const_iterator i = er.event_counts_.begin();
    798            i != er.event_counts_.end();
    799            ++i) {
    800         os << "fd: " << i->first << "\n";
    801         os << i->second;
    802       }
    803       for (size_t i = 0; i < er.debug_events_.size(); ++i) {
    804         os << *(er.debug_events_[i]) << "\n";
    805       }
    806       return os;
    807     }
    808
            // Moves the counters of 'fd' (if any were collected) from the live map
            // to the unregistered list, remembering which fd they belonged to.
    809     void RecordUnregistration(int fd) {
    810       EventCountsMap::iterator i = event_counts_.find(fd);
    811       if (i != event_counts_.end()) {
    812         unregistered_fds_.push_back(std::make_pair(i->first, i->second));
    813         event_counts_.erase(i);
    814       }
    815       MaybeRecordAndClear();
    816     }
    817
    818    protected:
            // Base class of the individual log entries kept in debug_events_.
    819     class DebugOutput {
    820      public:
    821       friend std::ostream& operator<<(std::ostream& os,
                                              const DebugOutput& debug_output) {
                // OutputToStream() takes a pointer, so pass the stream's address.
    822         debug_output.OutputToStream(&os);
    823         return os;
    824       }
    825       virtual void OutputToStream(std::ostream* os) const = 0;
    826       virtual ~DebugOutput() {}
    827     };
    828
            // Log entry holding an fd, an event mask and a function-name string.
    829     class FDMaskOutput : public DebugOutput {
    830      public:
    831       FDMaskOutput(int fd, int mask, const char* function) :
    832           fd_(fd), mask_(mask), function_(function) {}
              // Writes the function name and fd; the mask is printed (through
              // EventMaskToString, declared outside this struct) only when non-zero.
    833       virtual void OutputToStream(std::ostream* os) const {
    834         (*os) << "func: " << function_
    835               << "\tfd: " << fd_;
    836         if (mask_ != 0) {
    837            (*os) << "\tmask: " << EventMaskToString(mask_);
    838         }
    839       }
    840       int fd_;
    841       int mask_;
    842       const char* function_;
    843     };
    844
            // Log entry holding a timeout (ms) and a generated-event count.
    845     class EpollWaitOutput : public DebugOutput {
    846      public:
    847       EpollWaitOutput(int timeout_in_ms,
    848                       int num_events_generated) :
    849           timeout_in_ms_(timeout_in_ms),
    850           num_events_generated_(num_events_generated) {}
    851       virtual void OutputToStream(std::ostream* os) const {
    852         (*os) << "timeout_in_ms: " << timeout_in_ms_
    853               << "\tnum_events_generated: " << num_events_generated_;
    854       }
    855      protected:
    856       int timeout_in_ms_;
    857       int num_events_generated_;
    858     };
    859
            // Per-fd tally of how many times each epoll flag has been seen.
    860     struct Events {
    861       Events() :
    862           epoll_in(0),
    863           epoll_pri(0),
    864           epoll_out(0),
    865           epoll_rdnorm(0),
    866           epoll_rdband(0),
    867           epoll_wrnorm(0),
    868           epoll_wrband(0),
    869           epoll_msg(0),
    870           epoll_err(0),
    871           epoll_hup(0),
    872           epoll_oneshot(0),
    873           epoll_et(0) {}
    874
              // Despite the name, this increments (it does not assign) the counter
              // of every flag that is set in 'event_mask'.
    875       void AssignFromMask(int event_mask) {
    876         if (event_mask & EPOLLIN) ++epoll_in;
    877         if (event_mask & EPOLLPRI) ++epoll_pri;
    878         if (event_mask & EPOLLOUT) ++epoll_out;
    879         if (event_mask & EPOLLRDNORM) ++epoll_rdnorm;
    880         if (event_mask & EPOLLRDBAND) ++epoll_rdband;
    881         if (event_mask & EPOLLWRNORM) ++epoll_wrnorm;
    882         if (event_mask & EPOLLWRBAND) ++epoll_wrband;
    883         if (event_mask & EPOLLMSG) ++epoll_msg;
    884         if (event_mask & EPOLLERR) ++epoll_err;
    885         if (event_mask & EPOLLHUP) ++epoll_hup;
    886         if (event_mask & EPOLLONESHOT) ++epoll_oneshot;
    887         if (event_mask & EPOLLET) ++epoll_et;
    888       }
    889
              // Prints one line per flag whose counter is non-zero.
    890       friend std::ostream& operator<<(std::ostream& os, const Events& ev) {
    891         if (ev.epoll_in) {
    892           os << "\t      EPOLLIN: " << ev.epoll_in << "\n";
    893         }
    894         if (ev.epoll_pri) {
    895           os << "\t     EPOLLPRI: " << ev.epoll_pri << "\n";
    896         }
    897         if (ev.epoll_out) {
    898           os << "\t     EPOLLOUT: " << ev.epoll_out << "\n";
    899         }
    900         if (ev.epoll_rdnorm) {
    901           os << "\t  EPOLLRDNORM: " << ev.epoll_rdnorm << "\n";
    902         }
    903         if (ev.epoll_rdband) {
    904           os << "\t  EPOLLRDBAND: " << ev.epoll_rdband << "\n";
    905         }
    906         if (ev.epoll_wrnorm) {
    907           os << "\t  EPOLLWRNORM: " << ev.epoll_wrnorm << "\n";
    908         }
    909         if (ev.epoll_wrband) {
    910           os << "\t  EPOLLWRBAND: " << ev.epoll_wrband << "\n";
    911         }
    912         if (ev.epoll_msg) {
    913           os << "\t     EPOLLMSG: " << ev.epoll_msg << "\n";
    914         }
    915         if (ev.epoll_err) {
    916           os << "\t     EPOLLERR: " << ev.epoll_err << "\n";
    917         }
    918         if (ev.epoll_hup) {
    919           os << "\t     EPOLLHUP: " << ev.epoll_hup << "\n";
    920         }
    921         if (ev.epoll_oneshot) {
    922           os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n";
    923         }
    924         if (ev.epoll_et) {
    925           os << "\t      EPOLLET: " << ev.epoll_et << "\n";
    926         }
    927         return os;
    928       }
    929
    930       unsigned int epoll_in;
    931       unsigned int epoll_pri;
    932       unsigned int epoll_out;
    933       unsigned int epoll_rdnorm;
    934       unsigned int epoll_rdband;
    935       unsigned int epoll_wrnorm;
    936       unsigned int epoll_wrband;
    937       unsigned int epoll_msg;
    938       unsigned int epoll_err;
    939       unsigned int epoll_hup;
    940       unsigned int epoll_oneshot;
    941       unsigned int epoll_et;
    942     };
    943
            // Individual log entries; owned by this recorder (deleted in Clear()).
    944     std::vector<DebugOutput*> debug_events_;
            // (fd, counters) of fds that were unregistered after collecting events.
    945     std::vector<std::pair<int, Events> > unregistered_fds_;
    946     typedef __gnu_cxx::hash_map<int, Events> EventCountsMap;
            // Counters of the fds currently being tracked, keyed by fd.
    947     EventCountsMap event_counts_;
    948     int64 num_records_;
    949     int64 record_threshold_;

           private:
            // debug_events_ holds owning raw pointers, so a copy of the recorder
            // would delete every entry twice.
            DISALLOW_COPY_AND_ASSIGN(EventRecorder);
    950   };
    951 
    952   void ClearEventRecords() {
            // Discards everything the event recorder has collected so far.
    953     event_recorder_.Clear();
    954   }
    955   void WriteEventRecords(std::ostream* os) const {
            // Streams the event recorder's collected data to *os. 'os' is
            // dereferenced without a check, so it must not be NULL.
    956     (*os) << event_recorder_;
    957   }
    958 
    959   mutable EventRecorder event_recorder_;
    960 
    961 #endif
    962 
    963  private:
    964   // Helper functions used in the destructor.
    965   void CleanupFDToCBMap();
    966   void CleanupTimeToAlarmCBMap();
    967 
    968   // The callback registered to the fds below.  As the purpose of their
    969   // registration is to wake the epoll server it just clears the pipe and
    970   // returns.
    971   scoped_ptr<ReadPipeCallback> wake_cb_;
    972 
    973   // A pipe owned by the epoll server.  The server will be registered to listen
    974   // on read_fd_ and can be woken by Wake() which writes to write_fd_.
    975   int read_fd_;
    976   int write_fd_;
    977 
    978   // This boolean is checked to see if it is false at the top of the
    979   // WaitForEventsAndExecuteCallbacks function. If not, then it either returns
    980   // without doing work, and logs to ERROR, or aborts the program (in
    981   // DEBUG mode). If so, then it sets the bool to true, does work, and
    982   // sets it back to false when done. This catches unwanted recursion.
    983   bool in_wait_for_events_and_execute_callbacks_;
    984 
    985   // Returns true when the EpollServer() is being destroyed.
    986   bool in_shutdown_;
    987 
    988   DISALLOW_COPY_AND_ASSIGN(EpollServer);
    989 };
    990 
    991 class EpollAlarmCallbackInterface {
          // Interface implemented by alarm callbacks. All four notification
          // methods below are pure virtual.
    992  public:
    993   // Summary:
    994   //   Called when an alarm times out. Invalidates an AlarmRegToken.
    995   //   WARNING: If a token was saved to refer to an alarm callback, OnAlarm must
    996   //   delete it, as the reference is no longer valid.
    997   // Returns:
    998   //   the unix time (in microseconds) at which this alarm should be signaled
    999   //   again, or 0 if the alarm should be removed.
   1000   virtual int64 OnAlarm() = 0;
   1001
   1002   // Summary:
   1003   //   Called when an alarm is registered. Invalidates an AlarmRegToken.
          //   NOTE(review): "Invalidates" presumably refers to a token saved from
          //   an earlier registration -- confirm.
   1004   // Args:
   1005   //   token: the iterator to the alarm registered in the alarm map.
   1006   //   WARNING: this token becomes invalid when the alarm fires, is
   1007   //   unregistered, or OnShutdown is called on that alarm.
   1008   //   eps: the epoll server the alarm is registered with.
   1009   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
   1010                               EpollServer* eps) = 0;
   1011
   1012   // Summary:
   1013   //   Called when an alarm is unregistered.
   1014   //   WARNING: It is not valid to unregister a callback and then use the token
   1015   //   that was saved to refer to the callback.
   1016   virtual void OnUnregistration() = 0;
   1017
   1018   // Summary:
   1019   //   Called when the epoll server is shutting down.
   1020   //   Invalidates the AlarmRegToken that was given when this alarm was
   1021   //   registered.
   1022   virtual void OnShutdown(EpollServer* eps) = 0;
   1023
   1024   virtual ~EpollAlarmCallbackInterface() {}
   1025
   1026  protected:
          // Protected so the interface can only be constructed through a derived
          // class.
   1027   EpollAlarmCallbackInterface() {}
   1028 };
   1029 
   1030 // A simple alarm which unregisters itself on destruction.
   1031 //
   1032 // PLEASE NOTE:
   1033 // Any classes overriding these functions must either call the implementation
   1034 // of the parent class, or must otherwise make sure that the 'registered_'
   1035 // boolean and the token, 'token_', are updated appropriately.
   1036 class EpollAlarm : public EpollAlarmCallbackInterface {
   1037  public:
   1038   EpollAlarm();
   1039
   1040   virtual ~EpollAlarm();
   1041
   1042   // Marks the alarm as unregistered and returns 0.  The return value may be
   1043   // safely ignored by subclasses.
   1044   virtual int64 OnAlarm();
   1045
   1046   // Marks the alarm as registered, and stores the token.
   1047   virtual void OnRegistration(const EpollServer::AlarmRegToken& token,
   1048                               EpollServer* eps);
   1049
   1050   // Marks the alarm as unregistered.
   1051   virtual void OnUnregistration();
   1052
   1053   // Marks the alarm as unregistered.
   1054   virtual void OnShutdown(EpollServer* eps);
   1055
   1056   // If the alarm was registered, unregister it.
   1057   void UnregisterIfRegistered();
   1058
          // Returns the current value of the 'registered_' flag.
   1059   bool registered() const { return registered_; }
   1060
          // Returns the stored server pointer, 'eps_'.
   1061   const EpollServer* eps() const { return eps_; }
   1062
   1063  private:
          // Token referred to by the class comment; stored by OnRegistration().
   1064   EpollServer::AlarmRegToken token_;
          // NOTE(review): presumably the server passed to OnRegistration() --
          // confirm in the implementation.
   1065   EpollServer* eps_;
          // Whether the alarm is currently marked as registered.
   1066   bool registered_;
   1067 };
   1068 
   1069 }  // namespace net
   1070 
   1071 #endif  // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_
   1072 
   1073