Home | History | Annotate | Download | only in flip_server
      1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/tools/flip_server/epoll_server.h"
      6 
      7 #include <stdlib.h>  // for abort
      8 #include <errno.h>    // for errno and strerror_r
      9 #include <algorithm>
     10 #include <iostream>
     11 #include <utility>
     12 #include <vector>
     13 
     14 #include "base/logging.h"
     15 #include "base/timer.h"
     16 
     17 // Design notes: An efficient implementation of ready list has the following
     18 // desirable properties:
     19 //
     20 // A. O(1) insertion into/removal from the list in any location.
     21 // B. Once the callback is found by hash lookup using the fd, the lookup of
     22 //    corresponding entry in the list is O(1).
     23 // C. Safe insertion into/removal from the list during list iteration. (The
     24 //    ready list's purpose is to enable completely event driven I/O model.
     25 //    Thus, all the interesting bits happen in the callback. It is critical
     26 //    to not place any restriction on the API during list iteration.)
     27 //
     28 // The current implementation achieves these goals with the following design:
     29 //
     30 // - The ready list is constructed as a doubly linked list to enable O(1)
     31 //   insertion/removal (see man 3 queue).
     32 // - The forward and backward links are directly embedded inside the
     33 //   CBAndEventMask struct. This enables O(1) lookup in the list for a given
     34 //   callback. (Technically, we could've used std::list of hash_set::iterator,
     35 //   and keep a list::iterator in CBAndEventMask to achieve the same effect.
     36 //   However, iterators have two problems: no way to portably invalidate them,
     37 //   and no way to tell whether an iterator is singular or not. The only way to
     38 //   overcome these issues is to keep bools in both places, but that throws off
     39 //   memory alignment (up to 7 wasted bytes for each bool). The extra level of
     40 //   indirection will also likely be less cache friendly. Direct manipulation
     41 //   of link pointers makes it easier to retrieve the CBAndEventMask from the
     42 //   list, easier to check whether an CBAndEventMask is in the list, uses less
     43 //   memory (save 32 bytes/fd), and does not affect cache usage (we need to
     44 //   read in the struct to use the callback anyway).)
     45 // - Embed the fd directly into CBAndEventMask and switch to using hash_set.
     46 //   This removes the need to store hash_map::iterator in the list just so that
     47 //   we can get both the fd and the callback.
     48 // - The ready list is "one shot": each entry is removed before OnEvent is
     49 //   called. This removes the mutation-while-iterating problem.
     50 // - Use two lists to keep track of callbacks. The ready_list_ is the one used
     51 //   for registration. Before iteration, the ready_list_ is swapped into the
     52 //   tmp_list_. Once iteration is done, tmp_list_ will be empty, and
     53 //   ready_list_ will have all the new ready fds.
     54 
     55 // The size we use for buffers passed to strerror_r
     56 static const int kErrorBufferSize = 256;
     57 
     58 namespace net {
     59 
     60 // Clears the pipe and returns.  Used for waking the epoll server up.
     61 class ReadPipeCallback : public EpollCallbackInterface {
     62  public:
     63   void OnEvent(int fd, EpollEvent* event) {
     64     DCHECK(event->in_events == EPOLLIN);
     65     int data;
     66     int data_read = 1;
     67     // Read until the pipe is empty.
     68     while (data_read > 0) {
     69       data_read = read(fd, &data, sizeof(data));
     70     }
     71   }
     72   void OnShutdown(EpollServer *eps, int fd) {}
     73   void OnRegistration(EpollServer*, int, int) {}
     74   void OnModification(int, int) {}       // COV_NF_LINE
     75   void OnUnregistration(int, bool) {}    // COV_NF_LINE
     76 };
     77 
     78 ////////////////////////////////////////////////////////////////////////////////
     79 ////////////////////////////////////////////////////////////////////////////////
     80 
     81 EpollServer::EpollServer()
     82   : epoll_fd_(epoll_create(1024)),
     83     timeout_in_us_(0),
     84     recorded_now_in_us_(0),
     85     ready_list_size_(0),
     86     wake_cb_(new ReadPipeCallback),
     87     read_fd_(-1),
     88     write_fd_(-1),
     89     in_wait_for_events_and_execute_callbacks_(false),
     90     in_shutdown_(false) {
     91   // ensure that the epoll_fd_ is valid.
     92   CHECK_NE(epoll_fd_, -1);
     93   LIST_INIT(&ready_list_);
     94   LIST_INIT(&tmp_list_);
     95 
     96   int pipe_fds[2];
     97   if (pipe(pipe_fds) < 0) {
     98     // Unfortunately, it is impossible to test any such initialization in
     99     // a constructor (as virtual methods do not yet work).
    100     // This -could- be solved by moving initialization to an outside
    101     // call...
    102     int saved_errno = errno;
    103     char buf[kErrorBufferSize];
    104     LOG(FATAL) << "Error " << saved_errno
    105                << " in pipe(): " << strerror_r(saved_errno, buf, sizeof(buf));
    106   }
    107   read_fd_ = pipe_fds[0];
    108   write_fd_ = pipe_fds[1];
    109   RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN);
    110 }
    111 
    112 void EpollServer::CleanupFDToCBMap() {
    113   FDToCBMap::iterator cb_iter = cb_map_.begin();
    114   while (cb_iter != cb_map_.end()) {
    115     int fd = cb_iter->fd;
    116     CB* cb = cb_iter->cb;
    117 
    118     cb_iter->in_use = true;
    119     if (cb) {
    120       cb->OnShutdown(this, fd);
    121     }
    122 
    123     cb_map_.erase(cb_iter);
    124     cb_iter = cb_map_.begin();
    125   }
    126 }
    127 
    128 void EpollServer::CleanupTimeToAlarmCBMap() {
    129   TimeToAlarmCBMap::iterator erase_it;
    130 
    131   // Call OnShutdown() on alarms. Note that the structure of the loop
    132   // is similar to the structure of loop in the function HandleAlarms()
    133   for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
    134        i != alarm_map_.end();
    135       ) {
    136     // Note that OnShutdown() can call UnregisterAlarm() on
    137     // other iterators. OnShutdown() should not call UnregisterAlarm()
    138     // on self because by definition the iterator is not valid any more.
    139     i->second->OnShutdown(this);
    140     erase_it = i;
    141     ++i;
    142     alarm_map_.erase(erase_it);
    143   }
    144 }
    145 
    146 EpollServer::~EpollServer() {
    147   DCHECK_EQ(in_shutdown_, false);
    148   in_shutdown_ = true;
    149 #ifdef EPOLL_SERVER_EVENT_TRACING
    150   LOG(INFO) << "\n" << event_recorder_;
    151 #endif
    152   VLOG(2) << "Shutting down epoll server ";
    153   CleanupFDToCBMap();
    154 
    155   LIST_INIT(&ready_list_);
    156   LIST_INIT(&tmp_list_);
    157 
    158   CleanupTimeToAlarmCBMap();
    159 
    160   close(read_fd_);
    161   close(write_fd_);
    162   close(epoll_fd_);
    163 }
    164 
    165 // Whether a CBAandEventMask is on the ready list is determined by a non-NULL
    166 // le_prev pointer (le_next being NULL indicates end of list).
    167 inline void EpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) {
    168   if (cb_and_mask->entry.le_prev == NULL) {
    169     LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry);
    170     ++ready_list_size_;
    171   }
    172 }
    173 
    174 inline void EpollServer::RemoveFromReadyList(
    175     const CBAndEventMask& cb_and_mask) {
    176   if (cb_and_mask.entry.le_prev != NULL) {
    177     LIST_REMOVE(&cb_and_mask, entry);
    178     // Clean up all the ready list states. Don't bother with the other fields
    179     // as they are initialized when the CBAandEventMask is added to the ready
    180     // list. This saves a few cycles in the inner loop.
    181     cb_and_mask.entry.le_prev = NULL;
    182     --ready_list_size_;
    183     if (ready_list_size_ == 0) {
    184       DCHECK(ready_list_.lh_first == NULL);
    185       DCHECK(tmp_list_.lh_first == NULL);
    186     }
    187   }
    188 }
    189 
    190 void EpollServer::RegisterFD(int fd, CB* cb, int event_mask) {
    191   CHECK(cb);
    192   VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask;
    193   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    194   if (cb_map_.end() != fd_i) {
    195     // do we just abort, or do we just unregister the other guy?
    196     // for now, lets just unregister the other guy.
    197 
    198     // unregister any callback that may already be registered for this FD.
    199     CB* other_cb = fd_i->cb;
    200     if (other_cb) {
    201       // Must remove from the ready list before erasing.
    202       RemoveFromReadyList(*fd_i);
    203       other_cb->OnUnregistration(fd, true);
    204       ModFD(fd, event_mask);
    205     } else {
    206       // already unregistered, so just recycle the node.
    207       AddFD(fd, event_mask);
    208     }
    209     fd_i->cb = cb;
    210     fd_i->event_mask = event_mask;
    211     fd_i->events_to_fake = 0;
    212   } else {
    213     AddFD(fd, event_mask);
    214     cb_map_.insert(CBAndEventMask(cb, event_mask, fd));
    215   }
    216 
    217 
    218   // set the FD to be non-blocking.
    219   SetNonblocking(fd);
    220 
    221   cb->OnRegistration(this, fd, event_mask);
    222 }
    223 
    224 int EpollServer::GetFlags(int fd) {
    225   return fcntl(fd, F_GETFL, 0);
    226 }
    227 
    228 void EpollServer::SetNonblocking(int fd) {
    229   int flags = GetFlags(fd);
    230   if (flags == -1) {
    231     int saved_errno = errno;
    232     char buf[kErrorBufferSize];
    233     LOG(FATAL) << "Error " << saved_errno
    234                << " doing fcntl(" << fd << ", F_GETFL, 0): "
    235                << strerror_r(saved_errno, buf, sizeof(buf));
    236   }
    237   if (!(flags & O_NONBLOCK)) {
    238     int saved_flags = flags;
    239     flags = SetFlags(fd, flags | O_NONBLOCK);
    240     if (flags == -1) {
    241       // bad.
    242       int saved_errno = errno;
    243       char buf[kErrorBufferSize];
    244       LOG(FATAL) << "Error " << saved_errno
    245         << " doing fcntl(" << fd << ", F_SETFL, " << saved_flags << "): "
    246         << strerror_r(saved_errno, buf, sizeof(buf));
    247     }
    248   }
    249 }
    250 
    251 int EpollServer::epoll_wait_impl(int epfd,
    252                                  struct epoll_event* events,
    253                                  int max_events,
    254                                  int timeout_in_ms) {
    255   return epoll_wait(epfd, events, max_events, timeout_in_ms);
    256 }
    257 
    258 void EpollServer::RegisterFDForWrite(int fd, CB* cb) {
    259   RegisterFD(fd, cb, EPOLLOUT);
    260 }
    261 
    262 void EpollServer::RegisterFDForReadWrite(int fd, CB* cb) {
    263   RegisterFD(fd, cb, EPOLLIN | EPOLLOUT);
    264 }
    265 
    266 void EpollServer::RegisterFDForRead(int fd, CB* cb) {
    267   RegisterFD(fd, cb, EPOLLIN);
    268 }
    269 
    270 void EpollServer::UnregisterFD(int fd) {
    271   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    272   if (cb_map_.end() == fd_i || fd_i->cb == NULL) {
    273     // Doesn't exist in server, or has gone through UnregisterFD once and still
    274     // inside the callchain of OnEvent.
    275     return;
    276   }
    277 #ifdef EPOLL_SERVER_EVENT_TRACING
    278   event_recorder_.RecordUnregistration(fd);
    279 #endif
    280   CB* cb = fd_i->cb;
    281   // Since the links are embedded within the struct, we must remove it from the
    282   // list before erasing it from the hash_set.
    283   RemoveFromReadyList(*fd_i);
    284   DelFD(fd);
    285   cb->OnUnregistration(fd, false);
    286   // fd_i->cb is NULL if that fd is unregistered inside the callchain of
    287   // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent
    288   // returns in order to add it to the ready list, we cannot have UnregisterFD
    289   // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a
    290   // condition that tells the EpollServer that this entry is unused at a later
    291   // point.
    292   if (!fd_i->in_use) {
    293     cb_map_.erase(fd_i);
    294   } else {
    295     // Remove all trace of the registration, and just keep the node alive long
    296     // enough so the code that calls OnEvent doesn't have to worry about
    297     // figuring out whether the CBAndEventMask is valid or not.
    298     fd_i->cb = NULL;
    299     fd_i->event_mask = 0;
    300     fd_i->events_to_fake = 0;
    301   }
    302 }
    303 
    304 void EpollServer::ModifyCallback(int fd, int event_mask) {
    305   ModifyFD(fd, ~0, event_mask);
    306 }
    307 
    308 void EpollServer::StopRead(int fd) {
    309   ModifyFD(fd, EPOLLIN, 0);
    310 }
    311 
    312 void EpollServer::StartRead(int fd) {
    313   ModifyFD(fd, 0, EPOLLIN);
    314 }
    315 
    316 void EpollServer::StopWrite(int fd) {
    317   ModifyFD(fd, EPOLLOUT, 0);
    318 }
    319 
    320 void EpollServer::StartWrite(int fd) {
    321   ModifyFD(fd, 0, EPOLLOUT);
    322 }
    323 
    324 void EpollServer::HandleEvent(int fd, int event_mask) {
    325 #ifdef EPOLL_SERVER_EVENT_TRACING
    326   event_recorder_.RecordEpollEvent(fd, event_mask);
    327 #endif
    328   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    329   if (fd_i == cb_map_.end() || fd_i->cb == NULL) {
    330     // Ignore the event.
    331     // This could occur if epoll() returns a set of events, and
    332     // while processing event A (earlier) we removed the callback
    333     // for event B (and are now processing event B).
    334     return;
    335   }
    336   fd_i->events_asserted = event_mask;
    337   CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
    338   AddToReadyList(cb_and_mask);
    339 }
    340 
    341 class TrueFalseGuard {
    342  public:
    343   explicit TrueFalseGuard(bool* guarded_bool) : guarded_bool_(guarded_bool) {
    344     DCHECK(guarded_bool_ != NULL);
    345     DCHECK(*guarded_bool_ == false);
    346     *guarded_bool_ = true;
    347   }
    348   ~TrueFalseGuard() {
    349     *guarded_bool_ = false;
    350   }
    351  private:
    352   bool* guarded_bool_;
    353 };
    354 
    355 void EpollServer::WaitForEventsAndExecuteCallbacks() {
    356   if (in_wait_for_events_and_execute_callbacks_) {
    357     LOG(DFATAL) <<
    358       "Attempting to call WaitForEventsAndExecuteCallbacks"
    359       " when an ancestor to the current function is already"
    360       " WaitForEventsAndExecuteCallbacks!";
    361     // The line below is actually tested, but in coverage mode,
    362     // we never see it.
    363     return;  // COV_NF_LINE
    364   }
    365   TrueFalseGuard recursion_guard(&in_wait_for_events_and_execute_callbacks_);
    366   if (alarm_map_.empty()) {
    367     // no alarms, this is business as usual.
    368     WaitForEventsAndCallHandleEvents(timeout_in_us_,
    369                                      events_,
    370                                      events_size_);
    371     recorded_now_in_us_ = 0;
    372     return;
    373   }
    374 
    375   // store the 'now'. If we recomputed 'now' every iteration
    376   // down below, then we might never exit that loop-- any
    377   // long-running alarms might install other long-running
    378   // alarms, etc. By storing it here now, we ensure that
    379   // a more reasonable amount of work is done here.
    380   int64 now_in_us  = NowInUsec();
    381 
    382   // Get the first timeout from the alarm_map where it is
    383   // stored in absolute time.
    384   int64 next_alarm_time_in_us =  alarm_map_.begin()->first;
    385   VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us
    386           << " now             = " << now_in_us
    387           << " timeout_in_us = " << timeout_in_us_;
    388 
    389   int64 wait_time_in_us;
    390   int64 alarm_timeout_in_us = next_alarm_time_in_us - now_in_us;
    391 
    392   // If the next alarm is sooner than the default timeout, or if there is no
    393   // timeout (timeout_in_us_ == -1), wake up when the alarm should fire.
    394   // Otherwise use the default timeout.
    395   if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) {
    396     wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64>(0));
    397   } else {
    398     wait_time_in_us = timeout_in_us_;
    399   }
    400 
    401   VLOG(4) << "wait_time_in_us = " << wait_time_in_us;
    402 
    403   // wait for events.
    404 
    405   WaitForEventsAndCallHandleEvents(wait_time_in_us,
    406                                    events_,
    407                                    events_size_);
    408   CallAndReregisterAlarmEvents();
    409   recorded_now_in_us_ = 0;
    410 }
    411 
    412 void EpollServer::SetFDReady(int fd, int events_to_fake) {
    413   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    414   if (cb_map_.end() != fd_i && fd_i->cb != NULL) {
    415     // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring
    416     // entry mutable is insufficient because LIST_HEAD_INSERT assigns the
    417     // forward pointer of the list head to the current cb_and_mask, and the
    418     // compiler complains that it can't assign a const T* to a T*.
    419     CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
    420     // Note that there is no clearly correct behavior here when
    421     // cb_and_mask->events_to_fake != 0 and this function is called.
    422     // Of the two operations:
    423     //      cb_and_mask->events_to_fake = events_to_fake
    424     //      cb_and_mask->events_to_fake |= events_to_fake
    425     // the first was picked because it discourages users from calling
    426     // SetFDReady repeatedly to build up the correct event set as it is more
    427     // efficient to call SetFDReady once with the correct, final mask.
    428     cb_and_mask->events_to_fake = events_to_fake;
    429     AddToReadyList(cb_and_mask);
    430   }
    431 }
    432 
    433 void EpollServer::SetFDNotReady(int fd) {
    434   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    435   if (cb_map_.end() != fd_i) {
    436     RemoveFromReadyList(*fd_i);
    437   }
    438 }
    439 
    440 bool EpollServer::IsFDReady(int fd) const {
    441   FDToCBMap::const_iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    442   return (cb_map_.end() != fd_i &&
    443           fd_i->cb != NULL &&
    444           fd_i->entry.le_prev != NULL);
    445 }
    446 
    447 void EpollServer::VerifyReadyList() const {
    448   int count = 0;
    449   CBAndEventMask* cur = ready_list_.lh_first;
    450   for (; cur; cur = cur->entry.le_next) {
    451     ++count;
    452   }
    453   for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) {
    454     ++count;
    455   }
    456   CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count";
    457 }
    458 
    459 void EpollServer::RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac) {
    460   CHECK(ac);
    461   if (ContainsAlarm(ac)) {
    462     LOG(FATAL) << "Alarm already exists " << ac;
    463   }
    464   VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us;
    465 
    466   TimeToAlarmCBMap::iterator alarm_iter =
    467       alarm_map_.insert(std::make_pair(timeout_time_in_us, ac));
    468 
    469   all_alarms_.insert(ac);
    470   // Pass the iterator to the EpollAlarmCallbackInterface.
    471   ac->OnRegistration(alarm_iter, this);
    472 }
    473 
    474 // Unregister a specific alarm callback: iterator_token must be a
    475 //  valid iterator. The caller must ensure the validity of the iterator.
    476 void EpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) {
    477   AlarmCB* cb = iterator_token->second;
    478   alarm_map_.erase(iterator_token);
    479   all_alarms_.erase(cb);
    480   cb->OnUnregistration();
    481 }
    482 
    483 int EpollServer::NumFDsRegistered() const {
    484   DCHECK(cb_map_.size() >= 1);
    485   // Omit the internal FD (read_fd_)
    486   return cb_map_.size() - 1;
    487 }
    488 
    489 void EpollServer::Wake() {
    490   char data = 'd';  // 'd' is for data.  It's good enough for me.
    491   int rv = write(write_fd_, &data, 1);
    492   DCHECK(rv == 1);
    493 }
    494 
    495 int64 EpollServer::NowInUsec() const {
    496   return base::Time::Now().ToInternalValue();
    497 }
    498 
    499 int64 EpollServer::ApproximateNowInUsec() const {
    500   if (recorded_now_in_us_ != 0) {
    501     return recorded_now_in_us_;
    502   }
    503   return this->NowInUsec();
    504 }
    505 
    506 std::string EpollServer::EventMaskToString(int event_mask) {
    507   std::string s;
    508   if (event_mask & EPOLLIN) s += "EPOLLIN ";
    509   if (event_mask & EPOLLPRI) s += "EPOLLPRI ";
    510   if (event_mask & EPOLLOUT) s += "EPOLLOUT ";
    511   if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM ";
    512   if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND ";
    513   if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM ";
    514   if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND ";
    515   if (event_mask & EPOLLMSG) s += "EPOLLMSG ";
    516   if (event_mask & EPOLLERR) s += "EPOLLERR ";
    517   if (event_mask & EPOLLHUP) s += "EPOLLHUP ";
    518   if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT ";
    519   if (event_mask & EPOLLET) s += "EPOLLET ";
    520   return s;
    521 }
    522 
    523 void EpollServer::LogStateOnCrash() {
    524   LOG(ERROR) << "----------------------Epoll Server---------------------------";
    525   LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_;
    526   LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_;
    527 
    528   // Log sessions with alarms.
    529   LOG(ERROR) << alarm_map_.size() << " alarms registered.";
    530   for (TimeToAlarmCBMap::iterator it = alarm_map_.begin();
    531        it != alarm_map_.end();
    532        ++it) {
    533     const bool skipped =
    534         alarms_reregistered_and_should_be_skipped_.find(it->second)
    535         != alarms_reregistered_and_should_be_skipped_.end();
    536     LOG(ERROR) << "Alarm " << it->second << " registered at time " << it->first
    537                << " and should be skipped = " << skipped;
    538   }
    539 
    540   LOG(ERROR) << cb_map_.size() << " fd callbacks registered.";
    541   for (FDToCBMap::iterator it = cb_map_.begin();
    542        it != cb_map_.end();
    543        ++it) {
    544     LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask
    545                << " registered with cb: " << it->cb;
    546   }
    547   LOG(ERROR) << "----------------------/Epoll Server--------------------------";
    548 }
    549 
    550 
    551 
    552 ////////////////////////////////////////////////////////////////////////////////
    553 ////////////////////////////////////////////////////////////////////////////////
    554 
    555 void EpollServer::DelFD(int fd) const {
    556   struct epoll_event ee;
    557   memset(&ee, 0, sizeof(ee));
    558 #ifdef EPOLL_SERVER_EVENT_TRACING
    559   event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD");
    560 #endif
    561   if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) {
    562     int saved_errno = errno;
    563     char buf[kErrorBufferSize];
    564     LOG(FATAL) << "Epoll set removal error for fd " << fd << ": "
    565                << strerror_r(saved_errno, buf, sizeof(buf));
    566   }
    567 }
    568 
    569 ////////////////////////////////////////
    570 
    571 void EpollServer::AddFD(int fd, int event_mask) const {
    572   struct epoll_event ee;
    573   memset(&ee, 0, sizeof(ee));
    574   ee.events = event_mask | EPOLLERR | EPOLLHUP;
    575   ee.data.fd = fd;
    576 #ifdef EPOLL_SERVER_EVENT_TRACING
    577   event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD");
    578 #endif
    579   if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) {
    580     int saved_errno = errno;
    581     char buf[kErrorBufferSize];
    582     LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": "
    583                << strerror_r(saved_errno, buf, sizeof(buf));
    584   }
    585 }
    586 
    587 ////////////////////////////////////////
    588 
    589 void EpollServer::ModFD(int fd, int event_mask) const {
    590   struct epoll_event ee;
    591   memset(&ee, 0, sizeof(ee));
    592   ee.events = event_mask | EPOLLERR | EPOLLHUP;
    593   ee.data.fd = fd;
    594 #ifdef EPOLL_SERVER_EVENT_TRACING
    595   event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD");
    596 #endif
    597   VLOG(3) <<  "modifying fd= " << fd << " "
    598           << EventMaskToString(ee.events);
    599   if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) {
    600     int saved_errno = errno;
    601     char buf[kErrorBufferSize];
    602     LOG(FATAL) << "Epoll set modification error for fd " << fd << ": "
    603                << strerror_r(saved_errno, buf, sizeof(buf));
    604   }
    605 }
    606 
    607 ////////////////////////////////////////
    608 
    609 void EpollServer::ModifyFD(int fd, int remove_event, int add_event) {
    610   FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
    611   if (cb_map_.end() == fd_i) {
    612     VLOG(2) << "Didn't find the fd " << fd << "in internal structures";
    613     return;
    614   }
    615 
    616   if (fd_i->cb != NULL) {
    617     int & event_mask = fd_i->event_mask;
    618     VLOG(3) << "fd= " << fd
    619             << " event_mask before: " << EventMaskToString(event_mask);
    620     event_mask &= ~remove_event;
    621     event_mask |= add_event;
    622 
    623     VLOG(3) << " event_mask after: " << EventMaskToString(event_mask);
    624 
    625     ModFD(fd, event_mask);
    626 
    627     fd_i->cb->OnModification(fd, event_mask);
    628   }
    629 }
    630 
    631 void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
    632                                                    struct epoll_event events[],
    633                                                    int events_size) {
    634   if (timeout_in_us == 0 || ready_list_.lh_first != NULL) {
    635     // If ready list is not empty, then don't sleep at all.
    636     timeout_in_us = 0;
    637   } else if (timeout_in_us < 0) {
    638     LOG(INFO) << "Negative epoll timeout: " << timeout_in_us
    639               << "us; epoll will wait forever for events.";
    640     // If timeout_in_us is < 0 we are supposed to Wait forever.  This means we
    641     // should set timeout_in_us to -1000 so we will
    642     // Wait(-1000/1000) == Wait(-1) == Wait forever.
    643     timeout_in_us = -1000;
    644   } else {
    645     // If timeout is specified, and the ready list is empty.
    646     if (timeout_in_us < 1000) {
    647       timeout_in_us = 1000;
    648     }
    649   }
    650   const int timeout_in_ms = timeout_in_us / 1000;
    651   int nfds = epoll_wait_impl(epoll_fd_,
    652                              events,
    653                              events_size,
    654                              timeout_in_ms);
    655   VLOG(3) << "nfds=" << nfds;
    656 
    657 #ifdef EPOLL_SERVER_EVENT_TRACING
    658   event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds);
    659 #endif
    660 
    661   // If you're wondering why the NowInUsec() is recorded here, the answer is
    662   // simple: If we did it before the epoll_wait_impl, then the max error for
    663   // the ApproximateNowInUs() call would be as large as the maximum length of
    664   // epoll_wait, which can be arbitrarily long. Since this would make
    665   // ApproximateNowInUs() worthless, we instead record the time -after- we've
    666   // done epoll_wait, which guarantees that the maximum error is the amount of
    667   // time it takes to process all the events generated by epoll_wait.
    668   recorded_now_in_us_ = NowInUsec();
    669   if (nfds > 0) {
    670     for (int i = 0; i < nfds; ++i) {
    671       int event_mask = events[i].events;
    672       int fd = events[i].data.fd;
    673       HandleEvent(fd, event_mask);
    674     }
    675   } else if (nfds < 0) {
    676     // Catch interrupted syscall and just ignore it and move on.
    677     if (errno != EINTR && errno != 0) {
    678       int saved_errno = errno;
    679       char buf[kErrorBufferSize];
    680       LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: "
    681                  << strerror_r(saved_errno, buf, sizeof(buf));
    682     }
    683   }
    684 
    685   // Now run through the ready list.
    686   if (ready_list_.lh_first) {
    687     CallReadyListCallbacks();
    688   }
    689 }
    690 
    691 void EpollServer::CallReadyListCallbacks() {
    692   // Check pre-conditions.
    693   DCHECK(tmp_list_.lh_first == NULL);
    694   // Swap out the ready_list_ into the tmp_list_ before traversing the list to
    695   // enable SetFDReady() to just push new items into the ready_list_.
    696   std::swap(ready_list_.lh_first, tmp_list_.lh_first);
    697   if (tmp_list_.lh_first) {
    698     tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first;
    699     EpollEvent event(0, false);
    700     while (tmp_list_.lh_first != NULL) {
    701       DCHECK_GT(ready_list_size_, 0);
    702       CBAndEventMask* cb_and_mask = tmp_list_.lh_first;
    703       RemoveFromReadyList(*cb_and_mask);
    704 
    705       event.out_ready_mask = 0;
    706       event.in_events =
    707         cb_and_mask->events_asserted | cb_and_mask->events_to_fake;
    708       // TODO(fenix): get rid of the two separate fields in cb_and_mask.
    709       cb_and_mask->events_asserted = 0;
    710       cb_and_mask->events_to_fake = 0;
    711       {
    712         // OnEvent() may call UnRegister, so we set in_use, here. Any
    713         // UnRegister call will now simply set the cb to NULL instead of
    714         // invalidating the cb_and_mask object (by deleting the object in the
    715         // map to which cb_and_mask refers)
    716         TrueFalseGuard in_use_guard(&(cb_and_mask->in_use));
    717         cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event);
    718       }
    719 
    720       // Since OnEvent may have called UnregisterFD, we must check here that
    721       // the callback is still valid. If it isn't, then UnregisterFD *was*
    722       // called, and we should now get rid of the object.
    723       if (cb_and_mask->cb == NULL) {
    724         cb_map_.erase(*cb_and_mask);
    725       } else if (event.out_ready_mask != 0) {
    726         cb_and_mask->events_to_fake = event.out_ready_mask;
    727         AddToReadyList(cb_and_mask);
    728       }
    729     }
    730   }
    731   DCHECK(tmp_list_.lh_first == NULL);
    732 }
    733 
    734 const int EpollServer::kMinimumEffectiveAlarmQuantum = 1000;
    735 
    736 // Alarms may be up to kMinimumEffectiveAlarmQuantum -1 us late.
    737 inline int64 EpollServer::DoRoundingOnNow(int64 now_in_us) const {
    738   now_in_us /= kMinimumEffectiveAlarmQuantum;
    739   now_in_us *= kMinimumEffectiveAlarmQuantum;
    740   now_in_us += (2 * kMinimumEffectiveAlarmQuantum - 1);
    741   return now_in_us;
    742 }
    743 
    744 void EpollServer::CallAndReregisterAlarmEvents() {
    745   int64 now_in_us = recorded_now_in_us_;
    746   DCHECK_NE(0, recorded_now_in_us_);
    747   now_in_us = DoRoundingOnNow(now_in_us);
    748 
    749   TimeToAlarmCBMap::iterator erase_it;
    750 
    751   // execute alarms.
    752   for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
    753        i != alarm_map_.end();
    754       ) {
    755     if (i->first > now_in_us) {
    756       break;
    757     }
    758     AlarmCB* cb = i->second;
    759     // Execute the OnAlarm() only if we did not register
    760     // it in this loop itself.
    761     const bool added_in_this_round =
    762         alarms_reregistered_and_should_be_skipped_.find(cb)
    763         != alarms_reregistered_and_should_be_skipped_.end();
    764     if (added_in_this_round) {
    765       ++i;
    766       continue;
    767     }
    768     all_alarms_.erase(cb);
    769     const int64 new_timeout_time_in_us = cb->OnAlarm();
    770 
    771     erase_it = i;
    772     ++i;
    773     alarm_map_.erase(erase_it);
    774 
    775     if (new_timeout_time_in_us > 0) {
    776       // We add to hash_set only if the new timeout is <= now_in_us.
    777       // if timeout is > now_in_us then we have no fear that this alarm
    778       // can be reexecuted in this loop, and hence we do not need to
    779       // worry about a recursive loop.
    780       DVLOG(3) << "Reregistering alarm "
    781                << " " << cb
    782                << " " << new_timeout_time_in_us
    783                << " " << now_in_us;
    784       if (new_timeout_time_in_us <= now_in_us) {
    785         alarms_reregistered_and_should_be_skipped_.insert(cb);
    786       }
    787       RegisterAlarm(new_timeout_time_in_us, cb);
    788     }
    789   }
    790   alarms_reregistered_and_should_be_skipped_.clear();
    791 }
    792 
    793 EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) {
    794 }
    795 
    796 EpollAlarm::~EpollAlarm() {
    797   UnregisterIfRegistered();
    798 }
    799 
    800 int64 EpollAlarm::OnAlarm() {
    801   registered_ = false;
    802   return 0;
    803 }
    804 
    805 void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken& token,
    806                                 EpollServer* eps) {
    807   DCHECK_EQ(false, registered_);
    808 
    809   token_ = token;
    810   eps_ = eps;
    811   registered_ = true;
    812 }
    813 
    814 void EpollAlarm::OnUnregistration() {
    815   registered_ = false;
    816 }
    817 
    818 void EpollAlarm::OnShutdown(EpollServer* eps) {
    819   registered_ = false;
    820   eps_ = NULL;
    821 }
    822 
    823 // If the alarm was registered, unregister it.
    824 void EpollAlarm::UnregisterIfRegistered() {
    825   if (!registered_) {
    826     return;
    827   }
    828   eps_->UnregisterAlarm(token_);
    829 }
    830 
    831 }  // namespace net
    832 
    833