// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/tools/flip_server/epoll_server.h"

#include <stdlib.h>  // for abort
#include <errno.h>   // for errno and strerror_r
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/timer.h"
#include "net/tools/flip_server/other_defines.h"

// Design notes: An efficient implementation of ready list has the following
// desirable properties:
//
// A. O(1) insertion into/removal from the list in any location.
// B. Once the callback is found by hash lookup using the fd, the lookup of
//    corresponding entry in the list is O(1).
// C. Safe insertion into/removal from the list during list iteration. (The
//    ready list's purpose is to enable completely event driven I/O model.
//    Thus, all the interesting bits happen in the callback. It is critical
//    to not place any restriction on the API during list iteration.)
//
// The current implementation achieves these goals with the following design:
//
// - The ready list is constructed as a doubly linked list to enable O(1)
//   insertion/removal (see man 3 queue).
// - The forward and backward links are directly embedded inside the
//   CBAndEventMask struct. This enables O(1) lookup in the list for a given
//   callback. (Technically, we could've used std::list of hash_set::iterator,
//   and keep a list::iterator in CBAndEventMask to achieve the same effect.
//   However, iterators have two problems: no way to portably invalidate them,
//   and no way to tell whether an iterator is singular or not. The only way to
//   overcome these issues is to keep bools in both places, but that throws off
//   memory alignment (up to 7 wasted bytes for each bool). The extra level of
//   indirection will also likely be less cache friendly. Direct manipulation
//   of link pointers makes it easier to retrieve the CBAndEventMask from the
//   list, easier to check whether a CBAndEventMask is in the list, uses less
//   memory (save 32 bytes/fd), and does not affect cache usage (we need to
//   read in the struct to use the callback anyway).)
// - Embed the fd directly into CBAndEventMask and switch to using hash_set.
//   This removes the need to store hash_map::iterator in the list just so that
//   we can get both the fd and the callback.
// - The ready list is "one shot": each entry is removed before OnEvent is
//   called. This removes the mutation-while-iterating problem.
// - Use two lists to keep track of callbacks. The ready_list_ is the one used
//   for registration. Before iteration, the ready_list_ is swapped into the
//   tmp_list_. Once iteration is done, tmp_list_ will be empty, and
//   ready_list_ will have all the new ready fds.

// The size we use for buffers passed to strerror_r
static const int kErrorBufferSize = 256;

namespace net {

// Clears the pipe and returns. Used for waking the epoll server up.
class ReadPipeCallback : public EpollCallbackInterface {
 public:
  void OnEvent(int fd, EpollEvent* event) {
    DCHECK(event->in_events == EPOLLIN);
    int data;
    int data_read = 1;
    // Read until the pipe is empty.
    while (data_read > 0) {
      data_read = read(fd, &data, sizeof(data));
    }
  }
  void OnShutdown(EpollServer *eps, int fd) {}
  void OnRegistration(EpollServer*, int, int) {}
  void OnModification(int, int) {}     // COV_NF_LINE
  void OnUnregistration(int, bool) {}  // COV_NF_LINE
};

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

EpollServer::EpollServer()
    : epoll_fd_(epoll_create(1024)),
      timeout_in_us_(0),
      recorded_now_in_us_(0),
      ready_list_size_(0),
      wake_cb_(new ReadPipeCallback),
      read_fd_(-1),
      write_fd_(-1),
      in_wait_for_events_and_execute_callbacks_(false),
      in_shutdown_(false) {
  // ensure that the epoll_fd_ is valid.
  CHECK_NE(epoll_fd_, -1);
  LIST_INIT(&ready_list_);
  LIST_INIT(&tmp_list_);

  // Create the self-pipe used by Wake(): the read end is registered with
  // this epoll server so a write to write_fd_ interrupts epoll_wait.
  int pipe_fds[2];
  if (pipe(pipe_fds) < 0) {
    // Unfortunately, it is impossible to test any such initialization in
    // a constructor (as virtual methods do not yet work).
    // This -could- be solved by moving initialization to an outside
    // call...
    int saved_errno = errno;
    char buf[kErrorBufferSize];
    LOG(FATAL) << "Error " << saved_errno
               << " in pipe(): " << strerror_r(saved_errno, buf, sizeof(buf));
  }
  read_fd_ = pipe_fds[0];
  write_fd_ = pipe_fds[1];
  RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN);
}

// Invokes OnShutdown() on every registered callback and empties cb_map_.
// Called from the destructor.
void EpollServer::CleanupFDToCBMap() {
  FDToCBMap::iterator cb_iter = cb_map_.begin();
  while (cb_iter != cb_map_.end()) {
    int fd = cb_iter->fd;
    CB* cb = cb_iter->cb;

    // Mark the entry in_use so that an UnregisterFD() call made from inside
    // OnShutdown() will not erase it (see UnregisterFD); we erase it below.
    cb_iter->in_use = true;
    if (cb) {
      cb->OnShutdown(this, fd);
    }

    cb_map_.erase(cb_iter);
    cb_iter = cb_map_.begin();
  }
}

// Invokes OnShutdown() on every registered alarm and empties alarm_map_.
// Called from the destructor.
void EpollServer::CleanupTimeToAlarmCBMap() {
  TimeToAlarmCBMap::iterator erase_it;

  // Call OnShutdown() on alarms. Note that the structure of the loop
  // is similar to the structure of loop in the function HandleAlarms()
  for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
       i != alarm_map_.end();
      ) {
    // Note that OnShutdown() can call UnregisterAlarm() on
    // other iterators. OnShutdown() should not call UnregisterAlarm()
    // on self because by definition the iterator is not valid any more.
    i->second->OnShutdown(this);
    erase_it = i;
    ++i;
    alarm_map_.erase(erase_it);
  }
}

EpollServer::~EpollServer() {
  DCHECK_EQ(in_shutdown_, false);
  in_shutdown_ = true;
#ifdef EPOLL_SERVER_EVENT_TRACING
  LOG(INFO) << "\n" << event_recorder_;
#endif
  VLOG(2) << "Shutting down epoll server ";
  CleanupFDToCBMap();

  LIST_INIT(&ready_list_);
  LIST_INIT(&tmp_list_);

  CleanupTimeToAlarmCBMap();

  close(read_fd_);
  close(write_fd_);
  close(epoll_fd_);
}

// Whether a CBAndEventMask is on the ready list is determined by a non-NULL
// le_prev pointer (le_next being NULL indicates end of list).
inline void EpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) {
  if (cb_and_mask->entry.le_prev == NULL) {
    LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry);
    ++ready_list_size_;
  }
}

inline void EpollServer::RemoveFromReadyList(
    const CBAndEventMask& cb_and_mask) {
  if (cb_and_mask.entry.le_prev != NULL) {
    LIST_REMOVE(&cb_and_mask, entry);
    // Clean up all the ready list states. Don't bother with the other fields
    // as they are initialized when the CBAndEventMask is added to the ready
    // list. This saves a few cycles in the inner loop.
    cb_and_mask.entry.le_prev = NULL;
    --ready_list_size_;
    if (ready_list_size_ == 0) {
      DCHECK(ready_list_.lh_first == NULL);
      DCHECK(tmp_list_.lh_first == NULL);
    }
  }
}

// Registers cb to be notified for the events in event_mask on fd. If a
// callback is already registered for fd it is unregistered first. Also makes
// fd non-blocking and fires cb->OnRegistration().
void EpollServer::RegisterFD(int fd, CB* cb, int event_mask) {
  CHECK(cb);
  VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask;
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (cb_map_.end() != fd_i) {
    // do we just abort, or do we just unregister the other guy?
    // for now, lets just unregister the other guy.

    // unregister any callback that may already be registered for this FD.
    CB* other_cb = fd_i->cb;
    if (other_cb) {
      // Must remove from the ready list before erasing.
      RemoveFromReadyList(*fd_i);
      other_cb->OnUnregistration(fd, true);
      ModFD(fd, event_mask);
    } else {
      // already unregistered, so just recycle the node.
      AddFD(fd, event_mask);
    }
    fd_i->cb = cb;
    fd_i->event_mask = event_mask;
    fd_i->events_to_fake = 0;
  } else {
    AddFD(fd, event_mask);
    cb_map_.insert(CBAndEventMask(cb, event_mask, fd));
  }


  // set the FD to be non-blocking.
  SetNonblocking(fd);

  cb->OnRegistration(this, fd, event_mask);
}

// Sets O_NONBLOCK on fd via fcntl; LOG(FATAL)s if either fcntl call fails.
void EpollServer::SetNonblocking(int fd) {
  int flags = GetFlags(fd);
  if (flags == -1) {
    int saved_errno = errno;
    char buf[kErrorBufferSize];
    LOG(FATAL) << "Error " << saved_errno
               << " doing fcntl(" << fd << ", F_GETFL, 0): "
               << strerror_r(saved_errno, buf, sizeof(buf));
  }
  if (!(flags & O_NONBLOCK)) {
    int saved_flags = flags;
    flags = SetFlags(fd, flags | O_NONBLOCK);
    if (flags == -1) {
      // bad.
      int saved_errno = errno;
      char buf[kErrorBufferSize];
      LOG(FATAL) << "Error " << saved_errno
                 << " doing fcntl(" << fd << ", F_SETFL, " << saved_flags << "): "
                 << strerror_r(saved_errno, buf, sizeof(buf));
    }
  }
}

void EpollServer::RegisterFDForWrite(int fd, CB* cb) {
  RegisterFD(fd, cb, EPOLLOUT);
}

void EpollServer::RegisterFDForReadWrite(int fd, CB* cb) {
  RegisterFD(fd, cb, EPOLLIN | EPOLLOUT);
}

void EpollServer::RegisterFDForRead(int fd, CB* cb) {
  RegisterFD(fd, cb, EPOLLIN);
}

// Removes fd from the epoll set and fires cb->OnUnregistration(). Safe to
// call from inside the callchain of OnEvent; in that case the map entry is
// only neutered (cb set to NULL) and erased later by CallReadyListCallbacks.
void EpollServer::UnregisterFD(int fd) {
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (cb_map_.end() == fd_i || fd_i->cb == NULL) {
    // Doesn't exist in server, or has gone through UnregisterFD once and still
    // inside the callchain of OnEvent.
    return;
  }
#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordUnregistration(fd);
#endif
  CB* cb = fd_i->cb;
  // Since the links are embedded within the struct, we must remove it from the
  // list before erasing it from the hash_set.
  RemoveFromReadyList(*fd_i);
  DelFD(fd);
  cb->OnUnregistration(fd, false);
  // fd_i->cb is NULL if that fd is unregistered inside the callchain of
  // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent
  // returns in order to add it to the ready list, we cannot have UnregisterFD
  // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a
  // condition that tells the EpollServer that this entry is unused at a later
  // point.
  if (!fd_i->in_use) {
    cb_map_.erase(fd_i);
  } else {
    // Remove all trace of the registration, and just keep the node alive long
    // enough so the code that calls OnEvent doesn't have to worry about
    // figuring out whether the CBAndEventMask is valid or not.
    fd_i->cb = NULL;
    fd_i->event_mask = 0;
    fd_i->events_to_fake = 0;
  }
}

void EpollServer::ModifyCallback(int fd, int event_mask) {
  ModifyFD(fd, ~0, event_mask);
}

void EpollServer::StopRead(int fd) {
  ModifyFD(fd, EPOLLIN, 0);
}

void EpollServer::StartRead(int fd) {
  ModifyFD(fd, 0, EPOLLIN);
}

void EpollServer::StopWrite(int fd) {
  ModifyFD(fd, EPOLLOUT, 0);
}

void EpollServer::StartWrite(int fd) {
  ModifyFD(fd, 0, EPOLLOUT);
}

// Records an event reported by epoll_wait for fd and queues the fd's
// callback on the ready list; the callback itself runs later in
// CallReadyListCallbacks().
void EpollServer::HandleEvent(int fd, int event_mask) {
#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordEpollEvent(fd, event_mask);
#endif
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (fd_i == cb_map_.end() || fd_i->cb == NULL) {
    // Ignore the event.
    // This could occur if epoll() returns a set of events, and
    // while processing event A (earlier) we removed the callback
    // for event B (and are now processing event B).
    return;
  }
  fd_i->events_asserted = event_mask;
  CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
  AddToReadyList(cb_and_mask);
}

// Scoped guard: sets *guarded_bool to true on construction and back to
// false on destruction. Used to detect re-entrancy and to mark entries
// in_use while their callbacks run.
class TrueFalseGuard {
 public:
  explicit TrueFalseGuard(bool* guarded_bool) : guarded_bool_(guarded_bool) {
    DCHECK(guarded_bool_ != NULL);
    DCHECK(*guarded_bool_ == false);
    *guarded_bool_ = true;
  }
  ~TrueFalseGuard() {
    *guarded_bool_ = false;
  }
 private:
  bool* guarded_bool_;
};

void EpollServer::WaitForEventsAndExecuteCallbacks() {
  if (in_wait_for_events_and_execute_callbacks_) {
    LOG(DFATAL) <<
      "Attempting to call WaitForEventsAndExecuteCallbacks"
      " when an ancestor to the current function is already"
      " WaitForEventsAndExecuteCallbacks!";
    // The line below is actually tested, but in coverage mode,
    // we never see it.
    return;  // COV_NF_LINE
  }
  TrueFalseGuard recursion_guard(&in_wait_for_events_and_execute_callbacks_);
  if (alarm_map_.empty()) {
    // no alarms, this is business as usual.
    WaitForEventsAndCallHandleEvents(timeout_in_us_,
                                     events_,
                                     events_size_);
    recorded_now_in_us_ = 0;
    return;
  }

  // store the 'now'. If we recomputed 'now' every iteration
  // down below, then we might never exit that loop-- any
  // long-running alarms might install other long-running
  // alarms, etc. By storing it here now, we ensure that
  // a more reasonable amount of work is done here.
  int64 now_in_us = NowInUsec();

  // Get the first timeout from the alarm_map where it is
  // stored in absolute time.
  int64 next_alarm_time_in_us = alarm_map_.begin()->first;
  VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us
          << " now = " << now_in_us
          << " timeout_in_us = " << timeout_in_us_;

  int64 wait_time_in_us;
  int64 alarm_timeout_in_us = next_alarm_time_in_us - now_in_us;

  // If the next alarm is sooner than the default timeout, or if there is no
  // timeout (timeout_in_us_ == -1), wake up when the alarm should fire.
  // Otherwise use the default timeout.
  if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) {
    wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64>(0));
  } else {
    wait_time_in_us = timeout_in_us_;
  }

  VLOG(4) << "wait_time_in_us = " << wait_time_in_us;

  // wait for events.

  WaitForEventsAndCallHandleEvents(wait_time_in_us,
                                   events_,
                                   events_size_);
  CallAndReregisterAlarmEvents();
  recorded_now_in_us_ = 0;
}

// Queues a synthetic (faked) event for fd, to be delivered on the next pass
// through the ready list even if epoll reports nothing for the fd.
void EpollServer::SetFDReady(int fd, int events_to_fake) {
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (cb_map_.end() != fd_i && fd_i->cb != NULL) {
    // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring
    // entry mutable is insufficient because LIST_HEAD_INSERT assigns the
    // forward pointer of the list head to the current cb_and_mask, and the
    // compiler complains that it can't assign a const T* to a T*.
    CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i);
    // Note that there is no clearly correct behavior here when
    // cb_and_mask->events_to_fake != 0 and this function is called.
    // Of the two operations:
    //      cb_and_mask->events_to_fake = events_to_fake
    //      cb_and_mask->events_to_fake |= events_to_fake
    // the first was picked because it discourages users from calling
    // SetFDReady repeatedly to build up the correct event set as it is more
    // efficient to call SetFDReady once with the correct, final mask.
    cb_and_mask->events_to_fake = events_to_fake;
    AddToReadyList(cb_and_mask);
  }
}

void EpollServer::SetFDNotReady(int fd) {
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (cb_map_.end() != fd_i) {
    RemoveFromReadyList(*fd_i);
  }
}

// True iff fd is registered with a live callback and currently linked on the
// ready (or tmp) list.
bool EpollServer::IsFDReady(int fd) const {
  FDToCBMap::const_iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  return (cb_map_.end() != fd_i &&
          fd_i->cb != NULL &&
          fd_i->entry.le_prev != NULL);
}

// Debug check: walks both lists and CHECKs that their combined length
// matches the ready_list_size_ counter.
void EpollServer::VerifyReadyList() const {
  int count = 0;
  CBAndEventMask* cur = ready_list_.lh_first;
  for (; cur; cur = cur->entry.le_next) {
    ++count;
  }
  for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) {
    ++count;
  }
  CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count";
}

// Registers ac to fire at absolute time timeout_time_in_us (usec).
// LOG(FATAL)s if ac is already registered.
void EpollServer::RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac) {
  CHECK(ac);
  if (ContainsAlarm(ac)) {
    LOG(FATAL) << "Alarm already exists " << ac;
  }
  VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us;

  TimeToAlarmCBMap::iterator alarm_iter =
      alarm_map_.insert(std::make_pair(timeout_time_in_us, ac));

  all_alarms_.insert(ac);
  // Pass the iterator to the EpollAlarmCallbackInterface.
  ac->OnRegistration(alarm_iter, this);
}

// Unregister a specific alarm callback: iterator_token must be a
// valid iterator. The caller must ensure the validity of the iterator.
void EpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) {
  AlarmCB* cb = iterator_token->second;
  alarm_map_.erase(iterator_token);
  all_alarms_.erase(cb);
  cb->OnUnregistration();
}

int EpollServer::NumFDsRegistered() const {
  DCHECK(cb_map_.size() >= 1);
  // Omit the internal FD (read_fd_)
  return cb_map_.size() - 1;
}

// Wakes the epoll server out of epoll_wait by writing a byte to the
// internal pipe; ReadPipeCallback drains it.
void EpollServer::Wake() {
  char data = 'd';  // 'd' is for data. It's good enough for me.
  write(write_fd_, &data, 1);
}

int64 EpollServer::NowInUsec() const {
  return base::Time::Now().ToInternalValue();
}

// Renders event_mask as a space-separated list of EPOLL* flag names.
std::string EpollServer::EventMaskToString(int event_mask) {
  std::string s;
  if (event_mask & EPOLLIN) s += "EPOLLIN ";
  if (event_mask & EPOLLPRI) s += "EPOLLPRI ";
  if (event_mask & EPOLLOUT) s += "EPOLLOUT ";
  if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM ";
  if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND ";
  if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM ";
  if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND ";
  if (event_mask & EPOLLMSG) s += "EPOLLMSG ";
  if (event_mask & EPOLLERR) s += "EPOLLERR ";
  if (event_mask & EPOLLHUP) s += "EPOLLHUP ";
  if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT ";
  if (event_mask & EPOLLET) s += "EPOLLET ";
  return s;
}

// Dumps the full registration state (alarms and fd callbacks) to the error
// log; intended to be called from a crash handler.
void EpollServer::LogStateOnCrash() {
  LOG(ERROR) << "----------------------Epoll Server---------------------------";
  LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_;
  LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_;

  // Log sessions with alarms.
  LOG(ERROR) << alarm_map_.size() << " alarms registered.";
  for (TimeToAlarmCBMap::iterator it = alarm_map_.begin();
       it != alarm_map_.end();
       ++it) {
    const bool skipped =
        alarms_reregistered_and_should_be_skipped_.find(it->second)
        != alarms_reregistered_and_should_be_skipped_.end();
    LOG(ERROR) << "Alarm " << it->second << " registered at time " << it->first
               << " and should be skipped = " << skipped;
  }

  LOG(ERROR) << cb_map_.size() << " fd callbacks registered.";
  for (FDToCBMap::iterator it = cb_map_.begin();
       it != cb_map_.end();
       ++it) {
    LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask
               << " registered with cb: " << it->cb;
  }
  LOG(ERROR) << "----------------------/Epoll Server--------------------------";
}



////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////

// Removes fd from the epoll set (EPOLL_CTL_DEL); LOG(FATAL)s on failure.
void EpollServer::DelFD(int fd) const {
  struct epoll_event ee;
  memset(&ee, 0, sizeof(ee));
#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD");
#endif
  if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) {
    int saved_errno = errno;
    char buf[kErrorBufferSize];
    LOG(FATAL) << "Epoll set removal error for fd " << fd << ": "
               << strerror_r(saved_errno, buf, sizeof(buf));
  }
}

////////////////////////////////////////

// Adds fd to the epoll set (EPOLL_CTL_ADD) with event_mask plus
// EPOLLERR|EPOLLHUP (always reported by epoll); LOG(FATAL)s on failure.
void EpollServer::AddFD(int fd, int event_mask) const {
  struct epoll_event ee;
  memset(&ee, 0, sizeof(ee));
  ee.events = event_mask | EPOLLERR | EPOLLHUP;
  ee.data.fd = fd;
#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD");
#endif
  if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) {
    int saved_errno = errno;
    char buf[kErrorBufferSize];
    LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": "
               << strerror_r(saved_errno, buf, sizeof(buf));
  }
}

////////////////////////////////////////

// Replaces fd's mask in the epoll set (EPOLL_CTL_MOD) with event_mask plus
// EPOLLERR|EPOLLHUP; LOG(FATAL)s on failure.
void EpollServer::ModFD(int fd, int event_mask) const {
  struct epoll_event ee;
  memset(&ee, 0, sizeof(ee));
  ee.events = event_mask | EPOLLERR | EPOLLHUP;
  ee.data.fd = fd;
#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD");
#endif
  VLOG(3) << "modifying fd= " << fd << " "
          << EventMaskToString(ee.events);
  if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) {
    int saved_errno = errno;
    char buf[kErrorBufferSize];
    LOG(FATAL) << "Epoll set modification error for fd " << fd << ": "
               << strerror_r(saved_errno, buf, sizeof(buf));
  }
}

////////////////////////////////////////

// Clears remove_event bits and sets add_event bits in fd's registered mask,
// pushes the new mask to epoll, then notifies the callback via
// OnModification().
void EpollServer::ModifyFD(int fd, int remove_event, int add_event) {
  FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd));
  if (cb_map_.end() == fd_i) {
    VLOG(2) << "Didn't find the fd " << fd << "in internal structures";
    return;
  }

  if (fd_i->cb != NULL) {
    int & event_mask = fd_i->event_mask;
    VLOG(3) << "fd= " << fd
            << " event_mask before: " << EventMaskToString(event_mask);
    event_mask &= ~remove_event;
    event_mask |= add_event;

    VLOG(3) << " event_mask after: " << EventMaskToString(event_mask);

    ModFD(fd, event_mask);

    fd_i->cb->OnModification(fd, event_mask);
  }
}

void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us,
                                                   struct epoll_event events[],
                                                   int events_size) {
  if (timeout_in_us == 0 || ready_list_.lh_first != NULL) {
    // If ready list is not empty, then don't sleep at all.
    timeout_in_us = 0;
  } else if (timeout_in_us < 0) {
    LOG(INFO) << "Negative epoll timeout: " << timeout_in_us
              << "us; epoll will wait forever for events.";
    // If timeout_in_us is < 0 we are supposed to Wait forever. This means we
    // should set timeout_in_us to -1000 so we will
    // Wait(-1000/1000) == Wait(-1) == Wait forever.
    timeout_in_us = -1000;
  } else {
    // If timeout is specified, and the ready list is empty.
    if (timeout_in_us < 1000) {
      timeout_in_us = 1000;
    }
  }
  const int timeout_in_ms = timeout_in_us / 1000;
  int nfds = epoll_wait_impl(epoll_fd_,
                             events,
                             events_size,
                             timeout_in_ms);
  VLOG(3) << "nfds=" << nfds;

#ifdef EPOLL_SERVER_EVENT_TRACING
  event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds);
#endif

  // If you're wondering why the NowInUsec() is recorded here, the answer is
  // simple: If we did it before the epoll_wait_impl, then the max error for
  // the ApproximateNowInUs() call would be as large as the maximum length of
  // epoll_wait, which can be arbitrarily long. Since this would make
  // ApproximateNowInUs() worthless, we instead record the time -after- we've
  // done epoll_wait, which guarantees that the maximum error is the amount of
  // time it takes to process all the events generated by epoll_wait.
  recorded_now_in_us_ = NowInUsec();
  if (nfds > 0) {
    for (int i = 0; i < nfds; ++i) {
      int event_mask = events[i].events;
      int fd = events[i].data.fd;
      HandleEvent(fd, event_mask);
    }
  } else if (nfds < 0) {
    // Catch interrupted syscall and just ignore it and move on.
    if (errno != EINTR && errno != 0) {
      int saved_errno = errno;
      char buf[kErrorBufferSize];
      LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: "
                 << strerror_r(saved_errno, buf, sizeof(buf));
    }
  }

  // Now run through the ready list.
  if (ready_list_.lh_first) {
    CallReadyListCallbacks();
  }
}

// Drains the ready list, invoking each entry's OnEvent() exactly once per
// pass. Entries whose callback was unregistered during OnEvent (cb == NULL)
// are erased here; entries that report out_ready_mask are re-queued.
void EpollServer::CallReadyListCallbacks() {
  // Check pre-conditions.
  DCHECK(tmp_list_.lh_first == NULL);
  // Swap out the ready_list_ into the tmp_list_ before traversing the list to
  // enable SetFDReady() to just push new items into the ready_list_.
  std::swap(ready_list_.lh_first, tmp_list_.lh_first);
  if (tmp_list_.lh_first) {
    tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first;
    EpollEvent event(0, false);
    while (tmp_list_.lh_first != NULL) {
      DCHECK_GT(ready_list_size_, 0);
      CBAndEventMask* cb_and_mask = tmp_list_.lh_first;
      RemoveFromReadyList(*cb_and_mask);

      event.out_ready_mask = 0;
      event.in_events =
          cb_and_mask->events_asserted | cb_and_mask->events_to_fake;
      // TODO(fenix): get rid of the two separate fields in cb_and_mask.
      cb_and_mask->events_asserted = 0;
      cb_and_mask->events_to_fake = 0;
      {
        // OnEvent() may call UnRegister, so we set in_use, here. Any
        // UnRegister call will now simply set the cb to NULL instead of
        // invalidating the cb_and_mask object (by deleting the object in the
        // map to which cb_and_mask refers)
        TrueFalseGuard in_use_guard(&(cb_and_mask->in_use));
        cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event);
      }

      // Since OnEvent may have called UnregisterFD, we must check here that
      // the callback is still valid. If it isn't, then UnregisterFD *was*
      // called, and we should now get rid of the object.
      if (cb_and_mask->cb == NULL) {
        cb_map_.erase(*cb_and_mask);
      } else if (event.out_ready_mask != 0) {
        cb_and_mask->events_to_fake = event.out_ready_mask;
        AddToReadyList(cb_and_mask);
      }
    }
  }
  DCHECK(tmp_list_.lh_first == NULL);
}

const int EpollServer::kMinimumEffectiveAlarmQuantum = 1000;

// Rounds now_in_us down to a quantum boundary, then adds just under two
// quanta. Alarms may be up to kMinimumEffectiveAlarmQuantum - 1 us late.
inline int64 EpollServer::DoRoundingOnNow(int64 now_in_us) const {
  now_in_us /= kMinimumEffectiveAlarmQuantum;
  now_in_us *= kMinimumEffectiveAlarmQuantum;
  now_in_us += (2 * kMinimumEffectiveAlarmQuantum - 1);
  return now_in_us;
}

// Fires every alarm whose deadline is at or before the (rounded) recorded
// 'now', removing it from alarm_map_ and re-registering it if OnAlarm()
// returns a new positive deadline. Alarms re-registered with a deadline
// still in the past are deferred to the next pass to avoid looping forever.
void EpollServer::CallAndReregisterAlarmEvents() {
  int64 now_in_us = recorded_now_in_us_;
  DCHECK_NE(0, recorded_now_in_us_);
  now_in_us = DoRoundingOnNow(now_in_us);

  TimeToAlarmCBMap::iterator erase_it;

  // execute alarms.
  for (TimeToAlarmCBMap::iterator i = alarm_map_.begin();
       i != alarm_map_.end();
      ) {
    if (i->first > now_in_us) {
      break;
    }
    AlarmCB* cb = i->second;
    // Execute the OnAlarm() only if we did not register
    // it in this loop itself.
    const bool added_in_this_round =
        alarms_reregistered_and_should_be_skipped_.find(cb)
        != alarms_reregistered_and_should_be_skipped_.end();
    if (added_in_this_round) {
      ++i;
      continue;
    }
    all_alarms_.erase(cb);
    const int64 new_timeout_time_in_us = cb->OnAlarm();

    erase_it = i;
    ++i;
    alarm_map_.erase(erase_it);

    if (new_timeout_time_in_us > 0) {
      // We add to hash_set only if the new timeout is <= now_in_us.
      // if timeout is > now_in_us then we have no fear that this alarm
      // can be reexecuted in this loop, and hence we do not need to
      // worry about a recursive loop.
      DVLOG(3) << "Reregistering alarm "
               << " " << cb
               << " " << new_timeout_time_in_us
               << " " << now_in_us;
      if (new_timeout_time_in_us <= now_in_us) {
        alarms_reregistered_and_should_be_skipped_.insert(cb);
      }
      RegisterAlarm(new_timeout_time_in_us, cb);
    }
  }
  alarms_reregistered_and_should_be_skipped_.clear();
}

EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) {
}

EpollAlarm::~EpollAlarm() {
  UnregisterIfRegistered();
}

// Marks the alarm as fired; returning 0 means "do not re-register".
int64 EpollAlarm::OnAlarm() {
  registered_ = false;
  return 0;
}

void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken& token,
                                EpollServer* eps) {
  DCHECK_EQ(false, registered_);

  token_ = token;
  eps_ = eps;
  registered_ = true;
}

void EpollAlarm::OnUnregistration() {
  registered_ = false;
}

void EpollAlarm::OnShutdown(EpollServer* eps) {
  registered_ = false;
  eps_ = NULL;
}

// If the alarm was registered, unregister it.
void EpollAlarm::UnregisterIfRegistered() {
  if (!registered_) {
    return;
  }
  eps_->UnregisterAlarm(token_);
}

}  // namespace net