// Copyright (c) 2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "net/tools/flip_server/epoll_server.h"

#include <stdlib.h>  // for abort
#include <errno.h>  // for errno and strerror_r
#include <algorithm>
#include <iostream>
#include <utility>
#include <vector>

#include "base/logging.h"
#include "base/timer.h"

// Design notes: An efficient implementation of ready list has the following
// desirable properties:
//
// A. O(1) insertion into/removal from the list in any location.
// B. Once the callback is found by hash lookup using the fd, the lookup of
//    corresponding entry in the list is O(1).
// C. Safe insertion into/removal from the list during list iteration. (The
//    ready list's purpose is to enable completely event driven I/O model.
//    Thus, all the interesting bits happen in the callback. It is critical
//    to not place any restriction on the API during list iteration.)
//
// The current implementation achieves these goals with the following design:
//
// - The ready list is constructed as a doubly linked list to enable O(1)
//   insertion/removal (see man 3 queue).
// - The forward and backward links are directly embedded inside the
//   CBAndEventMask struct. This enables O(1) lookup in the list for a given
//   callback. (Technically, we could've used std::list of hash_set::iterator,
//   and keep a list::iterator in CBAndEventMask to achieve the same effect.
//   However, iterators have two problems: no way to portably invalidate them,
//   and no way to tell whether an iterator is singular or not. The only way to
//   overcome these issues is to keep bools in both places, but that throws off
//   memory alignment (up to 7 wasted bytes for each bool). The extra level of
//   indirection will also likely be less cache friendly.
Direct manipulation 41 // of link pointers makes it easier to retrieve the CBAndEventMask from the 42 // list, easier to check whether an CBAndEventMask is in the list, uses less 43 // memory (save 32 bytes/fd), and does not affect cache usage (we need to 44 // read in the struct to use the callback anyway).) 45 // - Embed the fd directly into CBAndEventMask and switch to using hash_set. 46 // This removes the need to store hash_map::iterator in the list just so that 47 // we can get both the fd and the callback. 48 // - The ready list is "one shot": each entry is removed before OnEvent is 49 // called. This removes the mutation-while-iterating problem. 50 // - Use two lists to keep track of callbacks. The ready_list_ is the one used 51 // for registration. Before iteration, the ready_list_ is swapped into the 52 // tmp_list_. Once iteration is done, tmp_list_ will be empty, and 53 // ready_list_ will have all the new ready fds. 54 55 // The size we use for buffers passed to strerror_r 56 static const int kErrorBufferSize = 256; 57 58 namespace net { 59 60 // Clears the pipe and returns. Used for waking the epoll server up. 61 class ReadPipeCallback : public EpollCallbackInterface { 62 public: 63 void OnEvent(int fd, EpollEvent* event) { 64 DCHECK(event->in_events == EPOLLIN); 65 int data; 66 int data_read = 1; 67 // Read until the pipe is empty. 
68 while (data_read > 0) { 69 data_read = read(fd, &data, sizeof(data)); 70 } 71 } 72 void OnShutdown(EpollServer *eps, int fd) {} 73 void OnRegistration(EpollServer*, int, int) {} 74 void OnModification(int, int) {} // COV_NF_LINE 75 void OnUnregistration(int, bool) {} // COV_NF_LINE 76 }; 77 78 //////////////////////////////////////////////////////////////////////////////// 79 //////////////////////////////////////////////////////////////////////////////// 80 81 EpollServer::EpollServer() 82 : epoll_fd_(epoll_create(1024)), 83 timeout_in_us_(0), 84 recorded_now_in_us_(0), 85 ready_list_size_(0), 86 wake_cb_(new ReadPipeCallback), 87 read_fd_(-1), 88 write_fd_(-1), 89 in_wait_for_events_and_execute_callbacks_(false), 90 in_shutdown_(false) { 91 // ensure that the epoll_fd_ is valid. 92 CHECK_NE(epoll_fd_, -1); 93 LIST_INIT(&ready_list_); 94 LIST_INIT(&tmp_list_); 95 96 int pipe_fds[2]; 97 if (pipe(pipe_fds) < 0) { 98 // Unfortunately, it is impossible to test any such initialization in 99 // a constructor (as virtual methods do not yet work). 100 // This -could- be solved by moving initialization to an outside 101 // call... 102 int saved_errno = errno; 103 char buf[kErrorBufferSize]; 104 LOG(FATAL) << "Error " << saved_errno 105 << " in pipe(): " << strerror_r(saved_errno, buf, sizeof(buf)); 106 } 107 read_fd_ = pipe_fds[0]; 108 write_fd_ = pipe_fds[1]; 109 RegisterFD(read_fd_, wake_cb_.get(), EPOLLIN); 110 } 111 112 void EpollServer::CleanupFDToCBMap() { 113 FDToCBMap::iterator cb_iter = cb_map_.begin(); 114 while (cb_iter != cb_map_.end()) { 115 int fd = cb_iter->fd; 116 CB* cb = cb_iter->cb; 117 118 cb_iter->in_use = true; 119 if (cb) { 120 cb->OnShutdown(this, fd); 121 } 122 123 cb_map_.erase(cb_iter); 124 cb_iter = cb_map_.begin(); 125 } 126 } 127 128 void EpollServer::CleanupTimeToAlarmCBMap() { 129 TimeToAlarmCBMap::iterator erase_it; 130 131 // Call OnShutdown() on alarms. 
Note that the structure of the loop 132 // is similar to the structure of loop in the function HandleAlarms() 133 for (TimeToAlarmCBMap::iterator i = alarm_map_.begin(); 134 i != alarm_map_.end(); 135 ) { 136 // Note that OnShutdown() can call UnregisterAlarm() on 137 // other iterators. OnShutdown() should not call UnregisterAlarm() 138 // on self because by definition the iterator is not valid any more. 139 i->second->OnShutdown(this); 140 erase_it = i; 141 ++i; 142 alarm_map_.erase(erase_it); 143 } 144 } 145 146 EpollServer::~EpollServer() { 147 DCHECK_EQ(in_shutdown_, false); 148 in_shutdown_ = true; 149 #ifdef EPOLL_SERVER_EVENT_TRACING 150 LOG(INFO) << "\n" << event_recorder_; 151 #endif 152 VLOG(2) << "Shutting down epoll server "; 153 CleanupFDToCBMap(); 154 155 LIST_INIT(&ready_list_); 156 LIST_INIT(&tmp_list_); 157 158 CleanupTimeToAlarmCBMap(); 159 160 close(read_fd_); 161 close(write_fd_); 162 close(epoll_fd_); 163 } 164 165 // Whether a CBAandEventMask is on the ready list is determined by a non-NULL 166 // le_prev pointer (le_next being NULL indicates end of list). 167 inline void EpollServer::AddToReadyList(CBAndEventMask* cb_and_mask) { 168 if (cb_and_mask->entry.le_prev == NULL) { 169 LIST_INSERT_HEAD(&ready_list_, cb_and_mask, entry); 170 ++ready_list_size_; 171 } 172 } 173 174 inline void EpollServer::RemoveFromReadyList( 175 const CBAndEventMask& cb_and_mask) { 176 if (cb_and_mask.entry.le_prev != NULL) { 177 LIST_REMOVE(&cb_and_mask, entry); 178 // Clean up all the ready list states. Don't bother with the other fields 179 // as they are initialized when the CBAandEventMask is added to the ready 180 // list. This saves a few cycles in the inner loop. 
181 cb_and_mask.entry.le_prev = NULL; 182 --ready_list_size_; 183 if (ready_list_size_ == 0) { 184 DCHECK(ready_list_.lh_first == NULL); 185 DCHECK(tmp_list_.lh_first == NULL); 186 } 187 } 188 } 189 190 void EpollServer::RegisterFD(int fd, CB* cb, int event_mask) { 191 CHECK(cb); 192 VLOG(3) << "RegisterFD fd=" << fd << " event_mask=" << event_mask; 193 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 194 if (cb_map_.end() != fd_i) { 195 // do we just abort, or do we just unregister the other guy? 196 // for now, lets just unregister the other guy. 197 198 // unregister any callback that may already be registered for this FD. 199 CB* other_cb = fd_i->cb; 200 if (other_cb) { 201 // Must remove from the ready list before erasing. 202 RemoveFromReadyList(*fd_i); 203 other_cb->OnUnregistration(fd, true); 204 ModFD(fd, event_mask); 205 } else { 206 // already unregistered, so just recycle the node. 207 AddFD(fd, event_mask); 208 } 209 fd_i->cb = cb; 210 fd_i->event_mask = event_mask; 211 fd_i->events_to_fake = 0; 212 } else { 213 AddFD(fd, event_mask); 214 cb_map_.insert(CBAndEventMask(cb, event_mask, fd)); 215 } 216 217 218 // set the FD to be non-blocking. 219 SetNonblocking(fd); 220 221 cb->OnRegistration(this, fd, event_mask); 222 } 223 224 int EpollServer::GetFlags(int fd) { 225 return fcntl(fd, F_GETFL, 0); 226 } 227 228 void EpollServer::SetNonblocking(int fd) { 229 int flags = GetFlags(fd); 230 if (flags == -1) { 231 int saved_errno = errno; 232 char buf[kErrorBufferSize]; 233 LOG(FATAL) << "Error " << saved_errno 234 << " doing fcntl(" << fd << ", F_GETFL, 0): " 235 << strerror_r(saved_errno, buf, sizeof(buf)); 236 } 237 if (!(flags & O_NONBLOCK)) { 238 int saved_flags = flags; 239 flags = SetFlags(fd, flags | O_NONBLOCK); 240 if (flags == -1) { 241 // bad. 
242 int saved_errno = errno; 243 char buf[kErrorBufferSize]; 244 LOG(FATAL) << "Error " << saved_errno 245 << " doing fcntl(" << fd << ", F_SETFL, " << saved_flags << "): " 246 << strerror_r(saved_errno, buf, sizeof(buf)); 247 } 248 } 249 } 250 251 int EpollServer::epoll_wait_impl(int epfd, 252 struct epoll_event* events, 253 int max_events, 254 int timeout_in_ms) { 255 return epoll_wait(epfd, events, max_events, timeout_in_ms); 256 } 257 258 void EpollServer::RegisterFDForWrite(int fd, CB* cb) { 259 RegisterFD(fd, cb, EPOLLOUT); 260 } 261 262 void EpollServer::RegisterFDForReadWrite(int fd, CB* cb) { 263 RegisterFD(fd, cb, EPOLLIN | EPOLLOUT); 264 } 265 266 void EpollServer::RegisterFDForRead(int fd, CB* cb) { 267 RegisterFD(fd, cb, EPOLLIN); 268 } 269 270 void EpollServer::UnregisterFD(int fd) { 271 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 272 if (cb_map_.end() == fd_i || fd_i->cb == NULL) { 273 // Doesn't exist in server, or has gone through UnregisterFD once and still 274 // inside the callchain of OnEvent. 275 return; 276 } 277 #ifdef EPOLL_SERVER_EVENT_TRACING 278 event_recorder_.RecordUnregistration(fd); 279 #endif 280 CB* cb = fd_i->cb; 281 // Since the links are embedded within the struct, we must remove it from the 282 // list before erasing it from the hash_set. 283 RemoveFromReadyList(*fd_i); 284 DelFD(fd); 285 cb->OnUnregistration(fd, false); 286 // fd_i->cb is NULL if that fd is unregistered inside the callchain of 287 // OnEvent. Since the EpollServer needs a valid CBAndEventMask after OnEvent 288 // returns in order to add it to the ready list, we cannot have UnregisterFD 289 // erase the entry if it is in use. Thus, a NULL fd_i->cb is used as a 290 // condition that tells the EpollServer that this entry is unused at a later 291 // point. 
292 if (!fd_i->in_use) { 293 cb_map_.erase(fd_i); 294 } else { 295 // Remove all trace of the registration, and just keep the node alive long 296 // enough so the code that calls OnEvent doesn't have to worry about 297 // figuring out whether the CBAndEventMask is valid or not. 298 fd_i->cb = NULL; 299 fd_i->event_mask = 0; 300 fd_i->events_to_fake = 0; 301 } 302 } 303 304 void EpollServer::ModifyCallback(int fd, int event_mask) { 305 ModifyFD(fd, ~0, event_mask); 306 } 307 308 void EpollServer::StopRead(int fd) { 309 ModifyFD(fd, EPOLLIN, 0); 310 } 311 312 void EpollServer::StartRead(int fd) { 313 ModifyFD(fd, 0, EPOLLIN); 314 } 315 316 void EpollServer::StopWrite(int fd) { 317 ModifyFD(fd, EPOLLOUT, 0); 318 } 319 320 void EpollServer::StartWrite(int fd) { 321 ModifyFD(fd, 0, EPOLLOUT); 322 } 323 324 void EpollServer::HandleEvent(int fd, int event_mask) { 325 #ifdef EPOLL_SERVER_EVENT_TRACING 326 event_recorder_.RecordEpollEvent(fd, event_mask); 327 #endif 328 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 329 if (fd_i == cb_map_.end() || fd_i->cb == NULL) { 330 // Ignore the event. 331 // This could occur if epoll() returns a set of events, and 332 // while processing event A (earlier) we removed the callback 333 // for event B (and are now processing event B). 
334 return; 335 } 336 fd_i->events_asserted = event_mask; 337 CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); 338 AddToReadyList(cb_and_mask); 339 } 340 341 class TrueFalseGuard { 342 public: 343 explicit TrueFalseGuard(bool* guarded_bool) : guarded_bool_(guarded_bool) { 344 DCHECK(guarded_bool_ != NULL); 345 DCHECK(*guarded_bool_ == false); 346 *guarded_bool_ = true; 347 } 348 ~TrueFalseGuard() { 349 *guarded_bool_ = false; 350 } 351 private: 352 bool* guarded_bool_; 353 }; 354 355 void EpollServer::WaitForEventsAndExecuteCallbacks() { 356 if (in_wait_for_events_and_execute_callbacks_) { 357 LOG(DFATAL) << 358 "Attempting to call WaitForEventsAndExecuteCallbacks" 359 " when an ancestor to the current function is already" 360 " WaitForEventsAndExecuteCallbacks!"; 361 // The line below is actually tested, but in coverage mode, 362 // we never see it. 363 return; // COV_NF_LINE 364 } 365 TrueFalseGuard recursion_guard(&in_wait_for_events_and_execute_callbacks_); 366 if (alarm_map_.empty()) { 367 // no alarms, this is business as usual. 368 WaitForEventsAndCallHandleEvents(timeout_in_us_, 369 events_, 370 events_size_); 371 recorded_now_in_us_ = 0; 372 return; 373 } 374 375 // store the 'now'. If we recomputed 'now' every iteration 376 // down below, then we might never exit that loop-- any 377 // long-running alarms might install other long-running 378 // alarms, etc. By storing it here now, we ensure that 379 // a more reasonable amount of work is done here. 380 int64 now_in_us = NowInUsec(); 381 382 // Get the first timeout from the alarm_map where it is 383 // stored in absolute time. 
384 int64 next_alarm_time_in_us = alarm_map_.begin()->first; 385 VLOG(4) << "next_alarm_time = " << next_alarm_time_in_us 386 << " now = " << now_in_us 387 << " timeout_in_us = " << timeout_in_us_; 388 389 int64 wait_time_in_us; 390 int64 alarm_timeout_in_us = next_alarm_time_in_us - now_in_us; 391 392 // If the next alarm is sooner than the default timeout, or if there is no 393 // timeout (timeout_in_us_ == -1), wake up when the alarm should fire. 394 // Otherwise use the default timeout. 395 if (alarm_timeout_in_us < timeout_in_us_ || timeout_in_us_ < 0) { 396 wait_time_in_us = std::max(alarm_timeout_in_us, static_cast<int64>(0)); 397 } else { 398 wait_time_in_us = timeout_in_us_; 399 } 400 401 VLOG(4) << "wait_time_in_us = " << wait_time_in_us; 402 403 // wait for events. 404 405 WaitForEventsAndCallHandleEvents(wait_time_in_us, 406 events_, 407 events_size_); 408 CallAndReregisterAlarmEvents(); 409 recorded_now_in_us_ = 0; 410 } 411 412 void EpollServer::SetFDReady(int fd, int events_to_fake) { 413 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 414 if (cb_map_.end() != fd_i && fd_i->cb != NULL) { 415 // This const_cast is necessary for LIST_HEAD_INSERT to work. Declaring 416 // entry mutable is insufficient because LIST_HEAD_INSERT assigns the 417 // forward pointer of the list head to the current cb_and_mask, and the 418 // compiler complains that it can't assign a const T* to a T*. 419 CBAndEventMask* cb_and_mask = const_cast<CBAndEventMask*>(&*fd_i); 420 // Note that there is no clearly correct behavior here when 421 // cb_and_mask->events_to_fake != 0 and this function is called. 422 // Of the two operations: 423 // cb_and_mask->events_to_fake = events_to_fake 424 // cb_and_mask->events_to_fake |= events_to_fake 425 // the first was picked because it discourages users from calling 426 // SetFDReady repeatedly to build up the correct event set as it is more 427 // efficient to call SetFDReady once with the correct, final mask. 
428 cb_and_mask->events_to_fake = events_to_fake; 429 AddToReadyList(cb_and_mask); 430 } 431 } 432 433 void EpollServer::SetFDNotReady(int fd) { 434 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 435 if (cb_map_.end() != fd_i) { 436 RemoveFromReadyList(*fd_i); 437 } 438 } 439 440 bool EpollServer::IsFDReady(int fd) const { 441 FDToCBMap::const_iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 442 return (cb_map_.end() != fd_i && 443 fd_i->cb != NULL && 444 fd_i->entry.le_prev != NULL); 445 } 446 447 void EpollServer::VerifyReadyList() const { 448 int count = 0; 449 CBAndEventMask* cur = ready_list_.lh_first; 450 for (; cur; cur = cur->entry.le_next) { 451 ++count; 452 } 453 for (cur = tmp_list_.lh_first; cur; cur = cur->entry.le_next) { 454 ++count; 455 } 456 CHECK_EQ(ready_list_size_, count) << "Ready list size does not match count"; 457 } 458 459 void EpollServer::RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac) { 460 CHECK(ac); 461 if (ContainsAlarm(ac)) { 462 LOG(FATAL) << "Alarm already exists " << ac; 463 } 464 VLOG(4) << "RegisteringAlarm at : " << timeout_time_in_us; 465 466 TimeToAlarmCBMap::iterator alarm_iter = 467 alarm_map_.insert(std::make_pair(timeout_time_in_us, ac)); 468 469 all_alarms_.insert(ac); 470 // Pass the iterator to the EpollAlarmCallbackInterface. 471 ac->OnRegistration(alarm_iter, this); 472 } 473 474 // Unregister a specific alarm callback: iterator_token must be a 475 // valid iterator. The caller must ensure the validity of the iterator. 476 void EpollServer::UnregisterAlarm(const AlarmRegToken& iterator_token) { 477 AlarmCB* cb = iterator_token->second; 478 alarm_map_.erase(iterator_token); 479 all_alarms_.erase(cb); 480 cb->OnUnregistration(); 481 } 482 483 int EpollServer::NumFDsRegistered() const { 484 DCHECK(cb_map_.size() >= 1); 485 // Omit the internal FD (read_fd_) 486 return cb_map_.size() - 1; 487 } 488 489 void EpollServer::Wake() { 490 char data = 'd'; // 'd' is for data. 
It's good enough for me. 491 int rv = write(write_fd_, &data, 1); 492 DCHECK(rv == 1); 493 } 494 495 int64 EpollServer::NowInUsec() const { 496 return base::Time::Now().ToInternalValue(); 497 } 498 499 int64 EpollServer::ApproximateNowInUsec() const { 500 if (recorded_now_in_us_ != 0) { 501 return recorded_now_in_us_; 502 } 503 return this->NowInUsec(); 504 } 505 506 std::string EpollServer::EventMaskToString(int event_mask) { 507 std::string s; 508 if (event_mask & EPOLLIN) s += "EPOLLIN "; 509 if (event_mask & EPOLLPRI) s += "EPOLLPRI "; 510 if (event_mask & EPOLLOUT) s += "EPOLLOUT "; 511 if (event_mask & EPOLLRDNORM) s += "EPOLLRDNORM "; 512 if (event_mask & EPOLLRDBAND) s += "EPOLLRDBAND "; 513 if (event_mask & EPOLLWRNORM) s += "EPOLLWRNORM "; 514 if (event_mask & EPOLLWRBAND) s += "EPOLLWRBAND "; 515 if (event_mask & EPOLLMSG) s += "EPOLLMSG "; 516 if (event_mask & EPOLLERR) s += "EPOLLERR "; 517 if (event_mask & EPOLLHUP) s += "EPOLLHUP "; 518 if (event_mask & EPOLLONESHOT) s += "EPOLLONESHOT "; 519 if (event_mask & EPOLLET) s += "EPOLLET "; 520 return s; 521 } 522 523 void EpollServer::LogStateOnCrash() { 524 LOG(ERROR) << "----------------------Epoll Server---------------------------"; 525 LOG(ERROR) << "Epoll server " << this << " polling on fd " << epoll_fd_; 526 LOG(ERROR) << "timeout_in_us_: " << timeout_in_us_; 527 528 // Log sessions with alarms. 
529 LOG(ERROR) << alarm_map_.size() << " alarms registered."; 530 for (TimeToAlarmCBMap::iterator it = alarm_map_.begin(); 531 it != alarm_map_.end(); 532 ++it) { 533 const bool skipped = 534 alarms_reregistered_and_should_be_skipped_.find(it->second) 535 != alarms_reregistered_and_should_be_skipped_.end(); 536 LOG(ERROR) << "Alarm " << it->second << " registered at time " << it->first 537 << " and should be skipped = " << skipped; 538 } 539 540 LOG(ERROR) << cb_map_.size() << " fd callbacks registered."; 541 for (FDToCBMap::iterator it = cb_map_.begin(); 542 it != cb_map_.end(); 543 ++it) { 544 LOG(ERROR) << "fd: " << it->fd << " with mask " << it->event_mask 545 << " registered with cb: " << it->cb; 546 } 547 LOG(ERROR) << "----------------------/Epoll Server--------------------------"; 548 } 549 550 551 552 //////////////////////////////////////////////////////////////////////////////// 553 //////////////////////////////////////////////////////////////////////////////// 554 555 void EpollServer::DelFD(int fd) const { 556 struct epoll_event ee; 557 memset(&ee, 0, sizeof(ee)); 558 #ifdef EPOLL_SERVER_EVENT_TRACING 559 event_recorder_.RecordFDMaskEvent(fd, 0, "DelFD"); 560 #endif 561 if (epoll_ctl(epoll_fd_, EPOLL_CTL_DEL, fd, &ee)) { 562 int saved_errno = errno; 563 char buf[kErrorBufferSize]; 564 LOG(FATAL) << "Epoll set removal error for fd " << fd << ": " 565 << strerror_r(saved_errno, buf, sizeof(buf)); 566 } 567 } 568 569 //////////////////////////////////////// 570 571 void EpollServer::AddFD(int fd, int event_mask) const { 572 struct epoll_event ee; 573 memset(&ee, 0, sizeof(ee)); 574 ee.events = event_mask | EPOLLERR | EPOLLHUP; 575 ee.data.fd = fd; 576 #ifdef EPOLL_SERVER_EVENT_TRACING 577 event_recorder_.RecordFDMaskEvent(fd, ee.events, "AddFD"); 578 #endif 579 if (epoll_ctl(epoll_fd_, EPOLL_CTL_ADD, fd, &ee)) { 580 int saved_errno = errno; 581 char buf[kErrorBufferSize]; 582 LOG(FATAL) << "Epoll set insertion error for fd " << fd << ": " 583 << 
strerror_r(saved_errno, buf, sizeof(buf)); 584 } 585 } 586 587 //////////////////////////////////////// 588 589 void EpollServer::ModFD(int fd, int event_mask) const { 590 struct epoll_event ee; 591 memset(&ee, 0, sizeof(ee)); 592 ee.events = event_mask | EPOLLERR | EPOLLHUP; 593 ee.data.fd = fd; 594 #ifdef EPOLL_SERVER_EVENT_TRACING 595 event_recorder_.RecordFDMaskEvent(fd, ee.events, "ModFD"); 596 #endif 597 VLOG(3) << "modifying fd= " << fd << " " 598 << EventMaskToString(ee.events); 599 if (epoll_ctl(epoll_fd_, EPOLL_CTL_MOD, fd, &ee)) { 600 int saved_errno = errno; 601 char buf[kErrorBufferSize]; 602 LOG(FATAL) << "Epoll set modification error for fd " << fd << ": " 603 << strerror_r(saved_errno, buf, sizeof(buf)); 604 } 605 } 606 607 //////////////////////////////////////// 608 609 void EpollServer::ModifyFD(int fd, int remove_event, int add_event) { 610 FDToCBMap::iterator fd_i = cb_map_.find(CBAndEventMask(NULL, 0, fd)); 611 if (cb_map_.end() == fd_i) { 612 VLOG(2) << "Didn't find the fd " << fd << "in internal structures"; 613 return; 614 } 615 616 if (fd_i->cb != NULL) { 617 int & event_mask = fd_i->event_mask; 618 VLOG(3) << "fd= " << fd 619 << " event_mask before: " << EventMaskToString(event_mask); 620 event_mask &= ~remove_event; 621 event_mask |= add_event; 622 623 VLOG(3) << " event_mask after: " << EventMaskToString(event_mask); 624 625 ModFD(fd, event_mask); 626 627 fd_i->cb->OnModification(fd, event_mask); 628 } 629 } 630 631 void EpollServer::WaitForEventsAndCallHandleEvents(int64 timeout_in_us, 632 struct epoll_event events[], 633 int events_size) { 634 if (timeout_in_us == 0 || ready_list_.lh_first != NULL) { 635 // If ready list is not empty, then don't sleep at all. 636 timeout_in_us = 0; 637 } else if (timeout_in_us < 0) { 638 LOG(INFO) << "Negative epoll timeout: " << timeout_in_us 639 << "us; epoll will wait forever for events."; 640 // If timeout_in_us is < 0 we are supposed to Wait forever. 
This means we 641 // should set timeout_in_us to -1000 so we will 642 // Wait(-1000/1000) == Wait(-1) == Wait forever. 643 timeout_in_us = -1000; 644 } else { 645 // If timeout is specified, and the ready list is empty. 646 if (timeout_in_us < 1000) { 647 timeout_in_us = 1000; 648 } 649 } 650 const int timeout_in_ms = timeout_in_us / 1000; 651 int nfds = epoll_wait_impl(epoll_fd_, 652 events, 653 events_size, 654 timeout_in_ms); 655 VLOG(3) << "nfds=" << nfds; 656 657 #ifdef EPOLL_SERVER_EVENT_TRACING 658 event_recorder_.RecordEpollWaitEvent(timeout_in_ms, nfds); 659 #endif 660 661 // If you're wondering why the NowInUsec() is recorded here, the answer is 662 // simple: If we did it before the epoll_wait_impl, then the max error for 663 // the ApproximateNowInUs() call would be as large as the maximum length of 664 // epoll_wait, which can be arbitrarily long. Since this would make 665 // ApproximateNowInUs() worthless, we instead record the time -after- we've 666 // done epoll_wait, which guarantees that the maximum error is the amount of 667 // time it takes to process all the events generated by epoll_wait. 668 recorded_now_in_us_ = NowInUsec(); 669 if (nfds > 0) { 670 for (int i = 0; i < nfds; ++i) { 671 int event_mask = events[i].events; 672 int fd = events[i].data.fd; 673 HandleEvent(fd, event_mask); 674 } 675 } else if (nfds < 0) { 676 // Catch interrupted syscall and just ignore it and move on. 677 if (errno != EINTR && errno != 0) { 678 int saved_errno = errno; 679 char buf[kErrorBufferSize]; 680 LOG(FATAL) << "Error " << saved_errno << " in epoll_wait: " 681 << strerror_r(saved_errno, buf, sizeof(buf)); 682 } 683 } 684 685 // Now run through the ready list. 686 if (ready_list_.lh_first) { 687 CallReadyListCallbacks(); 688 } 689 } 690 691 void EpollServer::CallReadyListCallbacks() { 692 // Check pre-conditions. 
693 DCHECK(tmp_list_.lh_first == NULL); 694 // Swap out the ready_list_ into the tmp_list_ before traversing the list to 695 // enable SetFDReady() to just push new items into the ready_list_. 696 std::swap(ready_list_.lh_first, tmp_list_.lh_first); 697 if (tmp_list_.lh_first) { 698 tmp_list_.lh_first->entry.le_prev = &tmp_list_.lh_first; 699 EpollEvent event(0, false); 700 while (tmp_list_.lh_first != NULL) { 701 DCHECK_GT(ready_list_size_, 0); 702 CBAndEventMask* cb_and_mask = tmp_list_.lh_first; 703 RemoveFromReadyList(*cb_and_mask); 704 705 event.out_ready_mask = 0; 706 event.in_events = 707 cb_and_mask->events_asserted | cb_and_mask->events_to_fake; 708 // TODO(fenix): get rid of the two separate fields in cb_and_mask. 709 cb_and_mask->events_asserted = 0; 710 cb_and_mask->events_to_fake = 0; 711 { 712 // OnEvent() may call UnRegister, so we set in_use, here. Any 713 // UnRegister call will now simply set the cb to NULL instead of 714 // invalidating the cb_and_mask object (by deleting the object in the 715 // map to which cb_and_mask refers) 716 TrueFalseGuard in_use_guard(&(cb_and_mask->in_use)); 717 cb_and_mask->cb->OnEvent(cb_and_mask->fd, &event); 718 } 719 720 // Since OnEvent may have called UnregisterFD, we must check here that 721 // the callback is still valid. If it isn't, then UnregisterFD *was* 722 // called, and we should now get rid of the object. 723 if (cb_and_mask->cb == NULL) { 724 cb_map_.erase(*cb_and_mask); 725 } else if (event.out_ready_mask != 0) { 726 cb_and_mask->events_to_fake = event.out_ready_mask; 727 AddToReadyList(cb_and_mask); 728 } 729 } 730 } 731 DCHECK(tmp_list_.lh_first == NULL); 732 } 733 734 const int EpollServer::kMinimumEffectiveAlarmQuantum = 1000; 735 736 // Alarms may be up to kMinimumEffectiveAlarmQuantum -1 us late. 
737 inline int64 EpollServer::DoRoundingOnNow(int64 now_in_us) const { 738 now_in_us /= kMinimumEffectiveAlarmQuantum; 739 now_in_us *= kMinimumEffectiveAlarmQuantum; 740 now_in_us += (2 * kMinimumEffectiveAlarmQuantum - 1); 741 return now_in_us; 742 } 743 744 void EpollServer::CallAndReregisterAlarmEvents() { 745 int64 now_in_us = recorded_now_in_us_; 746 DCHECK_NE(0, recorded_now_in_us_); 747 now_in_us = DoRoundingOnNow(now_in_us); 748 749 TimeToAlarmCBMap::iterator erase_it; 750 751 // execute alarms. 752 for (TimeToAlarmCBMap::iterator i = alarm_map_.begin(); 753 i != alarm_map_.end(); 754 ) { 755 if (i->first > now_in_us) { 756 break; 757 } 758 AlarmCB* cb = i->second; 759 // Execute the OnAlarm() only if we did not register 760 // it in this loop itself. 761 const bool added_in_this_round = 762 alarms_reregistered_and_should_be_skipped_.find(cb) 763 != alarms_reregistered_and_should_be_skipped_.end(); 764 if (added_in_this_round) { 765 ++i; 766 continue; 767 } 768 all_alarms_.erase(cb); 769 const int64 new_timeout_time_in_us = cb->OnAlarm(); 770 771 erase_it = i; 772 ++i; 773 alarm_map_.erase(erase_it); 774 775 if (new_timeout_time_in_us > 0) { 776 // We add to hash_set only if the new timeout is <= now_in_us. 777 // if timeout is > now_in_us then we have no fear that this alarm 778 // can be reexecuted in this loop, and hence we do not need to 779 // worry about a recursive loop. 
780 DVLOG(3) << "Reregistering alarm " 781 << " " << cb 782 << " " << new_timeout_time_in_us 783 << " " << now_in_us; 784 if (new_timeout_time_in_us <= now_in_us) { 785 alarms_reregistered_and_should_be_skipped_.insert(cb); 786 } 787 RegisterAlarm(new_timeout_time_in_us, cb); 788 } 789 } 790 alarms_reregistered_and_should_be_skipped_.clear(); 791 } 792 793 EpollAlarm::EpollAlarm() : eps_(NULL), registered_(false) { 794 } 795 796 EpollAlarm::~EpollAlarm() { 797 UnregisterIfRegistered(); 798 } 799 800 int64 EpollAlarm::OnAlarm() { 801 registered_ = false; 802 return 0; 803 } 804 805 void EpollAlarm::OnRegistration(const EpollServer::AlarmRegToken& token, 806 EpollServer* eps) { 807 DCHECK_EQ(false, registered_); 808 809 token_ = token; 810 eps_ = eps; 811 registered_ = true; 812 } 813 814 void EpollAlarm::OnUnregistration() { 815 registered_ = false; 816 } 817 818 void EpollAlarm::OnShutdown(EpollServer* eps) { 819 registered_ = false; 820 eps_ = NULL; 821 } 822 823 // If the alarm was registered, unregister it. 824 void EpollAlarm::UnregisterIfRegistered() { 825 if (!registered_) { 826 return; 827 } 828 eps_->UnregisterAlarm(token_); 829 } 830 831 } // namespace net 832 833