1 // Copyright (c) 2009 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #ifndef NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 6 #define NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 7 8 #include <fcntl.h> 9 #include <sys/queue.h> 10 #include <ext/hash_map> // it is annoying that gcc does this. oh well. 11 #include <ext/hash_set> 12 #include <map> 13 #include <string> 14 #include <utility> 15 #include <set> 16 #include <vector> 17 18 // #define EPOLL_SERVER_EVENT_TRACING 1 19 // 20 // Defining EPOLL_SERVER_EVENT_TRACING 21 // causes code to exist which didn't before. 22 // This code tracks each event generated by the epollserver, 23 // as well as providing a per-fd-registered summary of 24 // events. Note that enabling this code vastly slows 25 // down operations, and uses substantially more 26 // memory. For these reasons, it should only be enabled when doing 27 // developer debugging at his/her workstation. 28 // 29 // A structure called 'EventRecorder' will exist when 30 // the macro is defined. See the EventRecorder class interface 31 // within the EpollServer class for more details. 32 #ifdef EPOLL_SERVER_EVENT_TRACING 33 #include <iostream> 34 #include "base/logging.h" 35 #endif 36 37 #include "base/basictypes.h" 38 #include "base/scoped_ptr.h" 39 #include <sys/epoll.h> 40 41 namespace net { 42 43 class EpollServer; 44 class EpollAlarmCallbackInterface; 45 class ReadPipeCallback; 46 class WatchDogToken; 47 48 struct EpollEvent { 49 EpollEvent(int events, bool is_epoll_wait) 50 : in_events(events), 51 out_ready_mask(0) { 52 } 53 54 int in_events; // incoming events 55 int out_ready_mask; // the new event mask for ready list (0 means don't 56 // get on the ready list). This field is always 57 // initialized to 0 when the event is passed to 58 // OnEvent. 59 }; 60 61 // Callbacks which go into EpollServers are expected to derive from this class. 62 class EpollCallbackInterface { 63 public: 64 // Summary: 65 // Called when the callback is registered into a EpollServer. 66 // Args: 67 // eps - the poll server into which this callback was registered 68 // fd - the file descriptor which was registered 69 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 70 // which was registered (and will initially be used 71 // in the epoll() calls) 72 virtual void OnRegistration(EpollServer* eps, int fd, int event_mask) = 0; 73 74 // Summary: 75 // Called when the event_mask is modified (for a file-descriptor) 76 // Args: 77 // fd - the file descriptor which was registered 78 // event_mask - the event mask (composed of EPOLLIN, EPOLLOUT, etc) 79 // which was is now curren (and will be used 80 // in subsequent epoll() calls) 81 virtual void OnModification(int fd, int event_mask) = 0; 82 83 // Summary: 84 // Called whenever an event occurs on the file-descriptor. 85 // This is where the bulk of processing is expected to occur. 86 // Args: 87 // fd - the file descriptor which was registered 88 // event - a struct that contains the event mask (composed of EPOLLIN, 89 // EPOLLOUT, etc), a flag that indicates whether this is a true 90 // epoll_wait event vs one from the ready list, and an output 91 // parameter for OnEvent to inform the EpollServer whether to put 92 // this fd on the ready list. 93 virtual void OnEvent(int fd, EpollEvent* event) = 0; 94 95 // Summary: 96 // Called when the file-descriptor is unregistered from the poll-server. 97 // Args: 98 // fd - the file descriptor which was registered, and of this call, is now 99 // unregistered. 100 // replaced - If true, this callback is being replaced by another, otherwise 101 // it is simply being removed. 102 virtual void OnUnregistration(int fd, bool replaced) = 0; 103 104 // Summary: 105 // Called when the epoll server is shutting down. This is different from 106 // OnUnregistration because the subclass may want to clean up memory. 107 // This is called in leiu of OnUnregistration. 108 // Args: 109 // fd - the file descriptor which was registered. 110 virtual void OnShutdown(EpollServer* eps, int fd) = 0; 111 112 virtual ~EpollCallbackInterface() {} 113 114 protected: 115 EpollCallbackInterface() {} 116 }; 117 118 //////////////////////////////////////////////////////////////////////////////// 119 //////////////////////////////////////////////////////////////////////////////// 120 121 class EpollServer { 122 public: 123 typedef EpollAlarmCallbackInterface AlarmCB; 124 typedef EpollCallbackInterface CB; 125 126 typedef std::multimap<int64, AlarmCB*> TimeToAlarmCBMap; 127 typedef TimeToAlarmCBMap::iterator AlarmRegToken; 128 129 // Summary: 130 // Constructor: 131 // By default, we don't wait any amount of time for events, and 132 // we suggest to the epoll-system that we're going to use on-the-order 133 // of 1024 FDs. 134 EpollServer(); 135 136 //////////////////////////////////////// 137 138 // Destructor 139 virtual ~EpollServer(); 140 141 //////////////////////////////////////// 142 143 // Summary 144 // Register a callback to be called whenever an event contained 145 // in the set of events included in event_mask occurs on the 146 // file-descriptor 'fd' 147 // 148 // Note that only one callback is allowed to be registered for 149 // any specific file-decriptor. 150 // 151 // If a callback is registered for a file-descriptor which has already 152 // been registered, then the previous callback is unregistered with 153 // the 'replaced' flag set to true. I.e. the previous callback's 154 // OnUnregistration() function is called like so: 155 // OnUnregistration(fd, true); 156 // 157 // The epoll server does NOT take on ownership of the callback: the callback 158 // creator is responsible for managing that memory. 159 // 160 // Args: 161 // fd - a valid file-descriptor 162 // cb - an instance of a subclass of EpollCallbackInterface 163 // event_mask - a combination of (EPOLLOUT, EPOLLIN.. etc) indicating 164 // the events for which the callback would like to be 165 // called. 166 virtual void RegisterFD(int fd, CB* cb, int event_mask); 167 168 //////////////////////////////////////// 169 170 // Summary: 171 // A shortcut for RegisterFD which sets things up such that the 172 // callback is called when 'fd' is available for writing. 173 // Args: 174 // fd - a valid file-descriptor 175 // cb - an instance of a subclass of EpollCallbackInterface 176 virtual void RegisterFDForWrite(int fd, CB* cb); 177 178 //////////////////////////////////////// 179 180 // Summary: 181 // A shortcut for RegisterFD which sets things up such that the 182 // callback is called when 'fd' is available for reading or writing. 183 // Args: 184 // fd - a valid file-descriptor 185 // cb - an instance of a subclass of EpollCallbackInterface 186 virtual void RegisterFDForReadWrite(int fd, CB* cb); 187 188 //////////////////////////////////////// 189 190 // Summary: 191 // A shortcut for RegisterFD which sets things up such that the 192 // callback is called when 'fd' is available for reading. 193 // Args: 194 // fd - a valid file-descriptor 195 // cb - an instance of a subclass of EpollCallbackInterface 196 virtual void RegisterFDForRead(int fd, CB* cb); 197 198 //////////////////////////////////////// 199 200 // Summary: 201 // Removes the FD and the associated callback from the pollserver. 202 // If the callback is registered with other FDs, they will continue 203 // to be processed using the callback without modification. 204 // If the file-descriptor specified is not registered in the 205 // epoll_server, then nothing happens as a result of this call. 206 // Args: 207 // fd - the file-descriptor which should no-longer be monitored. 208 virtual void UnregisterFD(int fd); 209 210 //////////////////////////////////////// 211 212 // Summary: 213 // Modifies the event mask for the file-descriptor, replacing 214 // the old event_mask with the new one specified here. 215 // If the file-descriptor specified is not registered in the 216 // epoll_server, then nothing happens as a result of this call. 217 // Args: 218 // fd - the fd whose event mask should be modified. 219 // event_mask - the new event mask. 220 virtual void ModifyCallback(int fd, int event_mask); 221 222 //////////////////////////////////////// 223 224 // Summary: 225 // Modifies the event mask for the file-descriptor such that we 226 // no longer request events when 'fd' is readable. 227 // If the file-descriptor specified is not registered in the 228 // epoll_server, then nothing happens as a result of this call. 229 // Args: 230 // fd - the fd whose event mask should be modified. 231 virtual void StopRead(int fd); 232 233 //////////////////////////////////////// 234 235 // Summary: 236 // Modifies the event mask for the file-descriptor such that we 237 // request events when 'fd' is readable. 238 // If the file-descriptor specified is not registered in the 239 // epoll_server, then nothing happens as a result of this call. 240 // Args: 241 // fd - the fd whose event mask should be modified. 242 virtual void StartRead(int fd); 243 244 //////////////////////////////////////// 245 246 // Summary: 247 // Modifies the event mask for the file-descriptor such that we 248 // no longer request events when 'fd' is writable. 249 // If the file-descriptor specified is not registered in the 250 // epoll_server, then nothing happens as a result of this call. 251 // Args: 252 // fd - the fd whose event mask should be modified. 253 virtual void StopWrite(int fd); 254 255 //////////////////////////////////////// 256 257 // Summary: 258 // Modifies the event mask for the file-descriptor such that we 259 // request events when 'fd' is writable. 260 // If the file-descriptor specified is not registered in the 261 // epoll_server, then nothing happens as a result of this call. 262 // Args: 263 // fd - the fd whose event mask should be modified. 264 virtual void StartWrite(int fd); 265 266 //////////////////////////////////////// 267 268 // Summary: 269 // Looks up the callback associated with the file-desriptor 'fd'. 270 // If a callback is associated with this file-descriptor, then 271 // it's OnEvent() method is called with the file-descriptor 'fd', 272 // and event_mask 'event_mask' 273 // 274 // If no callback is registered for this file-descriptor, nothing 275 // will happen as a result of this call. 276 // 277 // This function is used internally by the EpollServer, but is 278 // available publically so that events might be 'faked'. Calling 279 // this function with an fd and event_mask is equivalent (as far 280 // as the callback is concerned) to having a real event generated 281 // by epoll (except, of course, that read(), etc won't necessarily 282 // be able to read anything) 283 // Args: 284 // fd - the file-descriptor on which an event has occured. 285 // event_mask - a bitmask representing the events which have occured 286 // on/for this fd. This bitmask is composed of 287 // POLLIN, POLLOUT, etc. 288 // 289 void HandleEvent(int fd, int event_mask); 290 291 // Summary: 292 // Call this when you want the pollserver to 293 // wait for events and execute the callbacks associated with 294 // the file-descriptors on which those events have occured. 295 // Depending on the value of timeout_in_us_, this may or may 296 // not return immediately. Please reference the set_timeout() 297 // function for the specific behaviour. 298 virtual void WaitForEventsAndExecuteCallbacks(); 299 300 // Summary: 301 // When an fd is registered to use edge trigger notification, the ready 302 // list can be used to simulate level trigger semantics. Edge trigger 303 // registration doesn't send an initial event, and only rising edge (going 304 // from blocked to unblocked) events are sent. A callback can put itself on 305 // the ready list by calling SetFDReady() after calling RegisterFD(). The 306 // OnEvent method of all callbacks associated with the fds on the ready 307 // list will be called immediately after processing the events returned by 308 // epoll_wait(). The fd is removed from the ready list before the 309 // callback's OnEvent() method is invoked. To stay on the ready list, the 310 // OnEvent() (or some function in that call chain) must call SetFDReady 311 // again. When a fd is unregistered using UnregisterFD(), the fd is 312 // automatically removed from the ready list. 313 // 314 // When the callback for a edge triggered fd hits the falling edge (about 315 // to block, either because of it got an EAGAIN, or had a short read/write 316 // operation), it should remove itself from the ready list using 317 // SetFDNotReady() (since OnEvent cannot distinguish between invocation 318 // from the ready list vs from a normal epoll event). All four ready list 319 // methods are safe to be called within the context of the callbacks. 320 // 321 // Since the ready list invokes EpollCallbackInterface::OnEvent, only fds 322 // that are registered with the EpollServer will be put on the ready list. 323 // SetFDReady() and SetFDNotReady() will do nothing if the EpollServer 324 // doesn't know about the fd passed in. 325 // 326 // Since the ready list cannot reliably determine proper set of events 327 // which should be sent to the callback, SetFDReady() requests the caller 328 // to provide the ready list with the event mask, which will be used later 329 // when OnEvent() is invoked by the ready list. Hence, the event_mask 330 // passedto SetFDReady() does not affect the actual epoll registration of 331 // the fd with the kernel. If a fd is already put on the ready list, and 332 // SetFDReady() is called again for that fd with a different event_mask, 333 // the event_mask will be updated. 334 virtual void SetFDReady(int fd, int events_to_fake); 335 336 virtual void SetFDNotReady(int fd); 337 338 // Summary: 339 // IsFDReady(), ReadyListSize(), and VerifyReadyList are intended as 340 // debugging tools and for writing unit tests. 341 // ISFDReady() returns whether a fd is in the ready list. 342 // ReadyListSize() returns the number of fds on the ready list. 343 // VerifyReadyList() checks the consistency of internal data structure. It 344 // will CHECK if it finds an error. 345 virtual bool IsFDReady(int fd) const; 346 347 size_t ReadyListSize() const { return ready_list_size_; } 348 349 void VerifyReadyList() const; 350 351 //////////////////////////////////////// 352 353 // Summary: 354 // Registers an alarm 'ac' to go off at time 'timeout_time_in_us'. 355 // If the callback returns a positive number from its OnAlarm() function, 356 // then the callback will be re-registered at that time, else the alarm 357 // owner is responsible for freeing up memory. 358 // 359 // Important: A give AlarmCB* can not be registered again if it is already 360 // registered. If a user wants to register a callback again it should first 361 // unregister the previous callback before calling RegisterAlarm again. 362 // Args: 363 // timeout_time_in_us - the absolute time at which the alarm should go off 364 // ac - the alarm which will be called. 365 virtual void RegisterAlarm(int64 timeout_time_in_us, AlarmCB* ac); 366 367 // Summary: 368 // Registers an alarm 'ac' to go off at time: (ApproximateNowInUs() + 369 // delta_in_us). While this is somewhat less accurate (see the description 370 // for ApproximateNowInUs() to see how 'approximate'), the error is never 371 // worse than the amount of time it takes to process all events in one 372 // WaitForEvents. As with 'RegisterAlarm()', if the callback returns a 373 // positive number from its OnAlarm() function, then the callback will be 374 // re-registered at that time, else the alarm owner is responsible for 375 // freeing up memory. 376 // Note that this function is purely a convienence. The 377 // same thing may be accomplished by using RegisterAlarm with 378 // ApproximateNowInUs() directly. 379 // 380 // Important: A give AlarmCB* can not be registered again if it is already 381 // registered. If a user wants to register a callback again it should first 382 // unregister the previous callback before calling RegisterAlarm again. 383 // Args: 384 // delta_in_us - the delta in microseconds from the ApproximateTimeInUs() at 385 // which point the alarm should go off. 386 // ac - the alarm which will be called. 387 void RegisterAlarmApproximateDelta(int64 delta_in_us, AlarmCB* ac) { 388 RegisterAlarm(ApproximateNowInUsec() + delta_in_us, ac); 389 } 390 391 //////////////////////////////////////// 392 393 // Summary: 394 // Unregister the alarm referred to by iterator_token; Callers should 395 // be warned that a token may have become already invalid when OnAlarm() 396 // is called, was unregistered, or OnShutdown was called on that alarm. 397 // Args: 398 // iterator_token - iterator to the alarm callback to unregister. 399 virtual void UnregisterAlarm( 400 const EpollServer::AlarmRegToken& iterator_token); 401 402 //////////////////////////////////////// 403 404 // Summary: 405 // returns the number of file-descriptors registered in this EpollServer. 406 // Returns: 407 // number of FDs registered (discounting the internal pipe used for Wake) 408 virtual int NumFDsRegistered() const; 409 410 // Summary: 411 // Force the epoll server to wake up (by writing to an internal pipe). 412 virtual void Wake(); 413 414 // Summary: 415 // Wrapper around WallTimer's NowInUsec. We do this so that we can test 416 // EpollServer without using the system clock (and can avoid the flakiness 417 // that would ensue) 418 // Returns: 419 // the current time as number of microseconds since the Unix epoch. 420 virtual int64 NowInUsec() const; 421 422 // Summary: 423 // Since calling NowInUsec() many thousands of times per 424 // WaitForEventsAndExecuteCallbacks function call is, to say the least, 425 // inefficient, we allow users to use an approximate time instead. The 426 // time returned from this function is as accurate as NowInUsec() when 427 // WaitForEventsAndExecuteCallbacks is not an ancestor of the caller's 428 // callstack. 429 // However, when WaitForEventsAndExecuteCallbacks -is- an ancestor, then 430 // this function returns the time at which the 431 // WaitForEventsAndExecuteCallbacks function started to process events or 432 // alarms. 433 // 434 // Essentially, this function makes available a fast and mostly accurate 435 // mechanism for getting the time for any function handling an event or 436 // alarm. When functions which are not handling callbacks or alarms call 437 // this function, they get the slow and "absolutely" accurate time. 438 // 439 // Users should be encouraged to use this function. 440 // Returns: 441 // the "approximate" current time as number of microseconds since the Unix 442 // epoch. 443 virtual int64 ApproximateNowInUsec() const { 444 if (recorded_now_in_us_ != 0) { 445 return recorded_now_in_us_; 446 } 447 return this->NowInUsec(); 448 } 449 450 static std::string EventMaskToString(int event_mask); 451 452 // Summary: 453 // Logs the state of the epoll server with LOG(ERROR). 454 void LogStateOnCrash(); 455 456 // Summary: 457 // Set the timeout to the value specified. 458 // If the timeout is set to a negative number, 459 // WaitForEventsAndExecuteCallbacks() will only return when an event has 460 // occured 461 // If the timeout is set to zero, 462 // WaitForEventsAndExecuteCallbacks() will return immediately 463 // If the timeout is set to a positive number, 464 // WaitForEventsAndExecuteCallbacks() will return when an event has 465 // occured, or when timeout_in_us microseconds has elapsed, whichever 466 // is first. 467 // Args: 468 // timeout_in_us - value specified depending on behaviour desired. 469 // See above. 470 void set_timeout_in_us(int64 timeout_in_us) { 471 timeout_in_us_ = timeout_in_us; 472 } 473 474 //////////////////////////////////////// 475 476 // Summary: 477 // Accessor for the current value of timeout_in_us. 478 int timeout_in_us() const { return timeout_in_us_; } 479 480 // Summary: 481 // Returns true when the EpollServer() is being destroyed. 482 bool in_shutdown() const { return in_shutdown_; } 483 484 bool ContainsAlarm(EpollAlarmCallbackInterface* alarm) const { 485 return all_alarms_.find(alarm) != all_alarms_.end(); 486 } 487 488 // Summary: 489 // A function for implementing the ready list. It invokes OnEvent for each 490 // of the fd in the ready list, and takes care of adding them back to the 491 // ready list if the callback requests it (by checking that out_ready_mask 492 // is non-zero). 493 void CallReadyListCallbacks(); 494 495 // Granularity at which time moves when considering what alarms are on. 496 // See function: DoRoundingOnNow() on exact usage. 497 static const int kMinimumEffectiveAlarmQuantum; 498 protected: 499 500 // These have to be in the .h file so that we can override them in tests. 501 virtual inline int GetFlags(int fd) { return fcntl(fd, F_GETFL, 0); } 502 inline int SetFlags(int fd, int flags) { 503 return fcntl(fd, F_SETFL, flags | O_NONBLOCK); 504 } 505 506 virtual void SetNonblocking (int fd); 507 508 // This exists here so that we can override this function in unittests 509 // in order to make effective mock EpollServer objects. 510 virtual int epoll_wait_impl(int epfd, 511 struct epoll_event* events, 512 int max_events, 513 int timeout_in_ms) { 514 return epoll_wait(epfd, events, max_events, timeout_in_ms); 515 } 516 517 // this struct is used internally, and is never used by anything external 518 // to this class. Some of its members are declared mutable to get around the 519 // restriction imposed by hash_set. Since hash_set knows nothing about the 520 // objects it stores, it has to assume that every bit of the object is used 521 // in the hash function and equal_to comparison. Thus hash_set::iterator is a 522 // const iterator. In this case, the only thing that must stay constant is 523 // fd. Everything else are just along for the ride and changing them doesn't 524 // compromise the hash_set integrity. 525 struct CBAndEventMask { 526 CBAndEventMask() 527 : cb(NULL), 528 fd(-1), 529 event_mask(0), 530 events_asserted(0), 531 events_to_fake(0), 532 in_use(false) { 533 entry.le_next = NULL; 534 entry.le_prev = NULL; 535 } 536 537 CBAndEventMask(EpollCallbackInterface* cb, 538 int event_mask, 539 int fd) 540 : cb(cb), fd(fd), event_mask(event_mask), events_asserted(0), 541 events_to_fake(0), in_use(false) { 542 entry.le_next = NULL; 543 entry.le_prev = NULL; 544 } 545 546 // Required operator for hash_set. Normally operator== should be a free 547 // standing function. However, since CBAndEventMask is a protected type and 548 // it will never be a base class, it makes no difference. 549 bool operator==(const CBAndEventMask& cb_and_mask) const { 550 return fd == cb_and_mask.fd; 551 } 552 // A callback. If the fd is unregistered inside the callchain of OnEvent, 553 // the cb will be set to NULL. 554 mutable EpollCallbackInterface* cb; 555 556 mutable LIST_ENTRY(CBAndEventMask) entry; 557 // file descriptor registered with the epoll server. 558 int fd; 559 // the current event_mask registered for this callback. 560 mutable int event_mask; 561 // the event_mask that was returned by epoll 562 mutable int events_asserted; 563 // the event_mask for the ready list to use to call OnEvent. 564 mutable int events_to_fake; 565 // toggle around calls to OnEvent to tell UnregisterFD to not erase the 566 // iterator because HandleEvent is using it. 567 mutable bool in_use; 568 }; 569 570 // Custom hash function to be used by hash_set. 571 struct CBAndEventMaskHash { 572 size_t operator()(const CBAndEventMask& cb_and_eventmask) const { 573 return static_cast<size_t>(cb_and_eventmask.fd); 574 } 575 }; 576 577 typedef __gnu_cxx::hash_set<CBAndEventMask, CBAndEventMaskHash> FDToCBMap; 578 579 // the following four functions are OS-specific, and are likely 580 // to be changed in a subclass if the poll/select method is changed 581 // from epoll. 582 583 // Summary: 584 // Deletes a file-descriptor from the set of FDs that should be 585 // monitored with epoll. 586 // Note that this only deals with modifying data relating -directly- 587 // with the epoll call-- it does not modify any data within the 588 // epoll_server. 589 // Args: 590 // fd - the file descriptor to-be-removed from the monitoring set 591 virtual void DelFD(int fd) const; 592 593 //////////////////////////////////////// 594 595 // Summary: 596 // Adds a file-descriptor to the set of FDs that should be 597 // monitored with epoll. 598 // Note that this only deals with modifying data relating -directly- 599 // with the epoll call. 600 // Args: 601 // fd - the file descriptor to-be-added to the monitoring set 602 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 603 // OR'd together) which will be associated with this 604 // FD initially. 605 virtual void AddFD(int fd, int event_mask) const; 606 607 //////////////////////////////////////// 608 609 // Summary: 610 // Modifies a file-descriptor in the set of FDs that should be 611 // monitored with epoll. 612 // Note that this only deals with modifying data relating -directly- 613 // with the epoll call. 614 // Args: 615 // fd - the file descriptor to-be-added to the monitoring set 616 // event_mask - the event mask (consisting of EPOLLIN, EPOLLOUT, etc 617 // OR'd together) which will be associated with this 618 // FD after this call. 619 virtual void ModFD(int fd, int event_mask) const; 620 621 //////////////////////////////////////// 622 623 // Summary: 624 // Modified the event mask associated with an FD in the set of 625 // data needed by epoll. 626 // Events are removed before they are added, thus, if ~0 is put 627 // in 'remove_event', whatever is put in 'add_event' will be 628 // the new event mask. 629 // If the file-descriptor specified is not registered in the 630 // epoll_server, then nothing happens as a result of this call. 631 // Args: 632 // fd - the file descriptor whose event mask is to be modified 633 // remove_event - the events which are to be removed from the current 634 // event_mask 635 // add_event - the events which are to be added to the current event_mask 636 // 637 // 638 virtual void ModifyFD(int fd, int remove_event, int add_event); 639 640 //////////////////////////////////////// 641 642 // Summary: 643 // Waits for events, and calls HandleEvents() for each 644 // fd, event pair discovered to possibly have an event. 645 // Note that a callback (B) may get a spurious event if 646 // another callback (A) has closed a file-descriptor N, and 647 // the callback (B) has a newly opened file-descriptor, which 648 // also happens to be N. 649 virtual void WaitForEventsAndCallHandleEvents(int64 timeout_in_us, 650 struct epoll_event events[], 651 int events_size); 652 653 654 655 // Summary: 656 // An internal function for implementing the ready list. It adds a fd's 657 // CBAndEventMask to the ready list. If the fd is already on the ready 658 // list, it is a no-op. 659 void AddToReadyList(CBAndEventMask* cb_and_mask); 660 661 // Summary: 662 // An internal function for implementing the ready list. It remove a fd's 663 // CBAndEventMask from the ready list. If the fd is not on the ready list, 664 // it is a no-op. 665 void RemoveFromReadyList(const CBAndEventMask& cb_and_mask); 666 667 // Summary: 668 // Calls any pending alarms that should go off and reregisters them if they 669 // were recurring. 670 virtual void CallAndReregisterAlarmEvents(); 671 672 // The file-descriptor created for epolling 673 int epoll_fd_; 674 675 // The mapping of file-descriptor to CBAndEventMasks 676 FDToCBMap cb_map_; 677 678 // Custom hash function to be used by hash_set. 679 struct AlarmCBHash { 680 size_t operator()(AlarmCB*const& p) const { 681 return reinterpret_cast<size_t>(p); 682 } 683 }; 684 685 686 // TOOD(sushantj): Having this hash_set is avoidable. We currently have it 687 // only so that we can enforce stringent checks that a caller can not register 688 // the same alarm twice. One option is to have an implementation in which 689 // this hash_set is used only in the debug mode. 690 typedef __gnu_cxx::hash_set<AlarmCB*, AlarmCBHash> AlarmCBMap; 691 AlarmCBMap all_alarms_; 692 693 TimeToAlarmCBMap alarm_map_; 694 695 // The amount of time in microseconds that we'll wait before returning 696 // from the WaitForEventsAndExecuteCallbacks() function. 697 // If this is positive, wait that many microseconds. 698 // If this is negative, wait forever, or for the first event that occurs 699 // If this is zero, never wait for an event. 700 int64 timeout_in_us_; 701 702 // This is nonzero only after the invocation of epoll_wait_impl within 703 // WaitForEventsAndCallHandleEvents and before the function 704 // WaitForEventsAndExecuteCallbacks returns. At all other times, this is 705 // zero. This enables us to have relatively accurate time returned from the 706 // ApproximateNowInUs() function. See that function for more details. 707 int64 recorded_now_in_us_; 708 709 // This is used to implement CallAndReregisterAlarmEvents. This stores 710 // all alarms that were reregistered because OnAlarm() returned a 711 // value > 0 and the time at which they should be executed is less that 712 // the current time. By storing such alarms in this map we ensure 713 // that while calling CallAndReregisterAlarmEvents we do not call 714 // OnAlarm on any alarm in this set. This ensures that we do not 715 // go in an infinite loop. 716 AlarmCBMap alarms_reregistered_and_should_be_skipped_; 717 718 LIST_HEAD(ReadyList, CBAndEventMask) ready_list_; 719 LIST_HEAD(TmpList, CBAndEventMask) tmp_list_; 720 int ready_list_size_; 721 // TODO(alyssar): make this into something that scales up. 722 static const int events_size_ = 256; 723 struct epoll_event events_[256]; 724 725 // These controls the granularity for alarms 726 // See function CallAndReregisterAlarmEvents() 727 // TODO(sushantj): Add test for this. 728 int64 DoRoundingOnNow(int64 now_in_us) const; 729 730 #ifdef EPOLL_SERVER_EVENT_TRACING 731 struct EventRecorder { 732 public: 733 EventRecorder() : num_records_(0), record_threshold_(10000) {} 734 735 ~EventRecorder() { 736 Clear(); 737 } 738 739 // When a number of events equals the record threshold, 740 // the collected data summary for all FDs will be written 741 // to LOG(INFO). Note that this does not include the 742 // individual events (if you'reinterested in those, you'll 743 // have to get at them programmatically). 744 // After any such flushing to LOG(INFO) all events will 745 // be cleared. 746 // Note that the definition of an 'event' is a bit 'hazy', 747 // as it includes the 'Unregistration' event, and perhaps 748 // others. 749 void set_record_threshold(int64 new_threshold) { 750 record_threshold_ = new_threshold; 751 } 752 753 void Clear() { 754 for (int i = 0; i < debug_events_.size(); ++i) { 755 delete debug_events_[i]; 756 } 757 debug_events_.clear(); 758 unregistered_fds_.clear(); 759 event_counts_.clear(); 760 } 761 762 void MaybeRecordAndClear() { 763 ++num_records_; 764 if ((num_records_ > record_threshold_) && 765 (record_threshold_ > 0)) { 766 LOG(INFO) << "\n" << *this; 767 num_records_ = 0; 768 Clear(); 769 } 770 } 771 772 void RecordFDMaskEvent(int fd, int mask, const char* function) { 773 FDMaskOutput* fdmo = new FDMaskOutput(fd, mask, function); 774 debug_events_.push_back(fdmo); 775 MaybeRecordAndClear(); 776 } 777 778 void RecordEpollWaitEvent(int timeout_in_ms, 779 int num_events_generated) { 780 EpollWaitOutput* ewo = new EpollWaitOutput(timeout_in_ms, 781 num_events_generated); 782 debug_events_.push_back(ewo); 783 MaybeRecordAndClear(); 784 } 785 786 void RecordEpollEvent(int fd, int event_mask) { 787 Events& events_for_fd = event_counts_[fd]; 788 events_for_fd.AssignFromMask(event_mask); 789 MaybeRecordAndClear(); 790 } 791 792 friend ostream& operator<<(ostream& os, const EventRecorder& er) { 793 for (int i = 0; i < er.unregistered_fds_.size(); ++i) { 794 os << "fd: " << er.unregistered_fds_[i] << "\n"; 795 os << er.unregistered_fds_[i]; 796 } 797 for (EventCountsMap::const_iterator i = er.event_counts_.begin(); 798 i != er.event_counts_.end(); 799 ++i) { 800 os << "fd: " << i->first << "\n"; 801 os << i->second; 802 } 803 for (int i = 0; i < er.debug_events_.size(); ++i) { 804 os << *(er.debug_events_[i]) << "\n"; 805 } 806 return os; 807 } 808 809 void RecordUnregistration(int fd) { 810 EventCountsMap::iterator i = event_counts_.find(fd); 811 if (i != event_counts_.end()) { 812 unregistered_fds_.push_back(i->second); 813 event_counts_.erase(i); 814 } 815 MaybeRecordAndClear(); 816 } 817 818 protected: 819 class DebugOutput { 820 public: 821 friend ostream& operator<<(ostream& os, const DebugOutput& debug_output) { 822 debug_output.OutputToStream(os); 823 return os; 824 } 825 virtual void OutputToStream(ostream* os) const = 0; 826 virtual ~DebugOutput() {} 827 }; 828 829 class FDMaskOutput : public DebugOutput { 830 public: 831 FDMaskOutput(int fd, int mask, const char* function) : 832 fd_(fd), mask_(mask), function_(function) {} 833 virtual void OutputToStream(ostream* os) const { 834 (*os) << "func: " << function_ 835 << "\tfd: " << fd_; 836 if (mask_ != 0) { 837 (*os) << "\tmask: " << EventMaskToString(mask_); 838 } 839 } 840 int fd_; 841 int mask_; 842 const char* function_; 843 }; 844 845 class EpollWaitOutput : public DebugOutput { 846 public: 847 EpollWaitOutput(int timeout_in_ms, 848 int num_events_generated) : 849 timeout_in_ms_(timeout_in_ms), 850 num_events_generated_(num_events_generated) {} 851 virtual void OutputToStream(ostream* os) const { 852 (*os) << "timeout_in_ms: " << timeout_in_ms_ 853 << "\tnum_events_generated: " << num_events_generated_; 854 } 855 protected: 856 int timeout_in_ms_; 857 int num_events_generated_; 858 }; 859 860 struct Events { 861 Events() : 862 epoll_in(0), 863 epoll_pri(0), 864 epoll_out(0), 865 epoll_rdnorm(0), 866 epoll_rdband(0), 867 epoll_wrnorm(0), 868 epoll_wrband(0), 869 epoll_msg(0), 870 epoll_err(0), 871 epoll_hup(0), 872 epoll_oneshot(0), 873 epoll_et(0) {} 874 875 void AssignFromMask(int event_mask) { 876 if (event_mask & EPOLLIN) ++epoll_in; 877 if (event_mask & EPOLLPRI) ++epoll_pri; 878 if (event_mask & EPOLLOUT) ++epoll_out; 879 if (event_mask & EPOLLRDNORM) ++epoll_rdnorm; 880 if (event_mask & EPOLLRDBAND) ++epoll_rdband; 881 if (event_mask & EPOLLWRNORM) ++epoll_wrnorm; 882 if (event_mask & EPOLLWRBAND) ++epoll_wrband; 883 if (event_mask & EPOLLMSG) ++epoll_msg; 884 if (event_mask & EPOLLERR) ++epoll_err; 885 if (event_mask & EPOLLHUP) ++epoll_hup; 886 if (event_mask & EPOLLONESHOT) ++epoll_oneshot; 887 if (event_mask & EPOLLET) ++epoll_et; 888 }; 889 890 friend ostream& operator<<(ostream& os, const Events& ev) { 891 if (ev.epoll_in) { 892 os << "\t EPOLLIN: " << ev.epoll_in << "\n"; 893 } 894 if (ev.epoll_pri) { 895 os << "\t EPOLLPRI: " << ev.epoll_pri << "\n"; 896 } 897 if (ev.epoll_out) { 898 os << "\t EPOLLOUT: " << ev.epoll_out << "\n"; 899 } 900 if (ev.epoll_rdnorm) { 901 os << "\t EPOLLRDNORM: " << ev.epoll_rdnorm << "\n"; 902 } 903 if (ev.epoll_rdband) { 904 os << "\t EPOLLRDBAND: " << ev.epoll_rdband << "\n"; 905 } 906 if (ev.epoll_wrnorm) { 907 os << "\t EPOLLWRNORM: " << ev.epoll_wrnorm << "\n"; 908 } 909 if (ev.epoll_wrband) { 910 os << "\t EPOLLWRBAND: " << ev.epoll_wrband << "\n"; 911 } 912 if (ev.epoll_msg) { 913 os << "\t EPOLLMSG: " << ev.epoll_msg << "\n"; 914 } 915 if (ev.epoll_err) { 916 os << "\t EPOLLERR: " << ev.epoll_err << "\n"; 917 } 918 if (ev.epoll_hup) { 919 os << "\t EPOLLHUP: " << ev.epoll_hup << "\n"; 920 } 921 if (ev.epoll_oneshot) { 922 os << "\t EPOLLONESHOT: " << ev.epoll_oneshot << "\n"; 923 } 924 if (ev.epoll_et) { 925 os << "\t EPOLLET: " << ev.epoll_et << "\n"; 926 } 927 return os; 928 } 929 930 unsigned int epoll_in; 931 unsigned int epoll_pri; 932 unsigned int epoll_out; 933 unsigned int epoll_rdnorm; 934 unsigned int epoll_rdband; 935 unsigned int epoll_wrnorm; 936 unsigned int epoll_wrband; 937 unsigned int epoll_msg; 938 unsigned int epoll_err; 939 unsigned int epoll_hup; 940 unsigned int epoll_oneshot; 941 unsigned int epoll_et; 942 }; 943 944 std::vector<DebugOutput*> debug_events_; 945 std::vector<Events> unregistered_fds_; 946 typedef __gnu_cxx::hash_map<int, Events> EventCountsMap; 947 EventCountsMap event_counts_; 948 int64 num_records_; 949 int64 record_threshold_; 950 }; 951 952 void ClearEventRecords() { 953 event_recorder_.Clear(); 954 } 955 void WriteEventRecords(ostream* os) const { 956 (*os) << event_recorder_; 957 } 958 959 mutable EventRecorder event_recorder_; 960 961 #endif 962 963 private: 964 // Helper functions used in the destructor. 965 void CleanupFDToCBMap(); 966 void CleanupTimeToAlarmCBMap(); 967 968 // The callback registered to the fds below. As the purpose of their 969 // registration is to wake the epoll server it just clears the pipe and 970 // returns. 971 scoped_ptr<ReadPipeCallback> wake_cb_; 972 973 // A pipe owned by the epoll server. The server will be registered to listen 974 // on read_fd_ and can be woken by Wake() which writes to write_fd_. 975 int read_fd_; 976 int write_fd_; 977 978 // This boolean is checked to see if it is false at the top of the 979 // WaitForEventsAndExecuteCallbacks function. If not, then it either returns 980 // without doing work, and logs to ERROR, or aborts the program (in 981 // DEBUG mode). If so, then it sets the bool to true, does work, and 982 // sets it back to false when done. This catches unwanted recursion. 983 bool in_wait_for_events_and_execute_callbacks_; 984 985 // Returns true when the EpollServer() is being destroyed. 986 bool in_shutdown_; 987 988 DISALLOW_COPY_AND_ASSIGN(EpollServer); 989 }; 990 991 class EpollAlarmCallbackInterface { 992 public: 993 // Summary: 994 // Called when an alarm times out. Invalidates an AlarmRegToken. 995 // WARNING: If a token was saved to refer to an alarm callback, OnAlarm must 996 // delete it, as the reference is no longer valid. 997 // Returns: 998 // the unix time (in microseconds) at which this alarm should be signaled 999 // again, or 0 if the alarm should be removed. 1000 virtual int64 OnAlarm() = 0; 1001 1002 // Summary: 1003 // Called when the an alarm is registered. Invalidates an AlarmRegToken. 1004 // Args: 1005 // token: the iterator to the the alarm registered in the alarm map. 1006 // WARNING: this token becomes invalid when the alarm fires, is 1007 // unregistered, or OnShutdown is called on that alarm. 1008 // eps: the epoll server the alarm is registered with. 1009 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 1010 EpollServer* eps) = 0; 1011 1012 // Summary: 1013 // Called when the an alarm is unregistered. 1014 // WARNING: It is not valid to unregister a callback and then use the token 1015 // that was saved to refer to the callback. 1016 virtual void OnUnregistration() = 0; 1017 1018 // Summary: 1019 // Called when the epoll server is shutting down. 1020 // Invalidates the AlarmRegToken that was given when this alarm was 1021 // registered. 1022 virtual void OnShutdown(EpollServer* eps) = 0; 1023 1024 virtual ~EpollAlarmCallbackInterface() {} 1025 1026 protected: 1027 EpollAlarmCallbackInterface() {} 1028 }; 1029 1030 // A simple alarm which unregisters itself on destruction. 1031 // 1032 // PLEASE NOTE: 1033 // Any classes overriding these functions must either call the implementation 1034 // of the parent class, or is must otherwise make sure that the 'registered_' 1035 // boolean and the token, 'token_', are updated appropriately. 1036 class EpollAlarm : public EpollAlarmCallbackInterface { 1037 public: 1038 EpollAlarm(); 1039 1040 virtual ~EpollAlarm(); 1041 1042 // Marks the alarm as unregistered and returns 0. The return value may be 1043 // safely ignored by subclasses. 1044 virtual int64 OnAlarm(); 1045 1046 // Marks the alarm as registered, and stores the token. 1047 virtual void OnRegistration(const EpollServer::AlarmRegToken& token, 1048 EpollServer* eps); 1049 1050 // Marks the alarm as unregistered. 1051 virtual void OnUnregistration(); 1052 1053 // Marks the alarm as unregistered. 1054 virtual void OnShutdown(EpollServer* eps); 1055 1056 // If the alarm was registered, unregister it. 1057 void UnregisterIfRegistered(); 1058 1059 bool registered() const { return registered_; } 1060 1061 const EpollServer* eps() const { return eps_; } 1062 1063 private: 1064 EpollServer::AlarmRegToken token_; 1065 EpollServer* eps_; 1066 bool registered_; 1067 }; 1068 1069 } // namespace net 1070 1071 #endif // NET_TOOLS_FLIP_SERVER_EPOLL_SERVER_H_ 1072 1073