1 // Copyright 2013 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/socket/tcp_socket.h" 6 #include "net/socket/tcp_socket_win.h" 7 8 #include <mstcpip.h> 9 10 #include "base/callback_helpers.h" 11 #include "base/logging.h" 12 #include "base/metrics/stats_counters.h" 13 #include "base/win/windows_version.h" 14 #include "net/base/address_list.h" 15 #include "net/base/connection_type_histograms.h" 16 #include "net/base/io_buffer.h" 17 #include "net/base/ip_endpoint.h" 18 #include "net/base/net_errors.h" 19 #include "net/base/net_util.h" 20 #include "net/base/network_change_notifier.h" 21 #include "net/base/winsock_init.h" 22 #include "net/base/winsock_util.h" 23 #include "net/socket/socket_descriptor.h" 24 #include "net/socket/socket_net_log_params.h" 25 26 namespace net { 27 28 namespace { 29 30 const int kTCPKeepAliveSeconds = 45; 31 32 int SetSocketReceiveBufferSize(SOCKET socket, int32 size) { 33 int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF, 34 reinterpret_cast<const char*>(&size), sizeof(size)); 35 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError()); 36 DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error; 37 return net_error; 38 } 39 40 int SetSocketSendBufferSize(SOCKET socket, int32 size) { 41 int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF, 42 reinterpret_cast<const char*>(&size), sizeof(size)); 43 int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError()); 44 DCHECK(!rv) << "Could not set socket send buffer size: " << net_error; 45 return net_error; 46 } 47 48 // Disable Nagle. 49 // The Nagle implementation on windows is governed by RFC 896. The idea 50 // behind Nagle is to reduce small packets on the network. When Nagle is 51 // enabled, if a partial packet has been sent, the TCP stack will disallow 52 // further *partial* packets until an ACK has been received from the other 53 // side. Good applications should always strive to send as much data as 54 // possible and avoid partial-packet sends. However, in most real world 55 // applications, there are edge cases where this does not happen, and two 56 // partial packets may be sent back to back. For a browser, it is NEVER 57 // a benefit to delay for an RTT before the second packet is sent. 58 // 59 // As a practical example in Chromium today, consider the case of a small 60 // POST. I have verified this: 61 // Client writes 649 bytes of header (partial packet #1) 62 // Client writes 50 bytes of POST data (partial packet #2) 63 // In the above example, with Nagle, a RTT delay is inserted between these 64 // two sends due to nagle. RTTs can easily be 100ms or more. The best 65 // fix is to make sure that for POSTing data, we write as much data as 66 // possible and minimize partial packets. We will fix that. But disabling 67 // Nagle also ensure we don't run into this delay in other edge cases. 68 // See also: 69 // http://technet.microsoft.com/en-us/library/bb726981.aspx 70 bool DisableNagle(SOCKET socket, bool disable) { 71 BOOL val = disable ? TRUE : FALSE; 72 int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY, 73 reinterpret_cast<const char*>(&val), 74 sizeof(val)); 75 DCHECK(!rv) << "Could not disable nagle"; 76 return rv == 0; 77 } 78 79 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP 80 // connections. See http://crbug.com/27400 for details. 81 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) { 82 int delay = delay_secs * 1000; 83 struct tcp_keepalive keepalive_vals = { 84 enable ? 1 : 0, // TCP keep-alive on. 85 delay, // Delay seconds before sending first TCP keep-alive packet. 86 delay, // Delay seconds between sending TCP keep-alive packets. 87 }; 88 DWORD bytes_returned = 0xABAB; 89 int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals, 90 sizeof(keepalive_vals), NULL, 0, 91 &bytes_returned, NULL, NULL); 92 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket 93 << " [error: " << WSAGetLastError() << "]."; 94 95 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive. 96 return rv == 0; 97 } 98 99 int MapConnectError(int os_error) { 100 switch (os_error) { 101 // connect fails with WSAEACCES when Windows Firewall blocks the 102 // connection. 103 case WSAEACCES: 104 return ERR_NETWORK_ACCESS_DENIED; 105 case WSAETIMEDOUT: 106 return ERR_CONNECTION_TIMED_OUT; 107 default: { 108 int net_error = MapSystemError(os_error); 109 if (net_error == ERR_FAILED) 110 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED. 111 112 // Give a more specific error when the user is offline. 113 if (net_error == ERR_ADDRESS_UNREACHABLE && 114 NetworkChangeNotifier::IsOffline()) { 115 return ERR_INTERNET_DISCONNECTED; 116 } 117 118 return net_error; 119 } 120 } 121 } 122 123 } // namespace 124 125 //----------------------------------------------------------------------------- 126 127 // Nothing to do for Windows since it doesn't support TCP FastOpen. 128 // TODO(jri): Remove these along with the corresponding global variables. 129 bool IsTCPFastOpenSupported() { return false; } 130 bool IsTCPFastOpenUserEnabled() { return false; } 131 void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {} 132 133 // This class encapsulates all the state that has to be preserved as long as 134 // there is a network IO operation in progress. If the owner TCPSocketWin is 135 // destroyed while an operation is in progress, the Core is detached and it 136 // lives until the operation completes and the OS doesn't reference any resource 137 // declared on this class anymore. 138 class TCPSocketWin::Core : public base::RefCounted<Core> { 139 public: 140 explicit Core(TCPSocketWin* socket); 141 142 // Start watching for the end of a read or write operation. 143 void WatchForRead(); 144 void WatchForWrite(); 145 146 // The TCPSocketWin is going away. 147 void Detach() { socket_ = NULL; } 148 149 // The separate OVERLAPPED variables for asynchronous operation. 150 // |read_overlapped_| is used for both Connect() and Read(). 151 // |write_overlapped_| is only used for Write(); 152 OVERLAPPED read_overlapped_; 153 OVERLAPPED write_overlapped_; 154 155 // The buffers used in Read() and Write(). 156 scoped_refptr<IOBuffer> read_iobuffer_; 157 scoped_refptr<IOBuffer> write_iobuffer_; 158 int read_buffer_length_; 159 int write_buffer_length_; 160 161 bool non_blocking_reads_initialized_; 162 163 private: 164 friend class base::RefCounted<Core>; 165 166 class ReadDelegate : public base::win::ObjectWatcher::Delegate { 167 public: 168 explicit ReadDelegate(Core* core) : core_(core) {} 169 virtual ~ReadDelegate() {} 170 171 // base::ObjectWatcher::Delegate methods: 172 virtual void OnObjectSignaled(HANDLE object); 173 174 private: 175 Core* const core_; 176 }; 177 178 class WriteDelegate : public base::win::ObjectWatcher::Delegate { 179 public: 180 explicit WriteDelegate(Core* core) : core_(core) {} 181 virtual ~WriteDelegate() {} 182 183 // base::ObjectWatcher::Delegate methods: 184 virtual void OnObjectSignaled(HANDLE object); 185 186 private: 187 Core* const core_; 188 }; 189 190 ~Core(); 191 192 // The socket that created this object. 193 TCPSocketWin* socket_; 194 195 // |reader_| handles the signals from |read_watcher_|. 196 ReadDelegate reader_; 197 // |writer_| handles the signals from |write_watcher_|. 198 WriteDelegate writer_; 199 200 // |read_watcher_| watches for events from Connect() and Read(). 201 base::win::ObjectWatcher read_watcher_; 202 // |write_watcher_| watches for events from Write(); 203 base::win::ObjectWatcher write_watcher_; 204 205 DISALLOW_COPY_AND_ASSIGN(Core); 206 }; 207 208 TCPSocketWin::Core::Core(TCPSocketWin* socket) 209 : read_buffer_length_(0), 210 write_buffer_length_(0), 211 non_blocking_reads_initialized_(false), 212 socket_(socket), 213 reader_(this), 214 writer_(this) { 215 memset(&read_overlapped_, 0, sizeof(read_overlapped_)); 216 memset(&write_overlapped_, 0, sizeof(write_overlapped_)); 217 218 read_overlapped_.hEvent = WSACreateEvent(); 219 write_overlapped_.hEvent = WSACreateEvent(); 220 } 221 222 TCPSocketWin::Core::~Core() { 223 // Make sure the message loop is not watching this object anymore. 224 read_watcher_.StopWatching(); 225 write_watcher_.StopWatching(); 226 227 WSACloseEvent(read_overlapped_.hEvent); 228 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_)); 229 WSACloseEvent(write_overlapped_.hEvent); 230 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_)); 231 } 232 233 void TCPSocketWin::Core::WatchForRead() { 234 // We grab an extra reference because there is an IO operation in progress. 235 // Balanced in ReadDelegate::OnObjectSignaled(). 236 AddRef(); 237 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_); 238 } 239 240 void TCPSocketWin::Core::WatchForWrite() { 241 // We grab an extra reference because there is an IO operation in progress. 242 // Balanced in WriteDelegate::OnObjectSignaled(). 243 AddRef(); 244 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_); 245 } 246 247 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) { 248 DCHECK_EQ(object, core_->read_overlapped_.hEvent); 249 if (core_->socket_) { 250 if (core_->socket_->waiting_connect_) 251 core_->socket_->DidCompleteConnect(); 252 else 253 core_->socket_->DidSignalRead(); 254 } 255 256 core_->Release(); 257 } 258 259 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled( 260 HANDLE object) { 261 DCHECK_EQ(object, core_->write_overlapped_.hEvent); 262 if (core_->socket_) 263 core_->socket_->DidCompleteWrite(); 264 265 core_->Release(); 266 } 267 268 //----------------------------------------------------------------------------- 269 270 TCPSocketWin::TCPSocketWin(net::NetLog* net_log, 271 const net::NetLog::Source& source) 272 : socket_(INVALID_SOCKET), 273 accept_event_(WSA_INVALID_EVENT), 274 accept_socket_(NULL), 275 accept_address_(NULL), 276 waiting_connect_(false), 277 waiting_read_(false), 278 waiting_write_(false), 279 connect_os_error_(0), 280 logging_multiple_connect_attempts_(false), 281 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) { 282 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, 283 source.ToEventParametersCallback()); 284 EnsureWinsockInit(); 285 } 286 287 TCPSocketWin::~TCPSocketWin() { 288 Close(); 289 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE); 290 } 291 292 int TCPSocketWin::Open(AddressFamily family) { 293 DCHECK(CalledOnValidThread()); 294 DCHECK_EQ(socket_, INVALID_SOCKET); 295 296 socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM, 297 IPPROTO_TCP); 298 if (socket_ == INVALID_SOCKET) { 299 PLOG(ERROR) << "CreatePlatformSocket() returned an error"; 300 return MapSystemError(WSAGetLastError()); 301 } 302 303 if (SetNonBlocking(socket_)) { 304 int result = MapSystemError(WSAGetLastError()); 305 Close(); 306 return result; 307 } 308 309 return OK; 310 } 311 312 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket, 313 const IPEndPoint& peer_address) { 314 DCHECK(CalledOnValidThread()); 315 DCHECK_EQ(socket_, INVALID_SOCKET); 316 DCHECK(!core_); 317 318 socket_ = socket; 319 320 if (SetNonBlocking(socket_)) { 321 int result = MapSystemError(WSAGetLastError()); 322 Close(); 323 return result; 324 } 325 326 core_ = new Core(this); 327 peer_address_.reset(new IPEndPoint(peer_address)); 328 329 return OK; 330 } 331 332 int TCPSocketWin::AdoptListenSocket(SOCKET socket) { 333 DCHECK(CalledOnValidThread()); 334 DCHECK_EQ(socket_, INVALID_SOCKET); 335 336 socket_ = socket; 337 338 if (SetNonBlocking(socket_)) { 339 int result = MapSystemError(WSAGetLastError()); 340 Close(); 341 return result; 342 } 343 344 // |core_| is not needed for sockets that are used to accept connections. 345 // The operation here is more like Open but with an existing socket. 346 347 return OK; 348 } 349 350 int TCPSocketWin::Bind(const IPEndPoint& address) { 351 DCHECK(CalledOnValidThread()); 352 DCHECK_NE(socket_, INVALID_SOCKET); 353 354 SockaddrStorage storage; 355 if (!address.ToSockAddr(storage.addr, &storage.addr_len)) 356 return ERR_ADDRESS_INVALID; 357 358 int result = bind(socket_, storage.addr, storage.addr_len); 359 if (result < 0) { 360 PLOG(ERROR) << "bind() returned an error"; 361 return MapSystemError(WSAGetLastError()); 362 } 363 364 return OK; 365 } 366 367 int TCPSocketWin::Listen(int backlog) { 368 DCHECK(CalledOnValidThread()); 369 DCHECK_GT(backlog, 0); 370 DCHECK_NE(socket_, INVALID_SOCKET); 371 DCHECK_EQ(accept_event_, WSA_INVALID_EVENT); 372 373 accept_event_ = WSACreateEvent(); 374 if (accept_event_ == WSA_INVALID_EVENT) { 375 PLOG(ERROR) << "WSACreateEvent()"; 376 return MapSystemError(WSAGetLastError()); 377 } 378 379 int result = listen(socket_, backlog); 380 if (result < 0) { 381 PLOG(ERROR) << "listen() returned an error"; 382 return MapSystemError(WSAGetLastError()); 383 } 384 385 return OK; 386 } 387 388 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket, 389 IPEndPoint* address, 390 const CompletionCallback& callback) { 391 DCHECK(CalledOnValidThread()); 392 DCHECK(socket); 393 DCHECK(address); 394 DCHECK(!callback.is_null()); 395 DCHECK(accept_callback_.is_null()); 396 397 net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT); 398 399 int result = AcceptInternal(socket, address); 400 401 if (result == ERR_IO_PENDING) { 402 // Start watching. 403 WSAEventSelect(socket_, accept_event_, FD_ACCEPT); 404 accept_watcher_.StartWatching(accept_event_, this); 405 406 accept_socket_ = socket; 407 accept_address_ = address; 408 accept_callback_ = callback; 409 } 410 411 return result; 412 } 413 414 int TCPSocketWin::Connect(const IPEndPoint& address, 415 const CompletionCallback& callback) { 416 DCHECK(CalledOnValidThread()); 417 DCHECK_NE(socket_, INVALID_SOCKET); 418 DCHECK(!waiting_connect_); 419 420 // |peer_address_| and |core_| will be non-NULL if Connect() has been called. 421 // Unless Close() is called to reset the internal state, a second call to 422 // Connect() is not allowed. 423 // Please note that we enforce this even if the previous Connect() has 424 // completed and failed. Although it is allowed to connect the same |socket_| 425 // again after a connection attempt failed on Windows, it results in 426 // unspecified behavior according to POSIX. Therefore, we make it behave in 427 // the same way as TCPSocketLibevent. 428 DCHECK(!peer_address_ && !core_); 429 430 if (!logging_multiple_connect_attempts_) 431 LogConnectBegin(AddressList(address)); 432 433 peer_address_.reset(new IPEndPoint(address)); 434 435 int rv = DoConnect(); 436 if (rv == ERR_IO_PENDING) { 437 // Synchronous operation not supported. 438 DCHECK(!callback.is_null()); 439 read_callback_ = callback; 440 waiting_connect_ = true; 441 } else { 442 DoConnectComplete(rv); 443 } 444 445 return rv; 446 } 447 448 bool TCPSocketWin::IsConnected() const { 449 DCHECK(CalledOnValidThread()); 450 451 if (socket_ == INVALID_SOCKET || waiting_connect_) 452 return false; 453 454 if (waiting_read_) 455 return true; 456 457 // Check if connection is alive. 458 char c; 459 int rv = recv(socket_, &c, 1, MSG_PEEK); 460 if (rv == 0) 461 return false; 462 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) 463 return false; 464 465 return true; 466 } 467 468 bool TCPSocketWin::IsConnectedAndIdle() const { 469 DCHECK(CalledOnValidThread()); 470 471 if (socket_ == INVALID_SOCKET || waiting_connect_) 472 return false; 473 474 if (waiting_read_) 475 return true; 476 477 // Check if connection is alive and we haven't received any data 478 // unexpectedly. 479 char c; 480 int rv = recv(socket_, &c, 1, MSG_PEEK); 481 if (rv >= 0) 482 return false; 483 if (WSAGetLastError() != WSAEWOULDBLOCK) 484 return false; 485 486 return true; 487 } 488 489 int TCPSocketWin::Read(IOBuffer* buf, 490 int buf_len, 491 const CompletionCallback& callback) { 492 DCHECK(CalledOnValidThread()); 493 DCHECK_NE(socket_, INVALID_SOCKET); 494 DCHECK(!waiting_read_); 495 DCHECK(read_callback_.is_null()); 496 DCHECK(!core_->read_iobuffer_); 497 498 return DoRead(buf, buf_len, callback); 499 } 500 501 int TCPSocketWin::Write(IOBuffer* buf, 502 int buf_len, 503 const CompletionCallback& callback) { 504 DCHECK(CalledOnValidThread()); 505 DCHECK_NE(socket_, INVALID_SOCKET); 506 DCHECK(!waiting_write_); 507 DCHECK(write_callback_.is_null()); 508 DCHECK_GT(buf_len, 0); 509 DCHECK(!core_->write_iobuffer_); 510 511 base::StatsCounter writes("tcp.writes"); 512 writes.Increment(); 513 514 WSABUF write_buffer; 515 write_buffer.len = buf_len; 516 write_buffer.buf = buf->data(); 517 518 // TODO(wtc): Remove the assertion after enough testing. 519 AssertEventNotSignaled(core_->write_overlapped_.hEvent); 520 DWORD num; 521 int rv = WSASend(socket_, &write_buffer, 1, &num, 0, 522 &core_->write_overlapped_, NULL); 523 if (rv == 0) { 524 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) { 525 rv = static_cast<int>(num); 526 if (rv > buf_len || rv < 0) { 527 // It seems that some winsock interceptors report that more was written 528 // than was available. Treat this as an error. http://crbug.com/27870 529 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len 530 << " bytes, but " << rv << " bytes reported."; 531 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 532 } 533 base::StatsCounter write_bytes("tcp.write_bytes"); 534 write_bytes.Add(rv); 535 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv, 536 buf->data()); 537 return rv; 538 } 539 } else { 540 int os_error = WSAGetLastError(); 541 if (os_error != WSA_IO_PENDING) { 542 int net_error = MapSystemError(os_error); 543 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, 544 CreateNetLogSocketErrorCallback(net_error, os_error)); 545 return net_error; 546 } 547 } 548 waiting_write_ = true; 549 write_callback_ = callback; 550 core_->write_iobuffer_ = buf; 551 core_->write_buffer_length_ = buf_len; 552 core_->WatchForWrite(); 553 return ERR_IO_PENDING; 554 } 555 556 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const { 557 DCHECK(CalledOnValidThread()); 558 DCHECK(address); 559 560 SockaddrStorage storage; 561 if (getsockname(socket_, storage.addr, &storage.addr_len)) 562 return MapSystemError(WSAGetLastError()); 563 if (!address->FromSockAddr(storage.addr, storage.addr_len)) 564 return ERR_ADDRESS_INVALID; 565 566 return OK; 567 } 568 569 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const { 570 DCHECK(CalledOnValidThread()); 571 DCHECK(address); 572 if (!IsConnected()) 573 return ERR_SOCKET_NOT_CONNECTED; 574 *address = *peer_address_; 575 return OK; 576 } 577 578 int TCPSocketWin::SetDefaultOptionsForServer() { 579 return SetExclusiveAddrUse(); 580 } 581 582 void TCPSocketWin::SetDefaultOptionsForClient() { 583 // Increase the socket buffer sizes from the default sizes for WinXP. In 584 // performance testing, there is substantial benefit by increasing from 8KB 585 // to 64KB. 586 // See also: 587 // http://support.microsoft.com/kb/823764/EN-US 588 // On Vista, if we manually set these sizes, Vista turns off its receive 589 // window auto-tuning feature. 590 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx 591 // Since Vista's auto-tune is better than any static value we can could set, 592 // only change these on pre-vista machines. 593 if (base::win::GetVersion() < base::win::VERSION_VISTA) { 594 const int32 kSocketBufferSize = 64 * 1024; 595 SetSocketReceiveBufferSize(socket_, kSocketBufferSize); 596 SetSocketSendBufferSize(socket_, kSocketBufferSize); 597 } 598 599 DisableNagle(socket_, true); 600 SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds); 601 } 602 603 int TCPSocketWin::SetExclusiveAddrUse() { 604 // On Windows, a bound end point can be hijacked by another process by 605 // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE 606 // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the 607 // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another 608 // socket to forcibly bind to the end point until the end point is unbound. 609 // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE. 610 // MSDN: http://goo.gl/M6fjQ. 611 // 612 // Unlike on *nix, on Windows a TCP server socket can always bind to an end 613 // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not 614 // needed here. 615 // 616 // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end 617 // point in TIME_WAIT status. It does not have this effect for a TCP server 618 // socket. 619 620 BOOL true_value = 1; 621 int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE, 622 reinterpret_cast<const char*>(&true_value), 623 sizeof(true_value)); 624 if (rv < 0) 625 return MapSystemError(errno); 626 return OK; 627 } 628 629 int TCPSocketWin::SetReceiveBufferSize(int32 size) { 630 DCHECK(CalledOnValidThread()); 631 return SetSocketReceiveBufferSize(socket_, size); 632 } 633 634 int TCPSocketWin::SetSendBufferSize(int32 size) { 635 DCHECK(CalledOnValidThread()); 636 return SetSocketSendBufferSize(socket_, size); 637 } 638 639 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) { 640 return SetTCPKeepAlive(socket_, enable, delay); 641 } 642 643 bool TCPSocketWin::SetNoDelay(bool no_delay) { 644 return DisableNagle(socket_, no_delay); 645 } 646 647 void TCPSocketWin::Close() { 648 DCHECK(CalledOnValidThread()); 649 650 if (socket_ != INVALID_SOCKET) { 651 // Only log the close event if there's actually a socket to close. 652 net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED); 653 654 // Note: don't use CancelIo to cancel pending IO because it doesn't work 655 // when there is a Winsock layered service provider. 656 657 // In most socket implementations, closing a socket results in a graceful 658 // connection shutdown, but in Winsock we have to call shutdown explicitly. 659 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure" 660 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx 661 shutdown(socket_, SD_SEND); 662 663 // This cancels any pending IO. 664 if (closesocket(socket_) < 0) 665 PLOG(ERROR) << "closesocket"; 666 socket_ = INVALID_SOCKET; 667 } 668 669 if (!accept_callback_.is_null()) { 670 accept_watcher_.StopWatching(); 671 accept_socket_ = NULL; 672 accept_address_ = NULL; 673 accept_callback_.Reset(); 674 } 675 676 if (accept_event_) { 677 WSACloseEvent(accept_event_); 678 accept_event_ = WSA_INVALID_EVENT; 679 } 680 681 if (core_) { 682 if (waiting_connect_) { 683 // We closed the socket, so this notification will never come. 684 // From MSDN' WSAEventSelect documentation: 685 // "Closing a socket with closesocket also cancels the association and 686 // selection of network events specified in WSAEventSelect for the 687 // socket". 688 core_->Release(); 689 } 690 core_->Detach(); 691 core_ = NULL; 692 } 693 694 waiting_connect_ = false; 695 waiting_read_ = false; 696 waiting_write_ = false; 697 698 read_callback_.Reset(); 699 write_callback_.Reset(); 700 peer_address_.reset(); 701 connect_os_error_ = 0; 702 } 703 704 void TCPSocketWin::StartLoggingMultipleConnectAttempts( 705 const AddressList& addresses) { 706 if (!logging_multiple_connect_attempts_) { 707 logging_multiple_connect_attempts_ = true; 708 LogConnectBegin(addresses); 709 } else { 710 NOTREACHED(); 711 } 712 } 713 714 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) { 715 if (logging_multiple_connect_attempts_) { 716 LogConnectEnd(net_error); 717 logging_multiple_connect_attempts_ = false; 718 } else { 719 NOTREACHED(); 720 } 721 } 722 723 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket, 724 IPEndPoint* address) { 725 SockaddrStorage storage; 726 int new_socket = accept(socket_, storage.addr, &storage.addr_len); 727 if (new_socket < 0) { 728 int net_error = MapSystemError(WSAGetLastError()); 729 if (net_error != ERR_IO_PENDING) 730 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error); 731 return net_error; 732 } 733 734 IPEndPoint ip_end_point; 735 if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) { 736 NOTREACHED(); 737 if (closesocket(new_socket) < 0) 738 PLOG(ERROR) << "closesocket"; 739 int net_error = ERR_ADDRESS_INVALID; 740 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error); 741 return net_error; 742 } 743 scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin( 744 net_log_.net_log(), net_log_.source())); 745 int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point); 746 if (adopt_result != OK) { 747 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result); 748 return adopt_result; 749 } 750 *socket = tcp_socket.Pass(); 751 *address = ip_end_point; 752 net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT, 753 CreateNetLogIPEndPointCallback(&ip_end_point)); 754 return OK; 755 } 756 757 void TCPSocketWin::OnObjectSignaled(HANDLE object) { 758 WSANETWORKEVENTS ev; 759 if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) { 760 PLOG(ERROR) << "WSAEnumNetworkEvents()"; 761 return; 762 } 763 764 if (ev.lNetworkEvents & FD_ACCEPT) { 765 int result = AcceptInternal(accept_socket_, accept_address_); 766 if (result != ERR_IO_PENDING) { 767 accept_socket_ = NULL; 768 accept_address_ = NULL; 769 base::ResetAndReturn(&accept_callback_).Run(result); 770 } 771 } else { 772 // This happens when a client opens a connection and closes it before we 773 // have a chance to accept it. 774 DCHECK(ev.lNetworkEvents == 0); 775 776 // Start watching the next FD_ACCEPT event. 777 WSAEventSelect(socket_, accept_event_, FD_ACCEPT); 778 accept_watcher_.StartWatching(accept_event_, this); 779 } 780 } 781 782 int TCPSocketWin::DoConnect() { 783 DCHECK_EQ(connect_os_error_, 0); 784 DCHECK(!core_); 785 786 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 787 CreateNetLogIPEndPointCallback(peer_address_.get())); 788 789 core_ = new Core(this); 790 // WSAEventSelect sets the socket to non-blocking mode as a side effect. 791 // Our connect() and recv() calls require that the socket be non-blocking. 792 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT); 793 794 SockaddrStorage storage; 795 if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len)) 796 return ERR_ADDRESS_INVALID; 797 if (!connect(socket_, storage.addr, storage.addr_len)) { 798 // Connected without waiting! 799 // 800 // The MSDN page for connect says: 801 // With a nonblocking socket, the connection attempt cannot be completed 802 // immediately. In this case, connect will return SOCKET_ERROR, and 803 // WSAGetLastError will return WSAEWOULDBLOCK. 804 // which implies that for a nonblocking socket, connect never returns 0. 805 // It's not documented whether the event object will be signaled or not 806 // if connect does return 0. So the code below is essentially dead code 807 // and we don't know if it's correct. 808 NOTREACHED(); 809 810 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) 811 return OK; 812 } else { 813 int os_error = WSAGetLastError(); 814 if (os_error != WSAEWOULDBLOCK) { 815 LOG(ERROR) << "connect failed: " << os_error; 816 connect_os_error_ = os_error; 817 int rv = MapConnectError(os_error); 818 CHECK_NE(ERR_IO_PENDING, rv); 819 return rv; 820 } 821 } 822 823 core_->WatchForRead(); 824 return ERR_IO_PENDING; 825 } 826 827 void TCPSocketWin::DoConnectComplete(int result) { 828 // Log the end of this attempt (and any OS error it threw). 829 int os_error = connect_os_error_; 830 connect_os_error_ = 0; 831 if (result != OK) { 832 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 833 NetLog::IntegerCallback("os_error", os_error)); 834 } else { 835 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT); 836 } 837 838 if (!logging_multiple_connect_attempts_) 839 LogConnectEnd(result); 840 } 841 842 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) { 843 base::StatsCounter connects("tcp.connect"); 844 connects.Increment(); 845 846 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, 847 addresses.CreateNetLogCallback()); 848 } 849 850 void TCPSocketWin::LogConnectEnd(int net_error) { 851 if (net_error == OK) 852 UpdateConnectionTypeHistograms(CONNECTION_ANY); 853 854 if (net_error != OK) { 855 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error); 856 return; 857 } 858 859 struct sockaddr_storage source_address; 860 socklen_t addrlen = sizeof(source_address); 861 int rv = getsockname( 862 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen); 863 if (rv != 0) { 864 LOG(ERROR) << "getsockname() [rv: " << rv 865 << "] error: " << WSAGetLastError(); 866 NOTREACHED(); 867 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv); 868 return; 869 } 870 871 net_log_.EndEvent( 872 NetLog::TYPE_TCP_CONNECT, 873 CreateNetLogSourceAddressCallback( 874 reinterpret_cast<const struct sockaddr*>(&source_address), 875 sizeof(source_address))); 876 } 877 878 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len, 879 const CompletionCallback& callback) { 880 if (!core_->non_blocking_reads_initialized_) { 881 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, 882 FD_READ | FD_CLOSE); 883 core_->non_blocking_reads_initialized_ = true; 884 } 885 int rv = recv(socket_, buf->data(), buf_len, 0); 886 if (rv == SOCKET_ERROR) { 887 int os_error = WSAGetLastError(); 888 if (os_error != WSAEWOULDBLOCK) { 889 int net_error = MapSystemError(os_error); 890 net_log_.AddEvent( 891 NetLog::TYPE_SOCKET_READ_ERROR, 892 CreateNetLogSocketErrorCallback(net_error, os_error)); 893 return net_error; 894 } 895 } else { 896 base::StatsCounter read_bytes("tcp.read_bytes"); 897 if (rv > 0) 898 read_bytes.Add(rv); 899 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv, 900 buf->data()); 901 return rv; 902 } 903 904 waiting_read_ = true; 905 read_callback_ = callback; 906 core_->read_iobuffer_ = buf; 907 core_->read_buffer_length_ = buf_len; 908 core_->WatchForRead(); 909 return ERR_IO_PENDING; 910 } 911 912 void TCPSocketWin::DidCompleteConnect() { 913 DCHECK(waiting_connect_); 914 DCHECK(!read_callback_.is_null()); 915 int result; 916 917 WSANETWORKEVENTS events; 918 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 919 &events); 920 int os_error = 0; 921 if (rv == SOCKET_ERROR) { 922 NOTREACHED(); 923 os_error = WSAGetLastError(); 924 result = MapSystemError(os_error); 925 } else if (events.lNetworkEvents & FD_CONNECT) { 926 os_error = events.iErrorCode[FD_CONNECT_BIT]; 927 result = MapConnectError(os_error); 928 } else { 929 NOTREACHED(); 930 result = ERR_UNEXPECTED; 931 } 932 933 connect_os_error_ = os_error; 934 DoConnectComplete(result); 935 waiting_connect_ = false; 936 937 DCHECK_NE(result, ERR_IO_PENDING); 938 base::ResetAndReturn(&read_callback_).Run(result); 939 } 940 941 void TCPSocketWin::DidCompleteWrite() { 942 DCHECK(waiting_write_); 943 DCHECK(!write_callback_.is_null()); 944 945 DWORD num_bytes, flags; 946 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_, 947 &num_bytes, FALSE, &flags); 948 WSAResetEvent(core_->write_overlapped_.hEvent); 949 waiting_write_ = false; 950 int rv; 951 if (!ok) { 952 int os_error = WSAGetLastError(); 953 rv = MapSystemError(os_error); 954 net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR, 955 CreateNetLogSocketErrorCallback(rv, os_error)); 956 } else { 957 rv = static_cast<int>(num_bytes); 958 if (rv > core_->write_buffer_length_ || rv < 0) { 959 // It seems that some winsock interceptors report that more was written 960 // than was available. Treat this as an error. http://crbug.com/27870 961 LOG(ERROR) << "Detected broken LSP: Asked to write " 962 << core_->write_buffer_length_ << " bytes, but " << rv 963 << " bytes reported."; 964 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 965 } else { 966 base::StatsCounter write_bytes("tcp.write_bytes"); 967 write_bytes.Add(num_bytes); 968 net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes, 969 core_->write_iobuffer_->data()); 970 } 971 } 972 973 core_->write_iobuffer_ = NULL; 974 975 DCHECK_NE(rv, ERR_IO_PENDING); 976 base::ResetAndReturn(&write_callback_).Run(rv); 977 } 978 979 void TCPSocketWin::DidSignalRead() { 980 DCHECK(waiting_read_); 981 DCHECK(!read_callback_.is_null()); 982 983 int os_error = 0; 984 WSANETWORKEVENTS network_events; 985 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 986 &network_events); 987 if (rv == SOCKET_ERROR) { 988 os_error = WSAGetLastError(); 989 rv = MapSystemError(os_error); 990 } else if (network_events.lNetworkEvents) { 991 DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0); 992 // If network_events.lNetworkEvents is FD_CLOSE and 993 // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful 994 // connection closure. It is tempting to directly set rv to 0 in 995 // this case, but the MSDN pages for WSAEventSelect and 996 // WSAAsyncSelect recommend we still call DoRead(): 997 // FD_CLOSE should only be posted after all data is read from a 998 // socket, but an application should check for remaining data upon 999 // receipt of FD_CLOSE to avoid any possibility of losing data. 1000 // 1001 // If network_events.iErrorCode[FD_READ_BIT] or 1002 // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call 1003 // DoRead() because recv() reports a more accurate error code 1004 // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was 1005 // reset. 1006 rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_, 1007 read_callback_); 1008 if (rv == ERR_IO_PENDING) 1009 return; 1010 } else { 1011 // This may happen because Read() may succeed synchronously and 1012 // consume all the received data without resetting the event object. 1013 core_->WatchForRead(); 1014 return; 1015 } 1016 1017 waiting_read_ = false; 1018 core_->read_iobuffer_ = NULL; 1019 core_->read_buffer_length_ = 0; 1020 1021 DCHECK_NE(rv, ERR_IO_PENDING); 1022 base::ResetAndReturn(&read_callback_).Run(rv); 1023 } 1024 1025 } // namespace net 1026