1 // Copyright (c) 2011 The Chromium Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style license that can be 3 // found in the LICENSE file. 4 5 #include "net/socket/tcp_client_socket_win.h" 6 7 #include <mstcpip.h> 8 9 #include "base/basictypes.h" 10 #include "base/compiler_specific.h" 11 #include "base/memory/memory_debug.h" 12 #include "base/metrics/stats_counters.h" 13 #include "base/string_util.h" 14 #include "base/sys_info.h" 15 #include "base/win/object_watcher.h" 16 #include "net/base/address_list_net_log_param.h" 17 #include "net/base/connection_type_histograms.h" 18 #include "net/base/io_buffer.h" 19 #include "net/base/ip_endpoint.h" 20 #include "net/base/net_errors.h" 21 #include "net/base/net_log.h" 22 #include "net/base/net_util.h" 23 #include "net/base/network_change_notifier.h" 24 #include "net/base/sys_addrinfo.h" 25 #include "net/base/winsock_init.h" 26 #include "net/base/winsock_util.h" 27 28 namespace net { 29 30 namespace { 31 32 int MapConnectError(int os_error) { 33 switch (os_error) { 34 // connect fails with WSAEACCES when Windows Firewall blocks the 35 // connection. 36 case WSAEACCES: 37 return ERR_NETWORK_ACCESS_DENIED; 38 case WSAETIMEDOUT: 39 return ERR_CONNECTION_TIMED_OUT; 40 default: { 41 int net_error = MapSystemError(os_error); 42 if (net_error == ERR_FAILED) 43 return ERR_CONNECTION_FAILED; // More specific than ERR_FAILED. 44 45 // Give a more specific error when the user is offline. 46 if (net_error == ERR_ADDRESS_UNREACHABLE && 47 NetworkChangeNotifier::IsOffline()) { 48 return ERR_INTERNET_DISCONNECTED; 49 } 50 51 return net_error; 52 } 53 } 54 } 55 56 } // namespace 57 58 //----------------------------------------------------------------------------- 59 60 // This class encapsulates all the state that has to be preserved as long as 61 // there is a network IO operation in progress. If the owner TCPClientSocketWin 62 // is destroyed while an operation is in progress, the Core is detached and it 63 // lives until the operation completes and the OS doesn't reference any resource 64 // declared on this class anymore. 65 class TCPClientSocketWin::Core : public base::RefCounted<Core> { 66 public: 67 explicit Core(TCPClientSocketWin* socket); 68 69 // Start watching for the end of a read or write operation. 70 void WatchForRead(); 71 void WatchForWrite(); 72 73 // The TCPClientSocketWin is going away. 74 void Detach() { socket_ = NULL; } 75 76 // The separate OVERLAPPED variables for asynchronous operation. 77 // |read_overlapped_| is used for both Connect() and Read(). 78 // |write_overlapped_| is only used for Write(); 79 OVERLAPPED read_overlapped_; 80 OVERLAPPED write_overlapped_; 81 82 // The buffers used in Read() and Write(). 83 WSABUF read_buffer_; 84 WSABUF write_buffer_; 85 scoped_refptr<IOBuffer> read_iobuffer_; 86 scoped_refptr<IOBuffer> write_iobuffer_; 87 int write_buffer_length_; 88 89 // Throttle the read size based on our current slow start state. 90 // Returns the throttled read size. 91 int ThrottleReadSize(int size) { 92 if (slow_start_throttle_ < kMaxSlowStartThrottle) { 93 size = std::min(size, slow_start_throttle_); 94 slow_start_throttle_ *= 2; 95 } 96 return size; 97 } 98 99 private: 100 friend class base::RefCounted<Core>; 101 102 class ReadDelegate : public base::win::ObjectWatcher::Delegate { 103 public: 104 explicit ReadDelegate(Core* core) : core_(core) {} 105 virtual ~ReadDelegate() {} 106 107 // base::ObjectWatcher::Delegate methods: 108 virtual void OnObjectSignaled(HANDLE object); 109 110 private: 111 Core* const core_; 112 }; 113 114 class WriteDelegate : public base::win::ObjectWatcher::Delegate { 115 public: 116 explicit WriteDelegate(Core* core) : core_(core) {} 117 virtual ~WriteDelegate() {} 118 119 // base::ObjectWatcher::Delegate methods: 120 virtual void OnObjectSignaled(HANDLE object); 121 122 private: 123 Core* const core_; 124 }; 125 126 ~Core(); 127 128 // The socket that created this object. 129 TCPClientSocketWin* socket_; 130 131 // |reader_| handles the signals from |read_watcher_|. 132 ReadDelegate reader_; 133 // |writer_| handles the signals from |write_watcher_|. 134 WriteDelegate writer_; 135 136 // |read_watcher_| watches for events from Connect() and Read(). 137 base::win::ObjectWatcher read_watcher_; 138 // |write_watcher_| watches for events from Write(); 139 base::win::ObjectWatcher write_watcher_; 140 141 // When doing reads from the socket, we try to mirror TCP's slow start. 142 // We do this because otherwise the async IO subsystem artifically delays 143 // returning data to the application. 144 static const int kInitialSlowStartThrottle = 1 * 1024; 145 static const int kMaxSlowStartThrottle = 32 * kInitialSlowStartThrottle; 146 int slow_start_throttle_; 147 148 DISALLOW_COPY_AND_ASSIGN(Core); 149 }; 150 151 TCPClientSocketWin::Core::Core( 152 TCPClientSocketWin* socket) 153 : write_buffer_length_(0), 154 socket_(socket), 155 ALLOW_THIS_IN_INITIALIZER_LIST(reader_(this)), 156 ALLOW_THIS_IN_INITIALIZER_LIST(writer_(this)), 157 slow_start_throttle_(kInitialSlowStartThrottle) { 158 memset(&read_overlapped_, 0, sizeof(read_overlapped_)); 159 memset(&write_overlapped_, 0, sizeof(write_overlapped_)); 160 } 161 162 TCPClientSocketWin::Core::~Core() { 163 // Make sure the message loop is not watching this object anymore. 164 read_watcher_.StopWatching(); 165 write_watcher_.StopWatching(); 166 167 WSACloseEvent(read_overlapped_.hEvent); 168 memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_)); 169 WSACloseEvent(write_overlapped_.hEvent); 170 memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_)); 171 } 172 173 void TCPClientSocketWin::Core::WatchForRead() { 174 // We grab an extra reference because there is an IO operation in progress. 175 // Balanced in ReadDelegate::OnObjectSignaled(). 176 AddRef(); 177 read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_); 178 } 179 180 void TCPClientSocketWin::Core::WatchForWrite() { 181 // We grab an extra reference because there is an IO operation in progress. 182 // Balanced in WriteDelegate::OnObjectSignaled(). 183 AddRef(); 184 write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_); 185 } 186 187 void TCPClientSocketWin::Core::ReadDelegate::OnObjectSignaled( 188 HANDLE object) { 189 DCHECK_EQ(object, core_->read_overlapped_.hEvent); 190 if (core_->socket_) { 191 if (core_->socket_->waiting_connect()) { 192 core_->socket_->DidCompleteConnect(); 193 } else { 194 core_->socket_->DidCompleteRead(); 195 } 196 } 197 198 core_->Release(); 199 } 200 201 void TCPClientSocketWin::Core::WriteDelegate::OnObjectSignaled( 202 HANDLE object) { 203 DCHECK_EQ(object, core_->write_overlapped_.hEvent); 204 if (core_->socket_) 205 core_->socket_->DidCompleteWrite(); 206 207 core_->Release(); 208 } 209 210 //----------------------------------------------------------------------------- 211 212 TCPClientSocketWin::TCPClientSocketWin(const AddressList& addresses, 213 net::NetLog* net_log, 214 const net::NetLog::Source& source) 215 : socket_(INVALID_SOCKET), 216 addresses_(addresses), 217 current_ai_(NULL), 218 waiting_read_(false), 219 waiting_write_(false), 220 read_callback_(NULL), 221 write_callback_(NULL), 222 next_connect_state_(CONNECT_STATE_NONE), 223 connect_os_error_(0), 224 net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)), 225 previously_disconnected_(false) { 226 scoped_refptr<NetLog::EventParameters> params; 227 if (source.is_valid()) 228 params = new NetLogSourceParameter("source_dependency", source); 229 net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE, params); 230 EnsureWinsockInit(); 231 } 232 233 TCPClientSocketWin::~TCPClientSocketWin() { 234 Disconnect(); 235 net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE, NULL); 236 } 237 238 void TCPClientSocketWin::AdoptSocket(SOCKET socket) { 239 DCHECK_EQ(socket_, INVALID_SOCKET); 240 socket_ = socket; 241 int error = SetupSocket(); 242 DCHECK_EQ(0, error); 243 core_ = new Core(this); 244 current_ai_ = addresses_.head(); 245 use_history_.set_was_ever_connected(); 246 } 247 248 #ifdef ANDROID 249 // TODO(kristianm): handle the case when wait_for_connect is true 250 // (sync requests) 251 #endif 252 int TCPClientSocketWin::Connect(CompletionCallback* callback 253 #ifdef ANDROID 254 , bool wait_for_connect 255 #endif 256 ) { 257 DCHECK(CalledOnValidThread()); 258 259 // If already connected, then just return OK. 260 if (socket_ != INVALID_SOCKET) 261 return OK; 262 263 base::StatsCounter connects("tcp.connect"); 264 connects.Increment(); 265 266 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT, 267 new AddressListNetLogParam(addresses_)); 268 269 // We will try to connect to each address in addresses_. Start with the 270 // first one in the list. 271 next_connect_state_ = CONNECT_STATE_CONNECT; 272 current_ai_ = addresses_.head(); 273 274 int rv = DoConnectLoop(OK); 275 if (rv == ERR_IO_PENDING) { 276 // Synchronous operation not supported. 277 DCHECK(callback); 278 read_callback_ = callback; 279 } else { 280 LogConnectCompletion(rv); 281 } 282 283 return rv; 284 } 285 286 int TCPClientSocketWin::DoConnectLoop(int result) { 287 DCHECK_NE(next_connect_state_, CONNECT_STATE_NONE); 288 289 int rv = result; 290 do { 291 ConnectState state = next_connect_state_; 292 next_connect_state_ = CONNECT_STATE_NONE; 293 switch (state) { 294 case CONNECT_STATE_CONNECT: 295 DCHECK_EQ(OK, rv); 296 rv = DoConnect(); 297 break; 298 case CONNECT_STATE_CONNECT_COMPLETE: 299 rv = DoConnectComplete(rv); 300 break; 301 default: 302 LOG(DFATAL) << "bad state " << state; 303 rv = ERR_UNEXPECTED; 304 break; 305 } 306 } while (rv != ERR_IO_PENDING && next_connect_state_ != CONNECT_STATE_NONE); 307 308 return rv; 309 } 310 311 int TCPClientSocketWin::DoConnect() { 312 const struct addrinfo* ai = current_ai_; 313 DCHECK(ai); 314 DCHECK_EQ(0, connect_os_error_); 315 316 if (previously_disconnected_) { 317 use_history_.Reset(); 318 previously_disconnected_ = false; 319 } 320 321 net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, 322 new NetLogStringParameter( 323 "address", NetAddressToStringWithPort(current_ai_))); 324 325 next_connect_state_ = CONNECT_STATE_CONNECT_COMPLETE; 326 327 connect_os_error_ = CreateSocket(ai); 328 if (connect_os_error_ != 0) 329 return MapSystemError(connect_os_error_); 330 331 DCHECK(!core_); 332 core_ = new Core(this); 333 334 // WSACreateEvent creates a manual-reset event object. 335 core_->read_overlapped_.hEvent = WSACreateEvent(); 336 // WSAEventSelect sets the socket to non-blocking mode as a side effect. 337 // Our connect() and recv() calls require that the socket be non-blocking. 338 WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT); 339 340 core_->write_overlapped_.hEvent = WSACreateEvent(); 341 342 if (!connect(socket_, ai->ai_addr, static_cast<int>(ai->ai_addrlen))) { 343 // Connected without waiting! 344 // 345 // The MSDN page for connect says: 346 // With a nonblocking socket, the connection attempt cannot be completed 347 // immediately. In this case, connect will return SOCKET_ERROR, and 348 // WSAGetLastError will return WSAEWOULDBLOCK. 349 // which implies that for a nonblocking socket, connect never returns 0. 350 // It's not documented whether the event object will be signaled or not 351 // if connect does return 0. So the code below is essentially dead code 352 // and we don't know if it's correct. 353 NOTREACHED(); 354 355 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) 356 return OK; 357 } else { 358 int os_error = WSAGetLastError(); 359 if (os_error != WSAEWOULDBLOCK) { 360 LOG(ERROR) << "connect failed: " << os_error; 361 connect_os_error_ = os_error; 362 return MapConnectError(os_error); 363 } 364 } 365 366 core_->WatchForRead(); 367 return ERR_IO_PENDING; 368 } 369 370 int TCPClientSocketWin::DoConnectComplete(int result) { 371 // Log the end of this attempt (and any OS error it threw). 372 int os_error = connect_os_error_; 373 connect_os_error_ = 0; 374 scoped_refptr<NetLog::EventParameters> params; 375 if (result != OK) 376 params = new NetLogIntegerParameter("os_error", os_error); 377 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT, params); 378 379 if (result == OK) { 380 use_history_.set_was_ever_connected(); 381 return OK; // Done! 382 } 383 384 // Close whatever partially connected socket we currently have. 385 DoDisconnect(); 386 387 // Try to fall back to the next address in the list. 388 if (current_ai_->ai_next) { 389 next_connect_state_ = CONNECT_STATE_CONNECT; 390 current_ai_ = current_ai_->ai_next; 391 return OK; 392 } 393 394 // Otherwise there is nothing to fall back to, so give up. 395 return result; 396 } 397 398 void TCPClientSocketWin::Disconnect() { 399 DoDisconnect(); 400 current_ai_ = NULL; 401 } 402 403 void TCPClientSocketWin::DoDisconnect() { 404 DCHECK(CalledOnValidThread()); 405 406 if (socket_ == INVALID_SOCKET) 407 return; 408 409 // Note: don't use CancelIo to cancel pending IO because it doesn't work 410 // when there is a Winsock layered service provider. 411 412 // In most socket implementations, closing a socket results in a graceful 413 // connection shutdown, but in Winsock we have to call shutdown explicitly. 414 // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure" 415 // at http://msdn.microsoft.com/en-us/library/ms738547.aspx 416 shutdown(socket_, SD_SEND); 417 418 // This cancels any pending IO. 419 closesocket(socket_); 420 socket_ = INVALID_SOCKET; 421 422 if (waiting_connect()) { 423 // We closed the socket, so this notification will never come. 424 // From MSDN' WSAEventSelect documentation: 425 // "Closing a socket with closesocket also cancels the association and 426 // selection of network events specified in WSAEventSelect for the socket". 427 core_->Release(); 428 } 429 430 waiting_read_ = false; 431 waiting_write_ = false; 432 433 core_->Detach(); 434 core_ = NULL; 435 436 previously_disconnected_ = true; 437 } 438 439 bool TCPClientSocketWin::IsConnected() const { 440 DCHECK(CalledOnValidThread()); 441 442 if (socket_ == INVALID_SOCKET || waiting_connect()) 443 return false; 444 445 // Check if connection is alive. 446 char c; 447 int rv = recv(socket_, &c, 1, MSG_PEEK); 448 if (rv == 0) 449 return false; 450 if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK) 451 return false; 452 453 return true; 454 } 455 456 bool TCPClientSocketWin::IsConnectedAndIdle() const { 457 DCHECK(CalledOnValidThread()); 458 459 if (socket_ == INVALID_SOCKET || waiting_connect()) 460 return false; 461 462 // Check if connection is alive and we haven't received any data 463 // unexpectedly. 464 char c; 465 int rv = recv(socket_, &c, 1, MSG_PEEK); 466 if (rv >= 0) 467 return false; 468 if (WSAGetLastError() != WSAEWOULDBLOCK) 469 return false; 470 471 return true; 472 } 473 474 int TCPClientSocketWin::GetPeerAddress(AddressList* address) const { 475 DCHECK(CalledOnValidThread()); 476 DCHECK(address); 477 if (!IsConnected()) 478 return ERR_SOCKET_NOT_CONNECTED; 479 address->Copy(current_ai_, false); 480 return OK; 481 } 482 483 int TCPClientSocketWin::GetLocalAddress(IPEndPoint* address) const { 484 DCHECK(CalledOnValidThread()); 485 DCHECK(address); 486 if (!IsConnected()) 487 return ERR_SOCKET_NOT_CONNECTED; 488 489 struct sockaddr_storage addr_storage; 490 socklen_t addr_len = sizeof(addr_storage); 491 struct sockaddr* addr = reinterpret_cast<struct sockaddr*>(&addr_storage); 492 if (getsockname(socket_, addr, &addr_len)) 493 return MapSystemError(WSAGetLastError()); 494 if (!address->FromSockAddr(addr, addr_len)) 495 return ERR_FAILED; 496 return OK; 497 } 498 499 void TCPClientSocketWin::SetSubresourceSpeculation() { 500 use_history_.set_subresource_speculation(); 501 } 502 503 void TCPClientSocketWin::SetOmniboxSpeculation() { 504 use_history_.set_omnibox_speculation(); 505 } 506 507 bool TCPClientSocketWin::WasEverUsed() const { 508 return use_history_.was_used_to_convey_data(); 509 } 510 511 bool TCPClientSocketWin::UsingTCPFastOpen() const { 512 // Not supported on windows. 513 return false; 514 } 515 516 int TCPClientSocketWin::Read(IOBuffer* buf, 517 int buf_len, 518 CompletionCallback* callback) { 519 DCHECK(CalledOnValidThread()); 520 DCHECK_NE(socket_, INVALID_SOCKET); 521 DCHECK(!waiting_read_); 522 DCHECK(!read_callback_); 523 DCHECK(!core_->read_iobuffer_); 524 525 buf_len = core_->ThrottleReadSize(buf_len); 526 527 core_->read_buffer_.len = buf_len; 528 core_->read_buffer_.buf = buf->data(); 529 530 // TODO(wtc): Remove the assertion after enough testing. 531 AssertEventNotSignaled(core_->read_overlapped_.hEvent); 532 DWORD num, flags = 0; 533 int rv = WSARecv(socket_, &core_->read_buffer_, 1, &num, &flags, 534 &core_->read_overlapped_, NULL); 535 if (rv == 0) { 536 if (ResetEventIfSignaled(core_->read_overlapped_.hEvent)) { 537 // Because of how WSARecv fills memory when used asynchronously, Purify 538 // isn't able to detect that it's been initialized, so it scans for 0xcd 539 // in the buffer and reports UMRs (uninitialized memory reads) for those 540 // individual bytes. We override that in PURIFY builds to avoid the 541 // false error reports. 542 // See bug 5297. 543 base::MemoryDebug::MarkAsInitialized(core_->read_buffer_.buf, num); 544 base::StatsCounter read_bytes("tcp.read_bytes"); 545 read_bytes.Add(num); 546 if (num > 0) 547 use_history_.set_was_used_to_convey_data(); 548 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num, 549 core_->read_buffer_.buf); 550 return static_cast<int>(num); 551 } 552 } else { 553 int os_error = WSAGetLastError(); 554 if (os_error != WSA_IO_PENDING) 555 return MapSystemError(os_error); 556 } 557 core_->WatchForRead(); 558 waiting_read_ = true; 559 read_callback_ = callback; 560 core_->read_iobuffer_ = buf; 561 return ERR_IO_PENDING; 562 } 563 564 int TCPClientSocketWin::Write(IOBuffer* buf, 565 int buf_len, 566 CompletionCallback* callback) { 567 DCHECK(CalledOnValidThread()); 568 DCHECK_NE(socket_, INVALID_SOCKET); 569 DCHECK(!waiting_write_); 570 DCHECK(!write_callback_); 571 DCHECK_GT(buf_len, 0); 572 DCHECK(!core_->write_iobuffer_); 573 574 base::StatsCounter writes("tcp.writes"); 575 writes.Increment(); 576 577 core_->write_buffer_.len = buf_len; 578 core_->write_buffer_.buf = buf->data(); 579 core_->write_buffer_length_ = buf_len; 580 581 // TODO(wtc): Remove the assertion after enough testing. 582 AssertEventNotSignaled(core_->write_overlapped_.hEvent); 583 DWORD num; 584 int rv = WSASend(socket_, &core_->write_buffer_, 1, &num, 0, 585 &core_->write_overlapped_, NULL); 586 if (rv == 0) { 587 if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) { 588 rv = static_cast<int>(num); 589 if (rv > buf_len || rv < 0) { 590 // It seems that some winsock interceptors report that more was written 591 // than was available. Treat this as an error. http://crbug.com/27870 592 LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len 593 << " bytes, but " << rv << " bytes reported."; 594 return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 595 } 596 base::StatsCounter write_bytes("tcp.write_bytes"); 597 write_bytes.Add(rv); 598 if (rv > 0) 599 use_history_.set_was_used_to_convey_data(); 600 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, rv, 601 core_->write_buffer_.buf); 602 return rv; 603 } 604 } else { 605 int os_error = WSAGetLastError(); 606 if (os_error != WSA_IO_PENDING) 607 return MapSystemError(os_error); 608 } 609 core_->WatchForWrite(); 610 waiting_write_ = true; 611 write_callback_ = callback; 612 core_->write_iobuffer_ = buf; 613 return ERR_IO_PENDING; 614 } 615 616 bool TCPClientSocketWin::SetReceiveBufferSize(int32 size) { 617 DCHECK(CalledOnValidThread()); 618 int rv = setsockopt(socket_, SOL_SOCKET, SO_RCVBUF, 619 reinterpret_cast<const char*>(&size), sizeof(size)); 620 DCHECK(!rv) << "Could not set socket receive buffer size: " << GetLastError(); 621 return rv == 0; 622 } 623 624 bool TCPClientSocketWin::SetSendBufferSize(int32 size) { 625 DCHECK(CalledOnValidThread()); 626 int rv = setsockopt(socket_, SOL_SOCKET, SO_SNDBUF, 627 reinterpret_cast<const char*>(&size), sizeof(size)); 628 DCHECK(!rv) << "Could not set socket send buffer size: " << GetLastError(); 629 return rv == 0; 630 } 631 632 int TCPClientSocketWin::CreateSocket(const struct addrinfo* ai) { 633 socket_ = WSASocket(ai->ai_family, ai->ai_socktype, ai->ai_protocol, NULL, 0, 634 WSA_FLAG_OVERLAPPED); 635 if (socket_ == INVALID_SOCKET) { 636 int os_error = WSAGetLastError(); 637 LOG(ERROR) << "WSASocket failed: " << os_error; 638 return os_error; 639 } 640 return SetupSocket(); 641 } 642 643 int TCPClientSocketWin::SetupSocket() { 644 // Increase the socket buffer sizes from the default sizes for WinXP. In 645 // performance testing, there is substantial benefit by increasing from 8KB 646 // to 64KB. 647 // See also: 648 // http://support.microsoft.com/kb/823764/EN-US 649 // On Vista, if we manually set these sizes, Vista turns off its receive 650 // window auto-tuning feature. 651 // http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx 652 // Since Vista's auto-tune is better than any static value we can could set, 653 // only change these on pre-vista machines. 654 int32 major_version, minor_version, fix_version; 655 base::SysInfo::OperatingSystemVersionNumbers(&major_version, &minor_version, 656 &fix_version); 657 if (major_version < 6) { 658 const int32 kSocketBufferSize = 64 * 1024; 659 SetReceiveBufferSize(kSocketBufferSize); 660 SetSendBufferSize(kSocketBufferSize); 661 } 662 663 // Disable Nagle. 664 // The Nagle implementation on windows is governed by RFC 896. The idea 665 // behind Nagle is to reduce small packets on the network. When Nagle is 666 // enabled, if a partial packet has been sent, the TCP stack will disallow 667 // further *partial* packets until an ACK has been received from the other 668 // side. Good applications should always strive to send as much data as 669 // possible and avoid partial-packet sends. However, in most real world 670 // applications, there are edge cases where this does not happen, and two 671 // partil packets may be sent back to back. For a browser, it is NEVER 672 // a benefit to delay for an RTT before the second packet is sent. 673 // 674 // As a practical example in Chromium today, consider the case of a small 675 // POST. I have verified this: 676 // Client writes 649 bytes of header (partial packet #1) 677 // Client writes 50 bytes of POST data (partial packet #2) 678 // In the above example, with Nagle, a RTT delay is inserted between these 679 // two sends due to nagle. RTTs can easily be 100ms or more. The best 680 // fix is to make sure that for POSTing data, we write as much data as 681 // possible and minimize partial packets. We will fix that. But disabling 682 // Nagle also ensure we don't run into this delay in other edge cases. 683 // See also: 684 // http://technet.microsoft.com/en-us/library/bb726981.aspx 685 const BOOL kDisableNagle = TRUE; 686 int rv = setsockopt(socket_, IPPROTO_TCP, TCP_NODELAY, 687 reinterpret_cast<const char*>(&kDisableNagle), 688 sizeof(kDisableNagle)); 689 DCHECK(!rv) << "Could not disable nagle"; 690 691 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP 692 // connections. See http://crbug.com/27400 for details. 693 694 struct tcp_keepalive keepalive_vals = { 695 1, // TCP keep-alive on. 696 45000, // Wait 45s until sending first TCP keep-alive packet. 697 45000, // Wait 45s between sending TCP keep-alive packets. 698 }; 699 DWORD bytes_returned = 0xABAB; 700 rv = WSAIoctl(socket_, SIO_KEEPALIVE_VALS, &keepalive_vals, 701 sizeof(keepalive_vals), NULL, 0, 702 &bytes_returned, NULL, NULL); 703 DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket_ 704 << " [error: " << WSAGetLastError() << "]."; 705 706 // Disregard any failure in disabling nagle or enabling TCP Keep-Alive. 707 return 0; 708 } 709 710 void TCPClientSocketWin::LogConnectCompletion(int net_error) { 711 if (net_error == OK) 712 UpdateConnectionTypeHistograms(CONNECTION_ANY); 713 714 if (net_error != OK) { 715 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error); 716 return; 717 } 718 719 struct sockaddr_storage source_address; 720 socklen_t addrlen = sizeof(source_address); 721 int rv = getsockname( 722 socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen); 723 if (rv != 0) { 724 LOG(ERROR) << "getsockname() [rv: " << rv 725 << "] error: " << WSAGetLastError(); 726 NOTREACHED(); 727 net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv); 728 return; 729 } 730 731 const std::string source_address_str = 732 NetAddressToStringWithPort( 733 reinterpret_cast<const struct sockaddr*>(&source_address), 734 sizeof(source_address)); 735 net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT, 736 make_scoped_refptr(new NetLogStringParameter( 737 "source address", 738 source_address_str))); 739 } 740 741 void TCPClientSocketWin::DoReadCallback(int rv) { 742 DCHECK_NE(rv, ERR_IO_PENDING); 743 DCHECK(read_callback_); 744 745 // since Run may result in Read being called, clear read_callback_ up front. 746 CompletionCallback* c = read_callback_; 747 read_callback_ = NULL; 748 c->Run(rv); 749 } 750 751 void TCPClientSocketWin::DoWriteCallback(int rv) { 752 DCHECK_NE(rv, ERR_IO_PENDING); 753 DCHECK(write_callback_); 754 755 // since Run may result in Write being called, clear write_callback_ up front. 756 CompletionCallback* c = write_callback_; 757 write_callback_ = NULL; 758 c->Run(rv); 759 } 760 761 void TCPClientSocketWin::DidCompleteConnect() { 762 DCHECK_EQ(next_connect_state_, CONNECT_STATE_CONNECT_COMPLETE); 763 int result; 764 765 WSANETWORKEVENTS events; 766 int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent, 767 &events); 768 int os_error = 0; 769 if (rv == SOCKET_ERROR) { 770 NOTREACHED(); 771 os_error = WSAGetLastError(); 772 result = MapSystemError(os_error); 773 } else if (events.lNetworkEvents & FD_CONNECT) { 774 os_error = events.iErrorCode[FD_CONNECT_BIT]; 775 result = MapConnectError(os_error); 776 } else { 777 NOTREACHED(); 778 result = ERR_UNEXPECTED; 779 } 780 781 connect_os_error_ = os_error; 782 rv = DoConnectLoop(result); 783 if (rv != ERR_IO_PENDING) { 784 LogConnectCompletion(rv); 785 DoReadCallback(rv); 786 } 787 } 788 789 void TCPClientSocketWin::DidCompleteRead() { 790 DCHECK(waiting_read_); 791 DWORD num_bytes, flags; 792 BOOL ok = WSAGetOverlappedResult(socket_, &core_->read_overlapped_, 793 &num_bytes, FALSE, &flags); 794 WSAResetEvent(core_->read_overlapped_.hEvent); 795 waiting_read_ = false; 796 core_->read_iobuffer_ = NULL; 797 if (ok) { 798 base::StatsCounter read_bytes("tcp.read_bytes"); 799 read_bytes.Add(num_bytes); 800 if (num_bytes > 0) 801 use_history_.set_was_used_to_convey_data(); 802 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_RECEIVED, num_bytes, 803 core_->read_buffer_.buf); 804 } 805 DoReadCallback(ok ? num_bytes : MapSystemError(WSAGetLastError())); 806 } 807 808 void TCPClientSocketWin::DidCompleteWrite() { 809 DCHECK(waiting_write_); 810 811 DWORD num_bytes, flags; 812 BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_, 813 &num_bytes, FALSE, &flags); 814 WSAResetEvent(core_->write_overlapped_.hEvent); 815 waiting_write_ = false; 816 int rv; 817 if (!ok) { 818 rv = MapSystemError(WSAGetLastError()); 819 } else { 820 rv = static_cast<int>(num_bytes); 821 if (rv > core_->write_buffer_length_ || rv < 0) { 822 // It seems that some winsock interceptors report that more was written 823 // than was available. Treat this as an error. http://crbug.com/27870 824 LOG(ERROR) << "Detected broken LSP: Asked to write " 825 << core_->write_buffer_length_ << " bytes, but " << rv 826 << " bytes reported."; 827 rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES; 828 } else { 829 base::StatsCounter write_bytes("tcp.write_bytes"); 830 write_bytes.Add(num_bytes); 831 if (num_bytes > 0) 832 use_history_.set_was_used_to_convey_data(); 833 LogByteTransfer(net_log_, NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes, 834 core_->write_buffer_.buf); 835 } 836 } 837 core_->write_iobuffer_ = NULL; 838 DoWriteCallback(rv); 839 } 840 841 } // namespace net 842