Home | History | Annotate | Download | only in socket
      1 // Copyright 2013 The Chromium Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style license that can be
      3 // found in the LICENSE file.
      4 
      5 #include "net/socket/tcp_socket.h"
      6 #include "net/socket/tcp_socket_win.h"
      7 
      8 #include <mstcpip.h>
      9 
     10 #include "base/callback_helpers.h"
     11 #include "base/logging.h"
     12 #include "base/metrics/stats_counters.h"
     13 #include "base/win/windows_version.h"
     14 #include "net/base/address_list.h"
     15 #include "net/base/connection_type_histograms.h"
     16 #include "net/base/io_buffer.h"
     17 #include "net/base/ip_endpoint.h"
     18 #include "net/base/net_errors.h"
     19 #include "net/base/net_util.h"
     20 #include "net/base/network_change_notifier.h"
     21 #include "net/base/winsock_init.h"
     22 #include "net/base/winsock_util.h"
     23 #include "net/socket/socket_descriptor.h"
     24 #include "net/socket/socket_net_log_params.h"
     25 
     26 namespace net {
     27 
     28 namespace {
     29 
// Keep-alive delay, in seconds, that SetDefaultOptionsForClient() passes to
// SetTCPKeepAlive() for client sockets.
const int kTCPKeepAliveSeconds = 45;
     31 
     32 int SetSocketReceiveBufferSize(SOCKET socket, int32 size) {
     33   int rv = setsockopt(socket, SOL_SOCKET, SO_RCVBUF,
     34                       reinterpret_cast<const char*>(&size), sizeof(size));
     35   int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
     36   DCHECK(!rv) << "Could not set socket receive buffer size: " << net_error;
     37   return net_error;
     38 }
     39 
     40 int SetSocketSendBufferSize(SOCKET socket, int32 size) {
     41   int rv = setsockopt(socket, SOL_SOCKET, SO_SNDBUF,
     42                       reinterpret_cast<const char*>(&size), sizeof(size));
     43   int net_error = (rv == 0) ? OK : MapSystemError(WSAGetLastError());
     44   DCHECK(!rv) << "Could not set socket send buffer size: " << net_error;
     45   return net_error;
     46 }
     47 
     48 // Disable Nagle.
     49 // The Nagle implementation on windows is governed by RFC 896.  The idea
     50 // behind Nagle is to reduce small packets on the network.  When Nagle is
     51 // enabled, if a partial packet has been sent, the TCP stack will disallow
     52 // further *partial* packets until an ACK has been received from the other
     53 // side.  Good applications should always strive to send as much data as
     54 // possible and avoid partial-packet sends.  However, in most real world
     55 // applications, there are edge cases where this does not happen, and two
     56 // partial packets may be sent back to back.  For a browser, it is NEVER
     57 // a benefit to delay for an RTT before the second packet is sent.
     58 //
     59 // As a practical example in Chromium today, consider the case of a small
     60 // POST.  I have verified this:
     61 //     Client writes 649 bytes of header  (partial packet #1)
     62 //     Client writes 50 bytes of POST data (partial packet #2)
     63 // In the above example, with Nagle, a RTT delay is inserted between these
     64 // two sends due to nagle.  RTTs can easily be 100ms or more.  The best
     65 // fix is to make sure that for POSTing data, we write as much data as
     66 // possible and minimize partial packets.  We will fix that.  But disabling
     67 // Nagle also ensure we don't run into this delay in other edge cases.
     68 // See also:
     69 //    http://technet.microsoft.com/en-us/library/bb726981.aspx
     70 bool DisableNagle(SOCKET socket, bool disable) {
     71   BOOL val = disable ? TRUE : FALSE;
     72   int rv = setsockopt(socket, IPPROTO_TCP, TCP_NODELAY,
     73                       reinterpret_cast<const char*>(&val),
     74                       sizeof(val));
     75   DCHECK(!rv) << "Could not disable nagle";
     76   return rv == 0;
     77 }
     78 
     79 // Enable TCP Keep-Alive to prevent NAT routers from timing out TCP
     80 // connections. See http://crbug.com/27400 for details.
     81 bool SetTCPKeepAlive(SOCKET socket, BOOL enable, int delay_secs) {
     82   int delay = delay_secs * 1000;
     83   struct tcp_keepalive keepalive_vals = {
     84     enable ? 1 : 0,  // TCP keep-alive on.
     85     delay,  // Delay seconds before sending first TCP keep-alive packet.
     86     delay,  // Delay seconds between sending TCP keep-alive packets.
     87   };
     88   DWORD bytes_returned = 0xABAB;
     89   int rv = WSAIoctl(socket, SIO_KEEPALIVE_VALS, &keepalive_vals,
     90                     sizeof(keepalive_vals), NULL, 0,
     91                     &bytes_returned, NULL, NULL);
     92   DCHECK(!rv) << "Could not enable TCP Keep-Alive for socket: " << socket
     93               << " [error: " << WSAGetLastError() << "].";
     94 
     95   // Disregard any failure in disabling nagle or enabling TCP Keep-Alive.
     96   return rv == 0;
     97 }
     98 
     99 int MapConnectError(int os_error) {
    100   switch (os_error) {
    101     // connect fails with WSAEACCES when Windows Firewall blocks the
    102     // connection.
    103     case WSAEACCES:
    104       return ERR_NETWORK_ACCESS_DENIED;
    105     case WSAETIMEDOUT:
    106       return ERR_CONNECTION_TIMED_OUT;
    107     default: {
    108       int net_error = MapSystemError(os_error);
    109       if (net_error == ERR_FAILED)
    110         return ERR_CONNECTION_FAILED;  // More specific than ERR_FAILED.
    111 
    112       // Give a more specific error when the user is offline.
    113       if (net_error == ERR_ADDRESS_UNREACHABLE &&
    114           NetworkChangeNotifier::IsOffline()) {
    115         return ERR_INTERNET_DISCONNECTED;
    116       }
    117 
    118       return net_error;
    119     }
    120   }
    121 }
    122 
    123 }  // namespace
    124 
    125 //-----------------------------------------------------------------------------
    126 
    127 // Nothing to do for Windows since it doesn't support TCP FastOpen.
    128 // TODO(jri): Remove these along with the corresponding global variables.
    129 bool IsTCPFastOpenSupported() { return false; }
    130 bool IsTCPFastOpenUserEnabled() { return false; }
    131 void CheckSupportAndMaybeEnableTCPFastOpen(bool user_enabled) {}
    132 
// This class encapsulates all the state that has to be preserved as long as
// there is a network IO operation in progress. If the owner TCPSocketWin is
// destroyed while an operation is in progress, the Core is detached and it
// lives until the operation completes and the OS doesn't reference any resource
// declared on this class anymore.
//
// Lifetime: the object is reference counted. WatchForRead()/WatchForWrite()
// each take an extra reference that the matching delegate's
// OnObjectSignaled() releases.
class TCPSocketWin::Core : public base::RefCounted<Core> {
 public:
  // |socket| is the owning TCPSocketWin; it is notified when a watched
  // operation completes, until Detach() is called.
  explicit Core(TCPSocketWin* socket);

  // Start watching for the end of a read or write operation.
  void WatchForRead();
  void WatchForWrite();

  // The TCPSocketWin is going away. After this call the delegates no longer
  // forward completion notifications to the socket.
  void Detach() { socket_ = NULL; }

  // The separate OVERLAPPED variables for asynchronous operation.
  // |read_overlapped_| is used for both Connect() and Read().
  // |write_overlapped_| is only used for Write();
  OVERLAPPED read_overlapped_;
  OVERLAPPED write_overlapped_;

  // The buffers used in Read() and Write().
  scoped_refptr<IOBuffer> read_iobuffer_;
  scoped_refptr<IOBuffer> write_iobuffer_;
  int read_buffer_length_;
  int write_buffer_length_;

  // Initialized to false by the constructor.
  // NOTE(review): presumably records whether the socket has been switched to
  // the non-blocking read path; it is not used in the code visible here, so
  // confirm against DoRead().
  bool non_blocking_reads_initialized_;

 private:
  friend class base::RefCounted<Core>;

  // Receives the signal for |read_overlapped_.hEvent| and forwards it to the
  // owning socket as either a connect or a read completion.
  class ReadDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit ReadDelegate(Core* core) : core_(core) {}
    virtual ~ReadDelegate() {}

    // base::win::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    Core* const core_;
  };

  // Receives the signal for |write_overlapped_.hEvent| and forwards it to the
  // owning socket as a write completion.
  class WriteDelegate : public base::win::ObjectWatcher::Delegate {
   public:
    explicit WriteDelegate(Core* core) : core_(core) {}
    virtual ~WriteDelegate() {}

    // base::win::ObjectWatcher::Delegate methods:
    virtual void OnObjectSignaled(HANDLE object);

   private:
    Core* const core_;
  };

  // Private: the object is destroyed through base::RefCounted<Core>.
  ~Core();

  // The socket that created this object. NULL once Detach() has been called.
  TCPSocketWin* socket_;

  // |reader_| handles the signals from |read_watcher_|.
  ReadDelegate reader_;
  // |writer_| handles the signals from |write_watcher_|.
  WriteDelegate writer_;

  // |read_watcher_| watches for events from Connect() and Read().
  base::win::ObjectWatcher read_watcher_;
  // |write_watcher_| watches for events from Write();
  base::win::ObjectWatcher write_watcher_;

  DISALLOW_COPY_AND_ASSIGN(Core);
};
    207 
    208 TCPSocketWin::Core::Core(TCPSocketWin* socket)
    209     : read_buffer_length_(0),
    210       write_buffer_length_(0),
    211       non_blocking_reads_initialized_(false),
    212       socket_(socket),
    213       reader_(this),
    214       writer_(this) {
    215   memset(&read_overlapped_, 0, sizeof(read_overlapped_));
    216   memset(&write_overlapped_, 0, sizeof(write_overlapped_));
    217 
    218   read_overlapped_.hEvent = WSACreateEvent();
    219   write_overlapped_.hEvent = WSACreateEvent();
    220 }
    221 
    222 TCPSocketWin::Core::~Core() {
    223   // Make sure the message loop is not watching this object anymore.
    224   read_watcher_.StopWatching();
    225   write_watcher_.StopWatching();
    226 
    227   WSACloseEvent(read_overlapped_.hEvent);
    228   memset(&read_overlapped_, 0xaf, sizeof(read_overlapped_));
    229   WSACloseEvent(write_overlapped_.hEvent);
    230   memset(&write_overlapped_, 0xaf, sizeof(write_overlapped_));
    231 }
    232 
    233 void TCPSocketWin::Core::WatchForRead() {
    234   // We grab an extra reference because there is an IO operation in progress.
    235   // Balanced in ReadDelegate::OnObjectSignaled().
    236   AddRef();
    237   read_watcher_.StartWatching(read_overlapped_.hEvent, &reader_);
    238 }
    239 
    240 void TCPSocketWin::Core::WatchForWrite() {
    241   // We grab an extra reference because there is an IO operation in progress.
    242   // Balanced in WriteDelegate::OnObjectSignaled().
    243   AddRef();
    244   write_watcher_.StartWatching(write_overlapped_.hEvent, &writer_);
    245 }
    246 
    247 void TCPSocketWin::Core::ReadDelegate::OnObjectSignaled(HANDLE object) {
    248   DCHECK_EQ(object, core_->read_overlapped_.hEvent);
    249   if (core_->socket_) {
    250     if (core_->socket_->waiting_connect_)
    251       core_->socket_->DidCompleteConnect();
    252     else
    253       core_->socket_->DidSignalRead();
    254   }
    255 
    256   core_->Release();
    257 }
    258 
    259 void TCPSocketWin::Core::WriteDelegate::OnObjectSignaled(
    260     HANDLE object) {
    261   DCHECK_EQ(object, core_->write_overlapped_.hEvent);
    262   if (core_->socket_)
    263     core_->socket_->DidCompleteWrite();
    264 
    265   core_->Release();
    266 }
    267 
    268 //-----------------------------------------------------------------------------
    269 
    270 TCPSocketWin::TCPSocketWin(net::NetLog* net_log,
    271                            const net::NetLog::Source& source)
    272     : socket_(INVALID_SOCKET),
    273       accept_event_(WSA_INVALID_EVENT),
    274       accept_socket_(NULL),
    275       accept_address_(NULL),
    276       waiting_connect_(false),
    277       waiting_read_(false),
    278       waiting_write_(false),
    279       connect_os_error_(0),
    280       logging_multiple_connect_attempts_(false),
    281       net_log_(BoundNetLog::Make(net_log, NetLog::SOURCE_SOCKET)) {
    282   net_log_.BeginEvent(NetLog::TYPE_SOCKET_ALIVE,
    283                       source.ToEventParametersCallback());
    284   EnsureWinsockInit();
    285 }
    286 
    287 TCPSocketWin::~TCPSocketWin() {
    288   Close();
    289   net_log_.EndEvent(NetLog::TYPE_SOCKET_ALIVE);
    290 }
    291 
    292 int TCPSocketWin::Open(AddressFamily family) {
    293   DCHECK(CalledOnValidThread());
    294   DCHECK_EQ(socket_, INVALID_SOCKET);
    295 
    296   socket_ = CreatePlatformSocket(ConvertAddressFamily(family), SOCK_STREAM,
    297                                  IPPROTO_TCP);
    298   if (socket_ == INVALID_SOCKET) {
    299     PLOG(ERROR) << "CreatePlatformSocket() returned an error";
    300     return MapSystemError(WSAGetLastError());
    301   }
    302 
    303   if (SetNonBlocking(socket_)) {
    304     int result = MapSystemError(WSAGetLastError());
    305     Close();
    306     return result;
    307   }
    308 
    309   return OK;
    310 }
    311 
    312 int TCPSocketWin::AdoptConnectedSocket(SOCKET socket,
    313                                        const IPEndPoint& peer_address) {
    314   DCHECK(CalledOnValidThread());
    315   DCHECK_EQ(socket_, INVALID_SOCKET);
    316   DCHECK(!core_);
    317 
    318   socket_ = socket;
    319 
    320   if (SetNonBlocking(socket_)) {
    321     int result = MapSystemError(WSAGetLastError());
    322     Close();
    323     return result;
    324   }
    325 
    326   core_ = new Core(this);
    327   peer_address_.reset(new IPEndPoint(peer_address));
    328 
    329   return OK;
    330 }
    331 
    332 int TCPSocketWin::AdoptListenSocket(SOCKET socket) {
    333   DCHECK(CalledOnValidThread());
    334   DCHECK_EQ(socket_, INVALID_SOCKET);
    335 
    336   socket_ = socket;
    337 
    338   if (SetNonBlocking(socket_)) {
    339     int result = MapSystemError(WSAGetLastError());
    340     Close();
    341     return result;
    342   }
    343 
    344   // |core_| is not needed for sockets that are used to accept connections.
    345   // The operation here is more like Open but with an existing socket.
    346 
    347   return OK;
    348 }
    349 
    350 int TCPSocketWin::Bind(const IPEndPoint& address) {
    351   DCHECK(CalledOnValidThread());
    352   DCHECK_NE(socket_, INVALID_SOCKET);
    353 
    354   SockaddrStorage storage;
    355   if (!address.ToSockAddr(storage.addr, &storage.addr_len))
    356     return ERR_ADDRESS_INVALID;
    357 
    358   int result = bind(socket_, storage.addr, storage.addr_len);
    359   if (result < 0) {
    360     PLOG(ERROR) << "bind() returned an error";
    361     return MapSystemError(WSAGetLastError());
    362   }
    363 
    364   return OK;
    365 }
    366 
    367 int TCPSocketWin::Listen(int backlog) {
    368   DCHECK(CalledOnValidThread());
    369   DCHECK_GT(backlog, 0);
    370   DCHECK_NE(socket_, INVALID_SOCKET);
    371   DCHECK_EQ(accept_event_, WSA_INVALID_EVENT);
    372 
    373   accept_event_ = WSACreateEvent();
    374   if (accept_event_ == WSA_INVALID_EVENT) {
    375     PLOG(ERROR) << "WSACreateEvent()";
    376     return MapSystemError(WSAGetLastError());
    377   }
    378 
    379   int result = listen(socket_, backlog);
    380   if (result < 0) {
    381     PLOG(ERROR) << "listen() returned an error";
    382     return MapSystemError(WSAGetLastError());
    383   }
    384 
    385   return OK;
    386 }
    387 
    388 int TCPSocketWin::Accept(scoped_ptr<TCPSocketWin>* socket,
    389                          IPEndPoint* address,
    390                          const CompletionCallback& callback) {
    391   DCHECK(CalledOnValidThread());
    392   DCHECK(socket);
    393   DCHECK(address);
    394   DCHECK(!callback.is_null());
    395   DCHECK(accept_callback_.is_null());
    396 
    397   net_log_.BeginEvent(NetLog::TYPE_TCP_ACCEPT);
    398 
    399   int result = AcceptInternal(socket, address);
    400 
    401   if (result == ERR_IO_PENDING) {
    402     // Start watching.
    403     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
    404     accept_watcher_.StartWatching(accept_event_, this);
    405 
    406     accept_socket_ = socket;
    407     accept_address_ = address;
    408     accept_callback_ = callback;
    409   }
    410 
    411   return result;
    412 }
    413 
    414 int TCPSocketWin::Connect(const IPEndPoint& address,
    415                           const CompletionCallback& callback) {
    416   DCHECK(CalledOnValidThread());
    417   DCHECK_NE(socket_, INVALID_SOCKET);
    418   DCHECK(!waiting_connect_);
    419 
    420   // |peer_address_| and |core_| will be non-NULL if Connect() has been called.
    421   // Unless Close() is called to reset the internal state, a second call to
    422   // Connect() is not allowed.
    423   // Please note that we enforce this even if the previous Connect() has
    424   // completed and failed. Although it is allowed to connect the same |socket_|
    425   // again after a connection attempt failed on Windows, it results in
    426   // unspecified behavior according to POSIX. Therefore, we make it behave in
    427   // the same way as TCPSocketLibevent.
    428   DCHECK(!peer_address_ && !core_);
    429 
    430   if (!logging_multiple_connect_attempts_)
    431     LogConnectBegin(AddressList(address));
    432 
    433   peer_address_.reset(new IPEndPoint(address));
    434 
    435   int rv = DoConnect();
    436   if (rv == ERR_IO_PENDING) {
    437     // Synchronous operation not supported.
    438     DCHECK(!callback.is_null());
    439     read_callback_ = callback;
    440     waiting_connect_ = true;
    441   } else {
    442     DoConnectComplete(rv);
    443   }
    444 
    445   return rv;
    446 }
    447 
    448 bool TCPSocketWin::IsConnected() const {
    449   DCHECK(CalledOnValidThread());
    450 
    451   if (socket_ == INVALID_SOCKET || waiting_connect_)
    452     return false;
    453 
    454   if (waiting_read_)
    455     return true;
    456 
    457   // Check if connection is alive.
    458   char c;
    459   int rv = recv(socket_, &c, 1, MSG_PEEK);
    460   if (rv == 0)
    461     return false;
    462   if (rv == SOCKET_ERROR && WSAGetLastError() != WSAEWOULDBLOCK)
    463     return false;
    464 
    465   return true;
    466 }
    467 
    468 bool TCPSocketWin::IsConnectedAndIdle() const {
    469   DCHECK(CalledOnValidThread());
    470 
    471   if (socket_ == INVALID_SOCKET || waiting_connect_)
    472     return false;
    473 
    474   if (waiting_read_)
    475     return true;
    476 
    477   // Check if connection is alive and we haven't received any data
    478   // unexpectedly.
    479   char c;
    480   int rv = recv(socket_, &c, 1, MSG_PEEK);
    481   if (rv >= 0)
    482     return false;
    483   if (WSAGetLastError() != WSAEWOULDBLOCK)
    484     return false;
    485 
    486   return true;
    487 }
    488 
    489 int TCPSocketWin::Read(IOBuffer* buf,
    490                        int buf_len,
    491                        const CompletionCallback& callback) {
    492   DCHECK(CalledOnValidThread());
    493   DCHECK_NE(socket_, INVALID_SOCKET);
    494   DCHECK(!waiting_read_);
    495   DCHECK(read_callback_.is_null());
    496   DCHECK(!core_->read_iobuffer_);
    497 
    498   return DoRead(buf, buf_len, callback);
    499 }
    500 
    501 int TCPSocketWin::Write(IOBuffer* buf,
    502                         int buf_len,
    503                         const CompletionCallback& callback) {
    504   DCHECK(CalledOnValidThread());
    505   DCHECK_NE(socket_, INVALID_SOCKET);
    506   DCHECK(!waiting_write_);
    507   DCHECK(write_callback_.is_null());
    508   DCHECK_GT(buf_len, 0);
    509   DCHECK(!core_->write_iobuffer_);
    510 
    511   base::StatsCounter writes("tcp.writes");
    512   writes.Increment();
    513 
    514   WSABUF write_buffer;
    515   write_buffer.len = buf_len;
    516   write_buffer.buf = buf->data();
    517 
    518   // TODO(wtc): Remove the assertion after enough testing.
    519   AssertEventNotSignaled(core_->write_overlapped_.hEvent);
    520   DWORD num;
    521   int rv = WSASend(socket_, &write_buffer, 1, &num, 0,
    522                    &core_->write_overlapped_, NULL);
    523   if (rv == 0) {
    524     if (ResetEventIfSignaled(core_->write_overlapped_.hEvent)) {
    525       rv = static_cast<int>(num);
    526       if (rv > buf_len || rv < 0) {
    527         // It seems that some winsock interceptors report that more was written
    528         // than was available. Treat this as an error.  http://crbug.com/27870
    529         LOG(ERROR) << "Detected broken LSP: Asked to write " << buf_len
    530                    << " bytes, but " << rv << " bytes reported.";
    531         return ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
    532       }
    533       base::StatsCounter write_bytes("tcp.write_bytes");
    534       write_bytes.Add(rv);
    535       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, rv,
    536                                     buf->data());
    537       return rv;
    538     }
    539   } else {
    540     int os_error = WSAGetLastError();
    541     if (os_error != WSA_IO_PENDING) {
    542       int net_error = MapSystemError(os_error);
    543       net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
    544                         CreateNetLogSocketErrorCallback(net_error, os_error));
    545       return net_error;
    546     }
    547   }
    548   waiting_write_ = true;
    549   write_callback_ = callback;
    550   core_->write_iobuffer_ = buf;
    551   core_->write_buffer_length_ = buf_len;
    552   core_->WatchForWrite();
    553   return ERR_IO_PENDING;
    554 }
    555 
    556 int TCPSocketWin::GetLocalAddress(IPEndPoint* address) const {
    557   DCHECK(CalledOnValidThread());
    558   DCHECK(address);
    559 
    560   SockaddrStorage storage;
    561   if (getsockname(socket_, storage.addr, &storage.addr_len))
    562     return MapSystemError(WSAGetLastError());
    563   if (!address->FromSockAddr(storage.addr, storage.addr_len))
    564     return ERR_ADDRESS_INVALID;
    565 
    566   return OK;
    567 }
    568 
    569 int TCPSocketWin::GetPeerAddress(IPEndPoint* address) const {
    570   DCHECK(CalledOnValidThread());
    571   DCHECK(address);
    572   if (!IsConnected())
    573     return ERR_SOCKET_NOT_CONNECTED;
    574   *address = *peer_address_;
    575   return OK;
    576 }
    577 
    578 int TCPSocketWin::SetDefaultOptionsForServer() {
    579   return SetExclusiveAddrUse();
    580 }
    581 
    582 void TCPSocketWin::SetDefaultOptionsForClient() {
    583   // Increase the socket buffer sizes from the default sizes for WinXP.  In
    584   // performance testing, there is substantial benefit by increasing from 8KB
    585   // to 64KB.
    586   // See also:
    587   //    http://support.microsoft.com/kb/823764/EN-US
    588   // On Vista, if we manually set these sizes, Vista turns off its receive
    589   // window auto-tuning feature.
    590   //    http://blogs.msdn.com/wndp/archive/2006/05/05/Winhec-blog-tcpip-2.aspx
    591   // Since Vista's auto-tune is better than any static value we can could set,
    592   // only change these on pre-vista machines.
    593   if (base::win::GetVersion() < base::win::VERSION_VISTA) {
    594     const int32 kSocketBufferSize = 64 * 1024;
    595     SetSocketReceiveBufferSize(socket_, kSocketBufferSize);
    596     SetSocketSendBufferSize(socket_, kSocketBufferSize);
    597   }
    598 
    599   DisableNagle(socket_, true);
    600   SetTCPKeepAlive(socket_, true, kTCPKeepAliveSeconds);
    601 }
    602 
    603 int TCPSocketWin::SetExclusiveAddrUse() {
    604   // On Windows, a bound end point can be hijacked by another process by
    605   // setting SO_REUSEADDR. Therefore a Windows-only option SO_EXCLUSIVEADDRUSE
    606   // was introduced in Windows NT 4.0 SP4. If the socket that is bound to the
    607   // end point has SO_EXCLUSIVEADDRUSE enabled, it is not possible for another
    608   // socket to forcibly bind to the end point until the end point is unbound.
    609   // It is recommend that all server applications must use SO_EXCLUSIVEADDRUSE.
    610   // MSDN: http://goo.gl/M6fjQ.
    611   //
    612   // Unlike on *nix, on Windows a TCP server socket can always bind to an end
    613   // point in TIME_WAIT state without setting SO_REUSEADDR, therefore it is not
    614   // needed here.
    615   //
    616   // SO_EXCLUSIVEADDRUSE will prevent a TCP client socket from binding to an end
    617   // point in TIME_WAIT status. It does not have this effect for a TCP server
    618   // socket.
    619 
    620   BOOL true_value = 1;
    621   int rv = setsockopt(socket_, SOL_SOCKET, SO_EXCLUSIVEADDRUSE,
    622                       reinterpret_cast<const char*>(&true_value),
    623                       sizeof(true_value));
    624   if (rv < 0)
    625     return MapSystemError(errno);
    626   return OK;
    627 }
    628 
    629 int TCPSocketWin::SetReceiveBufferSize(int32 size) {
    630   DCHECK(CalledOnValidThread());
    631   return SetSocketReceiveBufferSize(socket_, size);
    632 }
    633 
    634 int TCPSocketWin::SetSendBufferSize(int32 size) {
    635   DCHECK(CalledOnValidThread());
    636   return SetSocketSendBufferSize(socket_, size);
    637 }
    638 
    639 bool TCPSocketWin::SetKeepAlive(bool enable, int delay) {
    640   return SetTCPKeepAlive(socket_, enable, delay);
    641 }
    642 
    643 bool TCPSocketWin::SetNoDelay(bool no_delay) {
    644   return DisableNagle(socket_, no_delay);
    645 }
    646 
    647 void TCPSocketWin::Close() {
    648   DCHECK(CalledOnValidThread());
    649 
    650   if (socket_ != INVALID_SOCKET) {
    651     // Only log the close event if there's actually a socket to close.
    652     net_log_.AddEvent(NetLog::EventType::TYPE_SOCKET_CLOSED);
    653 
    654     // Note: don't use CancelIo to cancel pending IO because it doesn't work
    655     // when there is a Winsock layered service provider.
    656 
    657     // In most socket implementations, closing a socket results in a graceful
    658     // connection shutdown, but in Winsock we have to call shutdown explicitly.
    659     // See the MSDN page "Graceful Shutdown, Linger Options, and Socket Closure"
    660     // at http://msdn.microsoft.com/en-us/library/ms738547.aspx
    661     shutdown(socket_, SD_SEND);
    662 
    663     // This cancels any pending IO.
    664     if (closesocket(socket_) < 0)
    665       PLOG(ERROR) << "closesocket";
    666     socket_ = INVALID_SOCKET;
    667   }
    668 
    669   if (!accept_callback_.is_null()) {
    670     accept_watcher_.StopWatching();
    671     accept_socket_ = NULL;
    672     accept_address_ = NULL;
    673     accept_callback_.Reset();
    674   }
    675 
    676   if (accept_event_) {
    677     WSACloseEvent(accept_event_);
    678     accept_event_ = WSA_INVALID_EVENT;
    679   }
    680 
    681   if (core_) {
    682     if (waiting_connect_) {
    683       // We closed the socket, so this notification will never come.
    684       // From MSDN' WSAEventSelect documentation:
    685       // "Closing a socket with closesocket also cancels the association and
    686       // selection of network events specified in WSAEventSelect for the
    687       // socket".
    688       core_->Release();
    689     }
    690     core_->Detach();
    691     core_ = NULL;
    692   }
    693 
    694   waiting_connect_ = false;
    695   waiting_read_ = false;
    696   waiting_write_ = false;
    697 
    698   read_callback_.Reset();
    699   write_callback_.Reset();
    700   peer_address_.reset();
    701   connect_os_error_ = 0;
    702 }
    703 
    704 void TCPSocketWin::StartLoggingMultipleConnectAttempts(
    705     const AddressList& addresses) {
    706   if (!logging_multiple_connect_attempts_) {
    707     logging_multiple_connect_attempts_ = true;
    708     LogConnectBegin(addresses);
    709   } else {
    710     NOTREACHED();
    711   }
    712 }
    713 
    714 void TCPSocketWin::EndLoggingMultipleConnectAttempts(int net_error) {
    715   if (logging_multiple_connect_attempts_) {
    716     LogConnectEnd(net_error);
    717     logging_multiple_connect_attempts_ = false;
    718   } else {
    719     NOTREACHED();
    720   }
    721 }
    722 
    723 int TCPSocketWin::AcceptInternal(scoped_ptr<TCPSocketWin>* socket,
    724                                  IPEndPoint* address) {
    725   SockaddrStorage storage;
    726   int new_socket = accept(socket_, storage.addr, &storage.addr_len);
    727   if (new_socket < 0) {
    728     int net_error = MapSystemError(WSAGetLastError());
    729     if (net_error != ERR_IO_PENDING)
    730       net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
    731     return net_error;
    732   }
    733 
    734   IPEndPoint ip_end_point;
    735   if (!ip_end_point.FromSockAddr(storage.addr, storage.addr_len)) {
    736     NOTREACHED();
    737     if (closesocket(new_socket) < 0)
    738       PLOG(ERROR) << "closesocket";
    739     int net_error = ERR_ADDRESS_INVALID;
    740     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, net_error);
    741     return net_error;
    742   }
    743   scoped_ptr<TCPSocketWin> tcp_socket(new TCPSocketWin(
    744       net_log_.net_log(), net_log_.source()));
    745   int adopt_result = tcp_socket->AdoptConnectedSocket(new_socket, ip_end_point);
    746   if (adopt_result != OK) {
    747     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_ACCEPT, adopt_result);
    748     return adopt_result;
    749   }
    750   *socket = tcp_socket.Pass();
    751   *address = ip_end_point;
    752   net_log_.EndEvent(NetLog::TYPE_TCP_ACCEPT,
    753                     CreateNetLogIPEndPointCallback(&ip_end_point));
    754   return OK;
    755 }
    756 
    757 void TCPSocketWin::OnObjectSignaled(HANDLE object) {
    758   WSANETWORKEVENTS ev;
    759   if (WSAEnumNetworkEvents(socket_, accept_event_, &ev) == SOCKET_ERROR) {
    760     PLOG(ERROR) << "WSAEnumNetworkEvents()";
    761     return;
    762   }
    763 
    764   if (ev.lNetworkEvents & FD_ACCEPT) {
    765     int result = AcceptInternal(accept_socket_, accept_address_);
    766     if (result != ERR_IO_PENDING) {
    767       accept_socket_ = NULL;
    768       accept_address_ = NULL;
    769       base::ResetAndReturn(&accept_callback_).Run(result);
    770     }
    771   } else {
    772     // This happens when a client opens a connection and closes it before we
    773     // have a chance to accept it.
    774     DCHECK(ev.lNetworkEvents == 0);
    775 
    776     // Start watching the next FD_ACCEPT event.
    777     WSAEventSelect(socket_, accept_event_, FD_ACCEPT);
    778     accept_watcher_.StartWatching(accept_event_, this);
    779   }
    780 }
    781 
    782 int TCPSocketWin::DoConnect() {
    783   DCHECK_EQ(connect_os_error_, 0);
    784   DCHECK(!core_);
    785 
    786   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
    787                       CreateNetLogIPEndPointCallback(peer_address_.get()));
    788 
    789   core_ = new Core(this);
    790   // WSAEventSelect sets the socket to non-blocking mode as a side effect.
    791   // Our connect() and recv() calls require that the socket be non-blocking.
    792   WSAEventSelect(socket_, core_->read_overlapped_.hEvent, FD_CONNECT);
    793 
    794   SockaddrStorage storage;
    795   if (!peer_address_->ToSockAddr(storage.addr, &storage.addr_len))
    796     return ERR_ADDRESS_INVALID;
    797   if (!connect(socket_, storage.addr, storage.addr_len)) {
    798     // Connected without waiting!
    799     //
    800     // The MSDN page for connect says:
    801     //   With a nonblocking socket, the connection attempt cannot be completed
    802     //   immediately. In this case, connect will return SOCKET_ERROR, and
    803     //   WSAGetLastError will return WSAEWOULDBLOCK.
    804     // which implies that for a nonblocking socket, connect never returns 0.
    805     // It's not documented whether the event object will be signaled or not
    806     // if connect does return 0.  So the code below is essentially dead code
    807     // and we don't know if it's correct.
    808     NOTREACHED();
    809 
    810     if (ResetEventIfSignaled(core_->read_overlapped_.hEvent))
    811       return OK;
    812   } else {
    813     int os_error = WSAGetLastError();
    814     if (os_error != WSAEWOULDBLOCK) {
    815       LOG(ERROR) << "connect failed: " << os_error;
    816       connect_os_error_ = os_error;
    817       int rv = MapConnectError(os_error);
    818       CHECK_NE(ERR_IO_PENDING, rv);
    819       return rv;
    820     }
    821   }
    822 
    823   core_->WatchForRead();
    824   return ERR_IO_PENDING;
    825 }
    826 
    827 void TCPSocketWin::DoConnectComplete(int result) {
    828   // Log the end of this attempt (and any OS error it threw).
    829   int os_error = connect_os_error_;
    830   connect_os_error_ = 0;
    831   if (result != OK) {
    832     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT,
    833                       NetLog::IntegerCallback("os_error", os_error));
    834   } else {
    835     net_log_.EndEvent(NetLog::TYPE_TCP_CONNECT_ATTEMPT);
    836   }
    837 
    838   if (!logging_multiple_connect_attempts_)
    839     LogConnectEnd(result);
    840 }
    841 
    842 void TCPSocketWin::LogConnectBegin(const AddressList& addresses) {
    843   base::StatsCounter connects("tcp.connect");
    844   connects.Increment();
    845 
    846   net_log_.BeginEvent(NetLog::TYPE_TCP_CONNECT,
    847                       addresses.CreateNetLogCallback());
    848 }
    849 
    850 void TCPSocketWin::LogConnectEnd(int net_error) {
    851   if (net_error == OK)
    852     UpdateConnectionTypeHistograms(CONNECTION_ANY);
    853 
    854   if (net_error != OK) {
    855     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, net_error);
    856     return;
    857   }
    858 
    859   struct sockaddr_storage source_address;
    860   socklen_t addrlen = sizeof(source_address);
    861   int rv = getsockname(
    862       socket_, reinterpret_cast<struct sockaddr*>(&source_address), &addrlen);
    863   if (rv != 0) {
    864     LOG(ERROR) << "getsockname() [rv: " << rv
    865                << "] error: " << WSAGetLastError();
    866     NOTREACHED();
    867     net_log_.EndEventWithNetErrorCode(NetLog::TYPE_TCP_CONNECT, rv);
    868     return;
    869   }
    870 
    871   net_log_.EndEvent(
    872       NetLog::TYPE_TCP_CONNECT,
    873       CreateNetLogSourceAddressCallback(
    874           reinterpret_cast<const struct sockaddr*>(&source_address),
    875           sizeof(source_address)));
    876 }
    877 
    878 int TCPSocketWin::DoRead(IOBuffer* buf, int buf_len,
    879                          const CompletionCallback& callback) {
    880   if (!core_->non_blocking_reads_initialized_) {
    881     WSAEventSelect(socket_, core_->read_overlapped_.hEvent,
    882                    FD_READ | FD_CLOSE);
    883     core_->non_blocking_reads_initialized_ = true;
    884   }
    885   int rv = recv(socket_, buf->data(), buf_len, 0);
    886   if (rv == SOCKET_ERROR) {
    887     int os_error = WSAGetLastError();
    888     if (os_error != WSAEWOULDBLOCK) {
    889       int net_error = MapSystemError(os_error);
    890       net_log_.AddEvent(
    891           NetLog::TYPE_SOCKET_READ_ERROR,
    892           CreateNetLogSocketErrorCallback(net_error, os_error));
    893       return net_error;
    894     }
    895   } else {
    896     base::StatsCounter read_bytes("tcp.read_bytes");
    897     if (rv > 0)
    898       read_bytes.Add(rv);
    899     net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_RECEIVED, rv,
    900                                   buf->data());
    901     return rv;
    902   }
    903 
    904   waiting_read_ = true;
    905   read_callback_ = callback;
    906   core_->read_iobuffer_ = buf;
    907   core_->read_buffer_length_ = buf_len;
    908   core_->WatchForRead();
    909   return ERR_IO_PENDING;
    910 }
    911 
    912 void TCPSocketWin::DidCompleteConnect() {
    913   DCHECK(waiting_connect_);
    914   DCHECK(!read_callback_.is_null());
    915   int result;
    916 
    917   WSANETWORKEVENTS events;
    918   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
    919                                 &events);
    920   int os_error = 0;
    921   if (rv == SOCKET_ERROR) {
    922     NOTREACHED();
    923     os_error = WSAGetLastError();
    924     result = MapSystemError(os_error);
    925   } else if (events.lNetworkEvents & FD_CONNECT) {
    926     os_error = events.iErrorCode[FD_CONNECT_BIT];
    927     result = MapConnectError(os_error);
    928   } else {
    929     NOTREACHED();
    930     result = ERR_UNEXPECTED;
    931   }
    932 
    933   connect_os_error_ = os_error;
    934   DoConnectComplete(result);
    935   waiting_connect_ = false;
    936 
    937   DCHECK_NE(result, ERR_IO_PENDING);
    938   base::ResetAndReturn(&read_callback_).Run(result);
    939 }
    940 
    941 void TCPSocketWin::DidCompleteWrite() {
    942   DCHECK(waiting_write_);
    943   DCHECK(!write_callback_.is_null());
    944 
    945   DWORD num_bytes, flags;
    946   BOOL ok = WSAGetOverlappedResult(socket_, &core_->write_overlapped_,
    947                                    &num_bytes, FALSE, &flags);
    948   WSAResetEvent(core_->write_overlapped_.hEvent);
    949   waiting_write_ = false;
    950   int rv;
    951   if (!ok) {
    952     int os_error = WSAGetLastError();
    953     rv = MapSystemError(os_error);
    954     net_log_.AddEvent(NetLog::TYPE_SOCKET_WRITE_ERROR,
    955                       CreateNetLogSocketErrorCallback(rv, os_error));
    956   } else {
    957     rv = static_cast<int>(num_bytes);
    958     if (rv > core_->write_buffer_length_ || rv < 0) {
    959       // It seems that some winsock interceptors report that more was written
    960       // than was available. Treat this as an error.  http://crbug.com/27870
    961       LOG(ERROR) << "Detected broken LSP: Asked to write "
    962                  << core_->write_buffer_length_ << " bytes, but " << rv
    963                  << " bytes reported.";
    964       rv = ERR_WINSOCK_UNEXPECTED_WRITTEN_BYTES;
    965     } else {
    966       base::StatsCounter write_bytes("tcp.write_bytes");
    967       write_bytes.Add(num_bytes);
    968       net_log_.AddByteTransferEvent(NetLog::TYPE_SOCKET_BYTES_SENT, num_bytes,
    969                                     core_->write_iobuffer_->data());
    970     }
    971   }
    972 
    973   core_->write_iobuffer_ = NULL;
    974 
    975   DCHECK_NE(rv, ERR_IO_PENDING);
    976   base::ResetAndReturn(&write_callback_).Run(rv);
    977 }
    978 
    979 void TCPSocketWin::DidSignalRead() {
    980   DCHECK(waiting_read_);
    981   DCHECK(!read_callback_.is_null());
    982 
    983   int os_error = 0;
    984   WSANETWORKEVENTS network_events;
    985   int rv = WSAEnumNetworkEvents(socket_, core_->read_overlapped_.hEvent,
    986                                 &network_events);
    987   if (rv == SOCKET_ERROR) {
    988     os_error = WSAGetLastError();
    989     rv = MapSystemError(os_error);
    990   } else if (network_events.lNetworkEvents) {
    991     DCHECK_EQ(network_events.lNetworkEvents & ~(FD_READ | FD_CLOSE), 0);
    992     // If network_events.lNetworkEvents is FD_CLOSE and
    993     // network_events.iErrorCode[FD_CLOSE_BIT] is 0, it is a graceful
    994     // connection closure. It is tempting to directly set rv to 0 in
    995     // this case, but the MSDN pages for WSAEventSelect and
    996     // WSAAsyncSelect recommend we still call DoRead():
    997     //   FD_CLOSE should only be posted after all data is read from a
    998     //   socket, but an application should check for remaining data upon
    999     //   receipt of FD_CLOSE to avoid any possibility of losing data.
   1000     //
   1001     // If network_events.iErrorCode[FD_READ_BIT] or
   1002     // network_events.iErrorCode[FD_CLOSE_BIT] is nonzero, still call
   1003     // DoRead() because recv() reports a more accurate error code
   1004     // (WSAECONNRESET vs. WSAECONNABORTED) when the connection was
   1005     // reset.
   1006     rv = DoRead(core_->read_iobuffer_, core_->read_buffer_length_,
   1007                 read_callback_);
   1008     if (rv == ERR_IO_PENDING)
   1009       return;
   1010   } else {
   1011     // This may happen because Read() may succeed synchronously and
   1012     // consume all the received data without resetting the event object.
   1013     core_->WatchForRead();
   1014     return;
   1015   }
   1016 
   1017   waiting_read_ = false;
   1018   core_->read_iobuffer_ = NULL;
   1019   core_->read_buffer_length_ = 0;
   1020 
   1021   DCHECK_NE(rv, ERR_IO_PENDING);
   1022   base::ResetAndReturn(&read_callback_).Run(rv);
   1023 }
   1024 
   1025 }  // namespace net
   1026