Home | History | Annotate | Download | only in base
      1 /*
      2  * libjingle
      3  * Copyright 2004--2005, Google Inc.
      4  *
      5  * Redistribution and use in source and binary forms, with or without
      6  * modification, are permitted provided that the following conditions are met:
      7  *
      8  *  1. Redistributions of source code must retain the above copyright notice,
      9  *     this list of conditions and the following disclaimer.
     10  *  2. Redistributions in binary form must reproduce the above copyright notice,
     11  *     this list of conditions and the following disclaimer in the documentation
     12  *     and/or other materials provided with the distribution.
     13  *  3. The name of the author may not be used to endorse or promote products
     14  *     derived from this software without specific prior written permission.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
     17  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
     18  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
     19  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     20  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     21  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
     22  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
     23  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
     24  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
     25  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     26  */
     27 
     28 #include "talk/p2p/base/pseudotcp.h"
     29 
     30 #include <cstdio>
     31 #include <cstdlib>
     32 
     33 #include "talk/base/basictypes.h"
     34 #include "talk/base/byteorder.h"
     35 #include "talk/base/common.h"
     36 #include "talk/base/logging.h"
     37 #include "talk/base/socket.h"
     38 #include "talk/base/stringutils.h"
     39 #include "talk/base/time.h"
     40 
     41 // The following logging is for detailed (packet-level) analysis only.
        // _DEBUGMSG selects the verbosity compiled into this file: sections guarded
        // by "#if _DEBUGMSG >= _DBG_NORMAL" or "#if _DEBUGMSG >= _DBG_VERBOSE" emit
        // additional LOG output.  It is set to _DBG_NONE here, so those sections
        // are compiled out.
     42 #define _DBG_NONE     0
     43 #define _DBG_NORMAL   1
     44 #define _DBG_VERBOSE  2
     45 #define _DEBUGMSG _DBG_NONE
     46 
     47 namespace cricket {
     48 
     49 //////////////////////////////////////////////////////////////////////
     50 // Network Constants
     51 //////////////////////////////////////////////////////////////////////
     52 
     53 // Standard MTUs
        // Candidate packet sizes listed in descending order and terminated by a 0
        // sentinel.  Entries that are commented out are excluded from the table.
        // NOTE(review): the consumer of this table is outside this excerpt;
        // presumably it is indexed by m_msslevel when picking the MSS - confirm.
     54 const uint16 PACKET_MAXIMUMS[] = {
     55   65535,    // Theoretical maximum, Hyperchannel
     56   32000,    // Nothing
     57   17914,    // 16Mb IBM Token Ring
     58   8166,   // IEEE 802.4
     59   //4464,   // IEEE 802.5 (4Mb max)
     60   4352,   // FDDI
     61   //2048,   // Wideband Network
     62   2002,   // IEEE 802.5 (4Mb recommended)
     63   //1536,   // Experimental Ethernet Networks
     64   //1500,   // Ethernet, Point-to-Point (default)
     65   1492,   // IEEE 802.3
     66   1006,   // SLIP, ARPANET
     67   //576,    // X.25 Networks
     68   //544,    // DEC IP Portal
     69   //512,    // NETBIOS
     70   508,    // IEEE 802/Source-Rt Bridge, ARCNET
     71   296,    // Point-to-Point (low delay)
     72   //68,     // Official minimum
     73   0,      // End of list marker
     74 };
     75 
        // Largest datagram handled by this file: NotifyPacket() rejects anything
        // longer, and packet() sizes its scratch buffer with it.
     76 const uint32 MAX_PACKET = 65535;
     77 // Note: we removed lowest level because packet overhead was larger!
        // Smallest packet size considered; the constructor derives the initial MSS
        // as MIN_PACKET - PACKET_OVERHEAD.
     78 const uint32 MIN_PACKET = 296;
     79 
        // Per-layer header sizes in bytes.  IP, UDP and JINGLE sizes are summed into
        // PACKET_OVERHEAD; ICMP_HEADER_SIZE is not referenced in the visible part of
        // this file.
     80 const uint32 IP_HEADER_SIZE = 20; // (+ up to 40 bytes of options?)
     81 const uint32 ICMP_HEADER_SIZE = 8;
     82 const uint32 UDP_HEADER_SIZE = 8;
     83 // TODO: Make JINGLE_HEADER_SIZE transparent to this code?
     84 const uint32 JINGLE_HEADER_SIZE = 64; // when relay framing is in use
     85 
     86 //////////////////////////////////////////////////////////////////////
     87 // Global Constants and Functions
     88 //////////////////////////////////////////////////////////////////////
     89 //
     90 //    0                   1                   2                   3
     91 //    0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
     92 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     93 //  0 |                      Conversation Number                      |
     94 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     95 //  4 |                        Sequence Number                        |
     96 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     97 //  8 |                     Acknowledgment Number                     |
     98 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
     99 //    |               |   |U|A|P|R|S|F|                               |
    100 // 12 |    Control    |   |R|C|S|S|Y|I|            Window             |
    101 //    |               |   |G|K|H|T|N|N|                               |
    102 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    103 // 16 |                       Timestamp sending                       |
    104 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    105 // 20 |                      Timestamp receiving                      |
    106 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    107 // 24 |                             data                              |
    108 //    +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
    109 //
    110 //////////////////////////////////////////////////////////////////////
    111 
        // Compile-time switch for the idle-ping / idle-timeout code paths in
        // NotifyClock() and clock_check().  Disabled (0) here.
    112 #define PSEUDO_KEEPALIVE 0
    113 
    114 const uint32 MAX_SEQ = 0xFFFFFFFF;
        // Size in bytes of the fixed header laid out in the diagram above; payload
        // data starts at this offset.
    115 const uint32 HEADER_SIZE = 24;
        // Bytes of every packet that are not payload (all header layers combined).
    116 const uint32 PACKET_OVERHEAD = HEADER_SIZE + UDP_HEADER_SIZE + IP_HEADER_SIZE + JINGLE_HEADER_SIZE;
    117 
        // Retransmission timeout limits, in milliseconds.  DEF_RTO is the initial
        // value and also the back-off cap while the connection is not yet
        // established; MIN_RTO / MAX_RTO bound the RTT-derived value.
    118 const uint32 MIN_RTO   =   250; // 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second")
    119 const uint32 DEF_RTO   =  3000; // 3 seconds (RFC1122, Sec 4.2.3.1)
    120 const uint32 MAX_RTO   = 60000; // 60 seconds
        // Initial value of m_ack_delay (changeable through OPT_ACKDELAY).
    121 const uint32 DEF_ACK_DELAY = 100; // 100 milliseconds
    122 
        // Bits of the header flag byte (offset 13) that this file tests:
        // FLAG_CTL marks a segment whose payload is a control code, FLAG_RST a reset.
    123 const uint8 FLAG_CTL = 0x02;
    124 const uint8 FLAG_RST = 0x04;
    125 
        // Control codes carried as the first payload byte of a FLAG_CTL segment.
        // CTL_CONNECT is the only code the visible code accepts.
    126 const uint8 CTL_CONNECT = 0;
    127 //const uint8 CTL_REDIRECT = 1;
    128 const uint8 CTL_EXTRA = 255;
    129 
    130 /*
    131 const uint8 FLAG_FIN = 0x01;
    132 const uint8 FLAG_SYN = 0x02;
    133 const uint8 FLAG_ACK = 0x10;
    134 */
    135 
    136 const uint32 CTRL_BOUND = 0x80000000;
    137 
        // Wake-up intervals (milliseconds) reported by clock_check().
    138 const long DEFAULT_TIMEOUT = 4000; // If there are no pending clocks, wake up every 4 seconds
    139 const long CLOSED_TIMEOUT = 60 * 1000; // If the connection is closed, once per minute
    140 
    141 #if PSEUDO_KEEPALIVE
    142 // !?! Rethink these times
    143 const uint32 IDLE_PING = 20 * 1000; // 20 seconds (note: WinXP SP2 firewall udp timeout is 90 seconds)
    144 const uint32 IDLE_TIMEOUT = 90 * 1000; // 90 seconds;
    145 #endif // PSEUDO_KEEPALIVE
    146 
    147 //////////////////////////////////////////////////////////////////////
    148 // Helper Functions
    149 //////////////////////////////////////////////////////////////////////
    150 
    151 inline void long_to_bytes(uint32 val, void* buf) {
    152   *static_cast<uint32*>(buf) = talk_base::HostToNetwork32(val);
    153 }
    154 
    155 inline void short_to_bytes(uint16 val, void* buf) {
    156   *static_cast<uint16*>(buf) = talk_base::HostToNetwork16(val);
    157 }
    158 
    159 inline uint32 bytes_to_long(const void* buf) {
    160   return talk_base::NetworkToHost32(*static_cast<const uint32*>(buf));
    161 }
    162 
    163 inline uint16 bytes_to_short(const void* buf) {
    164   return talk_base::NetworkToHost16(*static_cast<const uint16*>(buf));
    165 }
    166 
    167 uint32 bound(uint32 lower, uint32 middle, uint32 upper) {
    168   return talk_base::_min(talk_base::_max(lower, middle), upper);
    169 }
    170 
    171 //////////////////////////////////////////////////////////////////////
    172 // Debugging Statistics
    173 //////////////////////////////////////////////////////////////////////
    174 
    175 #if 0  // Not used yet
    176 
    177 enum Stat {
    178   S_SENT_PACKET,   // All packet sends
    179   S_RESENT_PACKET, // All packet sends that are retransmits
    180   S_RECV_PACKET,   // All packet receives
    181   S_RECV_NEW,      // All packet receives that are too new
    182   S_RECV_OLD,      // All packet receives that are too old
    183   S_NUM_STATS
    184 };
    185 
    186 const char* const STAT_NAMES[S_NUM_STATS] = {
    187   "snt",
    188   "snt-r",
    189   "rcv"
    190   "rcv-n",
    191   "rcv-o"
    192 };
    193 
    194 int g_stats[S_NUM_STATS];
    195 inline void Incr(Stat s) { ++g_stats[s]; }
    196 void ReportStats() {
    197   char buffer[256];
    198   size_t len = 0;
    199   for (int i = 0; i < S_NUM_STATS; ++i) {
    200     len += talk_base::sprintfn(buffer, ARRAY_SIZE(buffer), "%s%s:%d",
    201                                (i == 0) ? "" : ",", STAT_NAMES[i], g_stats[i]);
    202     g_stats[i] = 0;
    203   }
    204   LOG(LS_INFO) << "Stats[" << buffer << "]";
    205 }
    206 
    207 #endif
    208 
    209 //////////////////////////////////////////////////////////////////////
    210 // PseudoTcp
    211 //////////////////////////////////////////////////////////////////////
    212 
    213 uint32 PseudoTcp::Now() {
    214 #if 0  // Use this to synchronize timers with logging timestamps (easier debug)
    215   return talk_base::TimeSince(StartTime());
    216 #else
    217   return talk_base::Time();
    218 #endif
    219 }
    220 
    221 PseudoTcp::PseudoTcp(IPseudoTcpNotify* notify, uint32 conv)
    222     : m_notify(notify), m_shutdown(SD_NONE), m_error(0) {
    223 
    224   // Sanity check on buffer sizes (needed for OnTcpWriteable notification logic)
    225   ASSERT(sizeof(m_rbuf) + MIN_PACKET < sizeof(m_sbuf));
    226 
    227   uint32 now = Now();
    228 
    229   m_state = TCP_LISTEN;
    230   m_conv = conv;
    231   m_rcv_wnd = sizeof(m_rbuf);
    232   m_snd_nxt = m_slen = 0;
    233   m_snd_wnd = 1;
    234   m_snd_una = m_rcv_nxt = m_rlen = 0;
    235   m_bReadEnable = true;
    236   m_bWriteEnable = false;
    237   m_t_ack = 0;
    238 
    239   m_msslevel = 0;
    240   m_largest = 0;
    241   ASSERT(MIN_PACKET > PACKET_OVERHEAD);
    242   m_mss = MIN_PACKET - PACKET_OVERHEAD;
    243   m_mtu_advise = MAX_PACKET;
    244 
    245   m_rto_base = 0;
    246 
    247   m_cwnd = 2 * m_mss;
    248   m_ssthresh = sizeof(m_rbuf);
    249   m_lastrecv = m_lastsend = m_lasttraffic = now;
    250   m_bOutgoing = false;
    251 
    252   m_dup_acks = 0;
    253   m_recover = 0;
    254 
    255   m_ts_recent = m_ts_lastack = 0;
    256 
    257   m_rx_rto = DEF_RTO;
    258   m_rx_srtt = m_rx_rttvar = 0;
    259 
    260   m_use_nagling = true;
    261   m_ack_delay = DEF_ACK_DELAY;
    262 }
    263 
    264 PseudoTcp::~PseudoTcp() {
    265 }
    266 
    267 int PseudoTcp::Connect() {
    268   if (m_state != TCP_LISTEN) {
    269     m_error = EINVAL;
    270     return -1;
    271   }
    272 
    273   m_state = TCP_SYN_SENT;
    274   LOG(LS_INFO) << "State: TCP_SYN_SENT";
    275 
    276   char buffer[1];
    277   buffer[0] = CTL_CONNECT;
    278   queue(buffer, 1, true);
    279   attemptSend();
    280 
    281   return 0;
    282 }
    283 
    284 void PseudoTcp::NotifyMTU(uint16 mtu) {
    285   m_mtu_advise = mtu;
    286   if (m_state == TCP_ESTABLISHED) {
    287     adjustMTU();
    288   }
    289 }
    290 
    291 void PseudoTcp::NotifyClock(uint32 now) {
    292   if (m_state == TCP_CLOSED)
    293     return;
    294 
    295     // Check if it's time to retransmit a segment
    296   if (m_rto_base && (talk_base::TimeDiff(m_rto_base + m_rx_rto, now) <= 0)) {
    297     if (m_slist.empty()) {
    298       ASSERT(false);
    299     } else {
    300       // Note: (m_slist.front().xmit == 0)) {
    301       // retransmit segments
    302 #if _DEBUGMSG >= _DBG_NORMAL
    303       LOG(LS_INFO) << "timeout retransmit (rto: " << m_rx_rto
    304                    << ") (rto_base: " << m_rto_base
    305                    << ") (now: " << now
    306                    << ") (dup_acks: " << static_cast<unsigned>(m_dup_acks)
    307                    << ")";
    308 #endif // _DEBUGMSG
    309       if (!transmit(m_slist.begin(), now)) {
    310         closedown(ECONNABORTED);
    311         return;
    312       }
    313 
    314       uint32 nInFlight = m_snd_nxt - m_snd_una;
    315       m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
    316       //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
    317       m_cwnd = m_mss;
    318 
    319       // Back off retransmit timer.  Note: the limit is lower when connecting.
    320       uint32 rto_limit = (m_state < TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
    321       m_rx_rto = talk_base::_min(rto_limit, m_rx_rto * 2);
    322       m_rto_base = now;
    323     }
    324   }
    325 
    326   // Check if it's time to probe closed windows
    327   if ((m_snd_wnd == 0)
    328         && (talk_base::TimeDiff(m_lastsend + m_rx_rto, now) <= 0)) {
    329     if (talk_base::TimeDiff(now, m_lastrecv) >= 15000) {
    330       closedown(ECONNABORTED);
    331       return;
    332     }
    333 
    334     // probe the window
    335     packet(m_snd_nxt - 1, 0, 0, 0);
    336     m_lastsend = now;
    337 
    338     // back off retransmit timer
    339     m_rx_rto = talk_base::_min(MAX_RTO, m_rx_rto * 2);
    340   }
    341 
    342   // Check if it's time to send delayed acks
    343   if (m_t_ack && (talk_base::TimeDiff(m_t_ack + m_ack_delay, now) <= 0)) {
    344     packet(m_snd_nxt, 0, 0, 0);
    345   }
    346 
    347 #if PSEUDO_KEEPALIVE
    348   // Check for idle timeout
    349   if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lastrecv + IDLE_TIMEOUT, now) <= 0)) {
    350     closedown(ECONNABORTED);
    351     return;
    352   }
    353 
    354   // Check for ping timeout (to keep udp mapping open)
    355   if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now) <= 0)) {
    356     packet(m_snd_nxt, 0, 0, 0);
    357   }
    358 #endif // PSEUDO_KEEPALIVE
    359 }
    360 
    361 bool PseudoTcp::NotifyPacket(const char* buffer, size_t len) {
    362   if (len > MAX_PACKET) {
    363     LOG_F(WARNING) << "packet too large";
    364     return false;
    365   }
    366   return parse(reinterpret_cast<const uint8 *>(buffer), uint32(len));
    367 }
    368 
    369 bool PseudoTcp::GetNextClock(uint32 now, long& timeout) {
    370   return clock_check(now, timeout);
    371 }
    372 
    373 void PseudoTcp::GetOption(Option opt, int* value) {
    374   if (opt == OPT_NODELAY) {
    375     *value = m_use_nagling ? 0 : 1;
    376   } else if (opt == OPT_ACKDELAY) {
    377     *value = m_ack_delay;
    378   } else {
    379     ASSERT(false);
    380   }
    381 }
    382 
    383 void PseudoTcp::SetOption(Option opt, int value) {
    384   if (opt == OPT_NODELAY) {
    385     m_use_nagling = value == 0;
    386   } else if (opt == OPT_ACKDELAY) {
    387     m_ack_delay = value;
    388   } else {
    389     ASSERT(false);
    390   }
    391 }
    392 
    393 //
    394 // IPStream Implementation
    395 //
    396 
    397 int PseudoTcp::Recv(char* buffer, size_t len) {
    398   if (m_state != TCP_ESTABLISHED) {
    399     m_error = ENOTCONN;
    400     return SOCKET_ERROR;
    401   }
    402 
    403   if (m_rlen == 0) {
    404     m_bReadEnable = true;
    405     m_error = EWOULDBLOCK;
    406     return SOCKET_ERROR;
    407   }
    408 
    409   uint32 read = talk_base::_min(uint32(len), m_rlen);
    410   memcpy(buffer, m_rbuf, read);
    411   m_rlen -= read;
    412 
    413   // !?! until we create a circular buffer, we need to move all of the rest of the buffer up!
    414   memmove(m_rbuf, m_rbuf + read, sizeof(m_rbuf) - read/*m_rlen*/);
    415 
    416   if ((sizeof(m_rbuf) - m_rlen - m_rcv_wnd)
    417       >= talk_base::_min<uint32>(sizeof(m_rbuf) / 2, m_mss)) {
    418     bool bWasClosed = (m_rcv_wnd == 0); // !?! Not sure about this was closed business
    419 
    420     m_rcv_wnd = sizeof(m_rbuf) - m_rlen;
    421 
    422     if (bWasClosed) {
    423       attemptSend(sfImmediateAck);
    424     }
    425   }
    426 
    427   return read;
    428 }
    429 
    430 int PseudoTcp::Send(const char* buffer, size_t len) {
    431   if (m_state != TCP_ESTABLISHED) {
    432     m_error = ENOTCONN;
    433     return SOCKET_ERROR;
    434   }
    435 
    436   if (m_slen == sizeof(m_sbuf)) {
    437     m_bWriteEnable = true;
    438     m_error = EWOULDBLOCK;
    439     return SOCKET_ERROR;
    440   }
    441 
    442   int written = queue(buffer, uint32(len), false);
    443   attemptSend();
    444   return written;
    445 }
    446 
    447 void PseudoTcp::Close(bool force) {
    448   LOG_F(LS_VERBOSE) << "(" << (force ? "true" : "false") << ")";
    449   m_shutdown = force ? SD_FORCEFUL : SD_GRACEFUL;
    450 }
    451 
    452 int PseudoTcp::GetError() {
    453   return m_error;
    454 }
    455 
    456 //
    457 // Internal Implementation
    458 //
    459 
    460 uint32 PseudoTcp::queue(const char* data, uint32 len, bool bCtrl) {
    461   if (len > sizeof(m_sbuf) - m_slen) {
    462     ASSERT(!bCtrl);
    463     len = sizeof(m_sbuf) - m_slen;
    464   }
    465 
    466   // We can concatenate data if the last segment is the same type
    467   // (control v. regular data), and has not been transmitted yet
    468   if (!m_slist.empty() && (m_slist.back().bCtrl == bCtrl) && (m_slist.back().xmit == 0)) {
    469     m_slist.back().len += len;
    470   } else {
    471     SSegment sseg(m_snd_una + m_slen, len, bCtrl);
    472     m_slist.push_back(sseg);
    473   }
    474 
    475   memcpy(m_sbuf + m_slen, data, len);
    476   m_slen += len;
    477   //LOG(LS_INFO) << "PseudoTcp::queue - m_slen = " << m_slen;
    478   return len;
    479 }
    480 
    481 IPseudoTcpNotify::WriteResult PseudoTcp::packet(uint32 seq, uint8 flags,
    482                                                 const char* data, uint32 len) {
    483   ASSERT(HEADER_SIZE + len <= MAX_PACKET);
    484 
    485   uint32 now = Now();
    486 
    487   uint8 buffer[MAX_PACKET];
    488   long_to_bytes(m_conv, buffer);
    489   long_to_bytes(seq, buffer + 4);
    490   long_to_bytes(m_rcv_nxt, buffer + 8);
    491   buffer[12] = 0;
    492   buffer[13] = flags;
    493   short_to_bytes(uint16(m_rcv_wnd), buffer + 14);
    494 
    495   // Timestamp computations
    496   long_to_bytes(now, buffer + 16);
    497   long_to_bytes(m_ts_recent, buffer + 20);
    498   m_ts_lastack = m_rcv_nxt;
    499 
    500   memcpy(buffer + HEADER_SIZE, data, len);
    501 
    502 #if _DEBUGMSG >= _DBG_VERBOSE
    503   LOG(LS_INFO) << "<-- <CONV=" << m_conv
    504                << "><FLG=" << static_cast<unsigned>(flags)
    505                << "><SEQ=" << seq << ":" << seq + len
    506                << "><ACK=" << m_rcv_nxt
    507                << "><WND=" << m_rcv_wnd
    508                << "><TS="  << (now % 10000)
    509                << "><TSR=" << (m_ts_recent % 10000)
    510                << "><LEN=" << len << ">";
    511 #endif // _DEBUGMSG
    512 
    513   IPseudoTcpNotify::WriteResult wres = m_notify->TcpWritePacket(this, reinterpret_cast<char *>(buffer), len + HEADER_SIZE);
    514   // Note: When data is NULL, this is an ACK packet.  We don't read the return value for those,
    515   // and thus we won't retry.  So go ahead and treat the packet as a success (basically simulate
    516   // as if it were dropped), which will prevent our timers from being messed up.
    517   if ((wres != IPseudoTcpNotify::WR_SUCCESS) && (NULL != data))
    518     return wres;
    519 
    520   m_t_ack = 0;
    521   if (len > 0) {
    522     m_lastsend = now;
    523   }
    524   m_lasttraffic = now;
    525   m_bOutgoing = true;
    526 
    527   return IPseudoTcpNotify::WR_SUCCESS;
    528 }
    529 
    530 bool PseudoTcp::parse(const uint8* buffer, uint32 size) {
    531   if (size < 12)
    532     return false;
    533 
    534   Segment seg;
    535   seg.conv = bytes_to_long(buffer);
    536   seg.seq = bytes_to_long(buffer + 4);
    537   seg.ack = bytes_to_long(buffer + 8);
    538   seg.flags = buffer[13];
    539   seg.wnd = bytes_to_short(buffer + 14);
    540 
    541   seg.tsval = bytes_to_long(buffer + 16);
    542   seg.tsecr = bytes_to_long(buffer + 20);
    543 
    544   seg.data = reinterpret_cast<const char *>(buffer) + HEADER_SIZE;
    545   seg.len = size - HEADER_SIZE;
    546 
    547 #if _DEBUGMSG >= _DBG_VERBOSE
    548   LOG(LS_INFO) << "--> <CONV=" << seg.conv
    549                << "><FLG=" << static_cast<unsigned>(seg.flags)
    550                << "><SEQ=" << seg.seq << ":" << seg.seq + seg.len
    551                << "><ACK=" << seg.ack
    552                << "><WND=" << seg.wnd
    553                << "><TS="  << (seg.tsval % 10000)
    554                << "><TSR=" << (seg.tsecr % 10000)
    555                << "><LEN=" << seg.len << ">";
    556 #endif // _DEBUGMSG
    557 
    558   return process(seg);
    559 }
    560 
    561 bool PseudoTcp::clock_check(uint32 now, long& nTimeout) {
    562   if (m_shutdown == SD_FORCEFUL)
    563     return false;
    564 
    565   if ((m_shutdown == SD_GRACEFUL)
    566       && ((m_state != TCP_ESTABLISHED)
    567           || ((m_slen == 0) && (m_t_ack == 0)))) {
    568     return false;
    569   }
    570 
    571   if (m_state == TCP_CLOSED) {
    572     nTimeout = CLOSED_TIMEOUT;
    573     return true;
    574   }
    575 
    576   nTimeout = DEFAULT_TIMEOUT;
    577 
    578   if (m_t_ack) {
    579     nTimeout = talk_base::_min<int32>(nTimeout,
    580       talk_base::TimeDiff(m_t_ack + m_ack_delay, now));
    581   }
    582   if (m_rto_base) {
    583     nTimeout = talk_base::_min<int32>(nTimeout,
    584       talk_base::TimeDiff(m_rto_base + m_rx_rto, now));
    585   }
    586   if (m_snd_wnd == 0) {
    587     nTimeout = talk_base::_min<int32>(nTimeout, talk_base::TimeDiff(m_lastsend + m_rx_rto, now));
    588   }
    589 #if PSEUDO_KEEPALIVE
    590   if (m_state == TCP_ESTABLISHED) {
    591     nTimeout = talk_base::_min<int32>(nTimeout,
    592       talk_base::TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now));
    593   }
    594 #endif // PSEUDO_KEEPALIVE
    595   return true;
    596 }
    597 
    598 bool PseudoTcp::process(Segment& seg) {
    599   // If this is the wrong conversation, send a reset!?! (with the correct conversation?)
    600   if (seg.conv != m_conv) {
    601     //if ((seg.flags & FLAG_RST) == 0) {
    602     //  packet(tcb, seg.ack, 0, FLAG_RST, 0, 0);
    603     //}
    604     LOG_F(LS_ERROR) << "wrong conversation";
    605     return false;
    606   }
    607 
    608   uint32 now = Now();
    609   m_lasttraffic = m_lastrecv = now;
    610   m_bOutgoing = false;
    611 
    612   if (m_state == TCP_CLOSED) {
    613     // !?! send reset?
    614     LOG_F(LS_ERROR) << "closed";
    615     return false;
    616   }
    617 
    618   // Check if this is a reset segment
    619   if (seg.flags & FLAG_RST) {
    620     closedown(ECONNRESET);
    621     return false;
    622   }
    623 
    624   // Check for control data
    625   bool bConnect = false;
    626   if (seg.flags & FLAG_CTL) {
    627     if (seg.len == 0) {
    628       LOG_F(LS_ERROR) << "Missing control code";
    629       return false;
    630     } else if (seg.data[0] == CTL_CONNECT) {
    631       bConnect = true;
    632       if (m_state == TCP_LISTEN) {
    633         m_state = TCP_SYN_RECEIVED;
    634         LOG(LS_INFO) << "State: TCP_SYN_RECEIVED";
    635         //m_notify->associate(addr);
    636         char buffer[1];
    637         buffer[0] = CTL_CONNECT;
    638         queue(buffer, 1, true);
    639       } else if (m_state == TCP_SYN_SENT) {
    640         m_state = TCP_ESTABLISHED;
    641         LOG(LS_INFO) << "State: TCP_ESTABLISHED";
    642         adjustMTU();
    643         if (m_notify) {
    644           m_notify->OnTcpOpen(this);
    645         }
    646         //notify(evOpen);
    647       }
    648     } else {
    649       LOG_F(LS_WARNING) << "Unknown control code: " << seg.data[0];
    650       return false;
    651     }
    652   }
    653 
    654   // Update timestamp
    655   if ((seg.seq <= m_ts_lastack) && (m_ts_lastack < seg.seq + seg.len)) {
    656     m_ts_recent = seg.tsval;
    657   }
    658 
    659   // Check if this is a valuable ack
    660   if ((seg.ack > m_snd_una) && (seg.ack <= m_snd_nxt)) {
    661     // Calculate round-trip time
    662     if (seg.tsecr) {
    663       long rtt = talk_base::TimeDiff(now, seg.tsecr);
    664       if (rtt >= 0) {
    665         if (m_rx_srtt == 0) {
    666           m_rx_srtt = rtt;
    667           m_rx_rttvar = rtt / 2;
    668         } else {
    669           m_rx_rttvar = (3 * m_rx_rttvar + abs(long(rtt - m_rx_srtt))) / 4;
    670           m_rx_srtt = (7 * m_rx_srtt + rtt) / 8;
    671         }
    672         m_rx_rto = bound(MIN_RTO, m_rx_srtt +
    673             talk_base::_max<uint32>(1, 4 * m_rx_rttvar), MAX_RTO);
    674 #if _DEBUGMSG >= _DBG_VERBOSE
    675         LOG(LS_INFO) << "rtt: " << rtt
    676                      << "  srtt: " << m_rx_srtt
    677                      << "  rto: " << m_rx_rto;
    678 #endif // _DEBUGMSG
    679       } else {
    680         ASSERT(false);
    681       }
    682     }
    683 
    684     m_snd_wnd = seg.wnd;
    685 
    686     uint32 nAcked = seg.ack - m_snd_una;
    687     m_snd_una = seg.ack;
    688 
    689     m_rto_base = (m_snd_una == m_snd_nxt) ? 0 : now;
    690 
    691     m_slen -= nAcked;
    692     memmove(m_sbuf, m_sbuf + nAcked, m_slen);
    693     //LOG(LS_INFO) << "PseudoTcp::process - m_slen = " << m_slen;
    694 
    695     for (uint32 nFree = nAcked; nFree > 0; ) {
    696       ASSERT(!m_slist.empty());
    697       if (nFree < m_slist.front().len) {
    698         m_slist.front().len -= nFree;
    699         nFree = 0;
    700       } else {
    701         if (m_slist.front().len > m_largest) {
    702           m_largest = m_slist.front().len;
    703         }
    704         nFree -= m_slist.front().len;
    705         m_slist.pop_front();
    706       }
    707     }
    708 
    709     if (m_dup_acks >= 3) {
    710       if (m_snd_una >= m_recover) { // NewReno
    711         uint32 nInFlight = m_snd_nxt - m_snd_una;
    712         m_cwnd = talk_base::_min(m_ssthresh, nInFlight + m_mss); // (Fast Retransmit)
    713 #if _DEBUGMSG >= _DBG_NORMAL
    714         LOG(LS_INFO) << "exit recovery";
    715 #endif // _DEBUGMSG
    716         m_dup_acks = 0;
    717       } else {
    718 #if _DEBUGMSG >= _DBG_NORMAL
    719         LOG(LS_INFO) << "recovery retransmit";
    720 #endif // _DEBUGMSG
    721         if (!transmit(m_slist.begin(), now)) {
    722           closedown(ECONNABORTED);
    723           return false;
    724         }
    725         m_cwnd += m_mss - talk_base::_min(nAcked, m_cwnd);
    726       }
    727     } else {
    728       m_dup_acks = 0;
    729       // Slow start, congestion avoidance
    730       if (m_cwnd < m_ssthresh) {
    731         m_cwnd += m_mss;
    732       } else {
    733         m_cwnd += talk_base::_max<uint32>(1, m_mss * m_mss / m_cwnd);
    734       }
    735     }
    736 
    737     // !?! A bit hacky
    738     if ((m_state == TCP_SYN_RECEIVED) && !bConnect) {
    739       m_state = TCP_ESTABLISHED;
    740       LOG(LS_INFO) << "State: TCP_ESTABLISHED";
    741       adjustMTU();
    742       if (m_notify) {
    743         m_notify->OnTcpOpen(this);
    744       }
    745       //notify(evOpen);
    746     }
    747 
    748     // If we make room in the send queue, notify the user
    749     // The goal is to make sure we always have at least enough data to fill the
    750     // window.  We'd like to notify the app when we are halfway to that point.
    751     const uint32 kIdealRefillSize = (sizeof(m_sbuf) + sizeof(m_rbuf)) / 2;
    752     if (m_bWriteEnable && (m_slen < kIdealRefillSize)) {
    753       m_bWriteEnable = false;
    754       if (m_notify) {
    755         m_notify->OnTcpWriteable(this);
    756       }
    757       //notify(evWrite);
    758     }
    759   } else if (seg.ack == m_snd_una) {
    760     // !?! Note, tcp says don't do this... but otherwise how does a closed window become open?
    761     m_snd_wnd = seg.wnd;
    762 
    763     // Check duplicate acks
    764     if (seg.len > 0) {
    765       // it's a dup ack, but with a data payload, so don't modify m_dup_acks
    766     } else if (m_snd_una != m_snd_nxt) {
    767       m_dup_acks += 1;
    768       if (m_dup_acks == 3) { // (Fast Retransmit)
    769 #if _DEBUGMSG >= _DBG_NORMAL
    770         LOG(LS_INFO) << "enter recovery";
    771         LOG(LS_INFO) << "recovery retransmit";
    772 #endif // _DEBUGMSG
    773         if (!transmit(m_slist.begin(), now)) {
    774           closedown(ECONNABORTED);
    775           return false;
    776         }
    777         m_recover = m_snd_nxt;
    778         uint32 nInFlight = m_snd_nxt - m_snd_una;
    779         m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss);
    780         //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << "  nInFlight: " << nInFlight << "  m_mss: " << m_mss;
    781         m_cwnd = m_ssthresh + 3 * m_mss;
    782       } else if (m_dup_acks > 3) {
    783         m_cwnd += m_mss;
    784       }
    785     } else {
    786       m_dup_acks = 0;
    787     }
    788   }
    789 
    790   // Conditions where acks must be sent:
    791   // 1) Segment is too old (they missed an ACK) (immediately)
    792   // 2) Segment is too new (we missed a segment) (immediately)
    793   // 3) Segment has data (so we need to ACK!) (delayed)
    794   // ... so the only time we don't need to ACK, is an empty segment that points to rcv_nxt!
    795 
    796   SendFlags sflags = sfNone;
    797   if (seg.seq != m_rcv_nxt) {
    798     sflags = sfImmediateAck; // (Fast Recovery)
    799   } else if (seg.len != 0) {
    800     if (m_ack_delay == 0) {
    801       sflags = sfImmediateAck;
    802     } else {
    803       sflags = sfDelayedAck;
    804     }
    805   }
    806 #if _DEBUGMSG >= _DBG_NORMAL
    807   if (sflags == sfImmediateAck) {
    808     if (seg.seq > m_rcv_nxt) {
    809       LOG_F(LS_INFO) << "too new";
    810     } else if (seg.seq + seg.len <= m_rcv_nxt) {
    811       LOG_F(LS_INFO) << "too old";
    812     }
    813   }
    814 #endif // _DEBUGMSG
    815 
    816   // Adjust the incoming segment to fit our receive buffer
    817   if (seg.seq < m_rcv_nxt) {
    818     uint32 nAdjust = m_rcv_nxt - seg.seq;
    819     if (nAdjust < seg.len) {
    820       seg.seq += nAdjust;
    821       seg.data += nAdjust;
    822       seg.len -= nAdjust;
    823     } else {
    824       seg.len = 0;
    825     }
    826   }
    827   if ((seg.seq + seg.len - m_rcv_nxt) > (sizeof(m_rbuf) - m_rlen)) {
    828     uint32 nAdjust = seg.seq + seg.len - m_rcv_nxt - (sizeof(m_rbuf) - m_rlen);
    829     if (nAdjust < seg.len) {
    830       seg.len -= nAdjust;
    831     } else {
    832       seg.len = 0;
    833     }
    834   }
    835 
    836   bool bIgnoreData = (seg.flags & FLAG_CTL) || (m_shutdown != SD_NONE);
    837   bool bNewData = false;
    838 
    839   if (seg.len > 0) {
    840     if (bIgnoreData) {
    841       if (seg.seq == m_rcv_nxt) {
    842         m_rcv_nxt += seg.len;
    843       }
    844     } else {
    845       uint32 nOffset = seg.seq - m_rcv_nxt;
    846       memcpy(m_rbuf + m_rlen + nOffset, seg.data, seg.len);
    847       if (seg.seq == m_rcv_nxt) {
    848         m_rlen += seg.len;
    849         m_rcv_nxt += seg.len;
    850         m_rcv_wnd -= seg.len;
    851         bNewData = true;
    852 
    853         RList::iterator it = m_rlist.begin();
    854         while ((it != m_rlist.end()) && (it->seq <= m_rcv_nxt)) {
    855           if (it->seq + it->len > m_rcv_nxt) {
    856             sflags = sfImmediateAck; // (Fast Recovery)
    857             uint32 nAdjust = (it->seq + it->len) - m_rcv_nxt;
    858 #if _DEBUGMSG >= _DBG_NORMAL
    859             LOG(LS_INFO) << "Recovered " << nAdjust << " bytes (" << m_rcv_nxt << " -> " << m_rcv_nxt + nAdjust << ")";
    860 #endif // _DEBUGMSG
    861             m_rlen += nAdjust;
    862             m_rcv_nxt += nAdjust;
    863             m_rcv_wnd -= nAdjust;
    864           }
    865           it = m_rlist.erase(it);
    866         }
    867       } else {
    868 #if _DEBUGMSG >= _DBG_NORMAL
    869         LOG(LS_INFO) << "Saving " << seg.len << " bytes (" << seg.seq << " -> " << seg.seq + seg.len << ")";
    870 #endif // _DEBUGMSG
    871         RSegment rseg;
    872         rseg.seq = seg.seq;
    873         rseg.len = seg.len;
    874         RList::iterator it = m_rlist.begin();
    875         while ((it != m_rlist.end()) && (it->seq < rseg.seq)) {
    876           ++it;
    877         }
    878         m_rlist.insert(it, rseg);
    879       }
    880     }
    881   }
    882 
    883   attemptSend(sflags);
    884 
    885   // If we have new data, notify the user
    886   if (bNewData && m_bReadEnable) {
    887     m_bReadEnable = false;
    888     if (m_notify) {
    889       m_notify->OnTcpReadable(this);
    890     }
    891     //notify(evRead);
    892   }
    893 
    894   return true;
    895 }
    896 
    897 bool PseudoTcp::transmit(const SList::iterator& seg, uint32 now) {
    898   if (seg->xmit >= ((m_state == TCP_ESTABLISHED) ? 15 : 30)) {
    899     LOG_F(LS_VERBOSE) << "too many retransmits";
    900     return false;
    901   }
    902 
    903   uint32 nTransmit = talk_base::_min(seg->len, m_mss);
    904 
    905   while (true) {
    906     uint32 seq = seg->seq;
    907     uint8 flags = (seg->bCtrl ? FLAG_CTL : 0);
    908     const char* buffer = m_sbuf + (seg->seq - m_snd_una);
    909     IPseudoTcpNotify::WriteResult wres = this->packet(seq, flags, buffer, nTransmit);
    910 
    911     if (wres == IPseudoTcpNotify::WR_SUCCESS)
    912       break;
    913 
    914     if (wres == IPseudoTcpNotify::WR_FAIL) {
    915       LOG_F(LS_VERBOSE) << "packet failed";
    916       return false;
    917     }
    918 
    919     ASSERT(wres == IPseudoTcpNotify::WR_TOO_LARGE);
    920 
    921     while (true) {
    922       if (PACKET_MAXIMUMS[m_msslevel + 1] == 0) {
    923         LOG_F(LS_VERBOSE) << "MTU too small";
    924         return false;
    925       }
    926       // !?! We need to break up all outstanding and pending packets and then retransmit!?!
    927 
    928       m_mss = PACKET_MAXIMUMS[++m_msslevel] - PACKET_OVERHEAD;
    929       m_cwnd = 2 * m_mss; // I added this... haven't researched actual formula
    930       if (m_mss < nTransmit) {
    931         nTransmit = m_mss;
    932         break;
    933       }
    934     }
    935 #if _DEBUGMSG >= _DBG_NORMAL
    936     LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
    937 #endif // _DEBUGMSG
    938   }
    939 
    940   if (nTransmit < seg->len) {
    941     LOG_F(LS_VERBOSE) << "mss reduced to " << m_mss;
    942 
    943     SSegment subseg(seg->seq + nTransmit, seg->len - nTransmit, seg->bCtrl);
    944     //subseg.tstamp = seg->tstamp;
    945     subseg.xmit = seg->xmit;
    946     seg->len = nTransmit;
    947 
    948     SList::iterator next = seg;
    949     m_slist.insert(++next, subseg);
    950   }
    951 
    952   if (seg->xmit == 0) {
    953     m_snd_nxt += seg->len;
    954   }
    955   seg->xmit += 1;
    956   //seg->tstamp = now;
    957   if (m_rto_base == 0) {
    958     m_rto_base = now;
    959   }
    960 
    961   return true;
    962 }
    963 
    964 void PseudoTcp::attemptSend(SendFlags sflags) {
    965   uint32 now = Now();
    966 
    967   if (talk_base::TimeDiff(now, m_lastsend) > static_cast<long>(m_rx_rto)) {
    968     m_cwnd = m_mss;
    969   }
    970 
    971 #if _DEBUGMSG
    972   bool bFirst = true;
    973   UNUSED(bFirst);
    974 #endif // _DEBUGMSG
    975 
    976   while (true) {
    977     uint32 cwnd = m_cwnd;
    978     if ((m_dup_acks == 1) || (m_dup_acks == 2)) { // Limited Transmit
    979       cwnd += m_dup_acks * m_mss;
    980     }
    981     uint32 nWindow = talk_base::_min(m_snd_wnd, cwnd);
    982     uint32 nInFlight = m_snd_nxt - m_snd_una;
    983     uint32 nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
    984 
    985     uint32 nAvailable = talk_base::_min(m_slen - nInFlight, m_mss);
    986 
    987     if (nAvailable > nUseable) {
    988       if (nUseable * 4 < nWindow) {
    989         // RFC 813 - avoid SWS
    990         nAvailable = 0;
    991       } else {
    992         nAvailable = nUseable;
    993       }
    994     }
    995 
    996 #if _DEBUGMSG >= _DBG_VERBOSE
    997     if (bFirst) {
    998       bFirst = false;
    999       LOG(LS_INFO) << "[cwnd: " << m_cwnd
   1000                    << "  nWindow: " << nWindow
   1001                    << "  nInFlight: " << nInFlight
   1002                    << "  nAvailable: " << nAvailable
   1003                    << "  nQueued: " << m_slen - nInFlight
   1004                    << "  nEmpty: " << sizeof(m_sbuf) - m_slen
   1005                    << "  ssthresh: " << m_ssthresh << "]";
   1006     }
   1007 #endif // _DEBUGMSG
   1008 
   1009     if (nAvailable == 0) {
   1010       if (sflags == sfNone)
   1011         return;
   1012 
   1013       // If this is an immediate ack, or the second delayed ack
   1014       if ((sflags == sfImmediateAck) || m_t_ack) {
   1015         packet(m_snd_nxt, 0, 0, 0);
   1016       } else {
   1017         m_t_ack = Now();
   1018       }
   1019       return;
   1020     }
   1021 
   1022     // Nagle's algorithm.
   1023     // If there is data already in-flight, and we haven't a full segment of
   1024     // data ready to send then hold off until we get more to send, or the
   1025     // in-flight data is acknowledged.
   1026     if (m_use_nagling && (m_snd_nxt > m_snd_una) && (nAvailable < m_mss))  {
   1027       return;
   1028     }
   1029 
   1030     // Find the next segment to transmit
   1031     SList::iterator it = m_slist.begin();
   1032     while (it->xmit > 0) {
   1033       ++it;
   1034       ASSERT(it != m_slist.end());
   1035     }
   1036     SList::iterator seg = it;
   1037 
   1038     // If the segment is too large, break it into two
   1039     if (seg->len > nAvailable) {
   1040       SSegment subseg(seg->seq + nAvailable, seg->len - nAvailable, seg->bCtrl);
   1041       seg->len = nAvailable;
   1042       m_slist.insert(++it, subseg);
   1043     }
   1044 
   1045     if (!transmit(seg, now)) {
   1046       LOG_F(LS_VERBOSE) << "transmit failed";
   1047       // TODO: consider closing socket
   1048       return;
   1049     }
   1050 
   1051     sflags = sfNone;
   1052   }
   1053 }
   1054 
   1055 void
   1056 PseudoTcp::closedown(uint32 err) {
   1057   m_slen = 0;
   1058 
   1059   LOG(LS_INFO) << "State: TCP_CLOSED";
   1060   m_state = TCP_CLOSED;
   1061   if (m_notify) {
   1062     m_notify->OnTcpClosed(this, err);
   1063   }
   1064   //notify(evClose, err);
   1065 }
   1066 
   1067 void
   1068 PseudoTcp::adjustMTU() {
   1069   // Determine our current mss level, so that we can adjust appropriately later
   1070   for (m_msslevel = 0; PACKET_MAXIMUMS[m_msslevel + 1] > 0; ++m_msslevel) {
   1071     if (static_cast<uint16>(PACKET_MAXIMUMS[m_msslevel]) <= m_mtu_advise) {
   1072       break;
   1073     }
   1074   }
   1075   m_mss = m_mtu_advise - PACKET_OVERHEAD;
   1076   // !?! Should we reset m_largest here?
   1077 #if _DEBUGMSG >= _DBG_NORMAL
   1078   LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes";
   1079 #endif // _DEBUGMSG
   1080   // Enforce minimums on ssthresh and cwnd
   1081   m_ssthresh = talk_base::_max(m_ssthresh, 2 * m_mss);
   1082   m_cwnd = talk_base::_max(m_cwnd, m_mss);
   1083 }
   1084 
   1085 }  // namespace cricket
   1086