1 /* 2 * libjingle 3 * Copyright 2004--2005, Google Inc. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright notice, 11 * this list of conditions and the following disclaimer in the documentation 12 * and/or other materials provided with the distribution. 13 * 3. The name of the author may not be used to endorse or promote products 14 * derived from this software without specific prior written permission. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 17 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 18 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 19 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 20 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 21 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 22 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 24 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 25 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 26 */ 27 28 #include "talk/p2p/base/pseudotcp.h" 29 30 #include <cstdio> 31 #include <cstdlib> 32 33 #include "talk/base/basictypes.h" 34 #include "talk/base/byteorder.h" 35 #include "talk/base/common.h" 36 #include "talk/base/logging.h" 37 #include "talk/base/socket.h" 38 #include "talk/base/stringutils.h" 39 #include "talk/base/time.h" 40 41 // The following logging is for detailed (packet-level) analysis only. 
// Verbosity levels for the packet-level debug logging; _DEBUGMSG selects the
// active level and is tested with #if throughout this file.
#define _DBG_NONE     0
#define _DBG_NORMAL   1
#define _DBG_VERBOSE  2
#define _DEBUGMSG _DBG_NONE

namespace cricket {

//////////////////////////////////////////////////////////////////////
// Network Constants
//////////////////////////////////////////////////////////////////////

// Standard MTUs, in descending order and terminated by a 0 entry.
// Commented-out entries are known link types that are deliberately not part
// of the table.
const uint16 PACKET_MAXIMUMS[] = {
  65535,    // Theoretical maximum, Hyperchannel
  32000,    // Nothing
  17914,    // 16Mb IBM Token Ring
  8166,     // IEEE 802.4
  //4464,   // IEEE 802.5 (4Mb max)
  4352,     // FDDI
  //2048,   // Wideband Network
  2002,     // IEEE 802.5 (4Mb recommended)
  //1536,   // Experimental Ethernet Networks
  //1500,   // Ethernet, Point-to-Point (default)
  1492,     // IEEE 802.3
  1006,     // SLIP, ARPANET
  //576,    // X.25 Networks
  //544,    // DEC IP Portal
  //512,    // NETBIOS
  508,      // IEEE 802/Source-Rt Bridge, ARCNET
  296,      // Point-to-Point (low delay)
  //68,     // Official minimum
  0,        // End of list marker
};

// Largest packet size handled (matches the first PACKET_MAXIMUMS entry).
const uint32 MAX_PACKET = 65535;
// Note: we removed lowest level because packet overhead was larger!
const uint32 MIN_PACKET = 296;

// Per-packet header sizes, in bytes, of the layers below this protocol.
const uint32 IP_HEADER_SIZE = 20; // (+ up to 40 bytes of options?)
const uint32 ICMP_HEADER_SIZE = 8;
const uint32 UDP_HEADER_SIZE = 8;
// TODO: Make JINGLE_HEADER_SIZE transparent to this code?
84 const uint32 JINGLE_HEADER_SIZE = 64; // when relay framing is in use 85 86 ////////////////////////////////////////////////////////////////////// 87 // Global Constants and Functions 88 ////////////////////////////////////////////////////////////////////// 89 // 90 // 0 1 2 3 91 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 92 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 93 // 0 | Conversation Number | 94 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 95 // 4 | Sequence Number | 96 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 97 // 8 | Acknowledgment Number | 98 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 99 // | | |U|A|P|R|S|F| | 100 // 12 | Control | |R|C|S|S|Y|I| Window | 101 // | | |G|K|H|T|N|N| | 102 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 103 // 16 | Timestamp sending | 104 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 105 // 20 | Timestamp receiving | 106 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 107 // 24 | data | 108 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 109 // 110 ////////////////////////////////////////////////////////////////////// 111 112 #define PSEUDO_KEEPALIVE 0 113 114 const uint32 MAX_SEQ = 0xFFFFFFFF; 115 const uint32 HEADER_SIZE = 24; 116 const uint32 PACKET_OVERHEAD = HEADER_SIZE + UDP_HEADER_SIZE + IP_HEADER_SIZE + JINGLE_HEADER_SIZE; 117 118 const uint32 MIN_RTO = 250; // 250 ms (RFC1122, Sec 4.2.3.1 "fractions of a second") 119 const uint32 DEF_RTO = 3000; // 3 seconds (RFC1122, Sec 4.2.3.1) 120 const uint32 MAX_RTO = 60000; // 60 seconds 121 const uint32 DEF_ACK_DELAY = 100; // 100 milliseconds 122 123 const uint8 FLAG_CTL = 0x02; 124 const uint8 FLAG_RST = 0x04; 125 126 const uint8 CTL_CONNECT = 0; 127 //const uint8 CTL_REDIRECT = 1; 128 const uint8 CTL_EXTRA = 255; 129 130 /* 131 const uint8 FLAG_FIN = 0x01; 132 
const uint8 FLAG_SYN = 0x02; 133 const uint8 FLAG_ACK = 0x10; 134 */ 135 136 const uint32 CTRL_BOUND = 0x80000000; 137 138 const long DEFAULT_TIMEOUT = 4000; // If there are no pending clocks, wake up every 4 seconds 139 const long CLOSED_TIMEOUT = 60 * 1000; // If the connection is closed, once per minute 140 141 #if PSEUDO_KEEPALIVE 142 // !?! Rethink these times 143 const uint32 IDLE_PING = 20 * 1000; // 20 seconds (note: WinXP SP2 firewall udp timeout is 90 seconds) 144 const uint32 IDLE_TIMEOUT = 90 * 1000; // 90 seconds; 145 #endif // PSEUDO_KEEPALIVE 146 147 ////////////////////////////////////////////////////////////////////// 148 // Helper Functions 149 ////////////////////////////////////////////////////////////////////// 150 151 inline void long_to_bytes(uint32 val, void* buf) { 152 *static_cast<uint32*>(buf) = talk_base::HostToNetwork32(val); 153 } 154 155 inline void short_to_bytes(uint16 val, void* buf) { 156 *static_cast<uint16*>(buf) = talk_base::HostToNetwork16(val); 157 } 158 159 inline uint32 bytes_to_long(const void* buf) { 160 return talk_base::NetworkToHost32(*static_cast<const uint32*>(buf)); 161 } 162 163 inline uint16 bytes_to_short(const void* buf) { 164 return talk_base::NetworkToHost16(*static_cast<const uint16*>(buf)); 165 } 166 167 uint32 bound(uint32 lower, uint32 middle, uint32 upper) { 168 return talk_base::_min(talk_base::_max(lower, middle), upper); 169 } 170 171 ////////////////////////////////////////////////////////////////////// 172 // Debugging Statistics 173 ////////////////////////////////////////////////////////////////////// 174 175 #if 0 // Not used yet 176 177 enum Stat { 178 S_SENT_PACKET, // All packet sends 179 S_RESENT_PACKET, // All packet sends that are retransmits 180 S_RECV_PACKET, // All packet receives 181 S_RECV_NEW, // All packet receives that are too new 182 S_RECV_OLD, // All packet receives that are too old 183 S_NUM_STATS 184 }; 185 186 const char* const STAT_NAMES[S_NUM_STATS] = { 187 "snt", 188 
"snt-r", 189 "rcv" 190 "rcv-n", 191 "rcv-o" 192 }; 193 194 int g_stats[S_NUM_STATS]; 195 inline void Incr(Stat s) { ++g_stats[s]; } 196 void ReportStats() { 197 char buffer[256]; 198 size_t len = 0; 199 for (int i = 0; i < S_NUM_STATS; ++i) { 200 len += talk_base::sprintfn(buffer, ARRAY_SIZE(buffer), "%s%s:%d", 201 (i == 0) ? "" : ",", STAT_NAMES[i], g_stats[i]); 202 g_stats[i] = 0; 203 } 204 LOG(LS_INFO) << "Stats[" << buffer << "]"; 205 } 206 207 #endif 208 209 ////////////////////////////////////////////////////////////////////// 210 // PseudoTcp 211 ////////////////////////////////////////////////////////////////////// 212 213 uint32 PseudoTcp::Now() { 214 #if 0 // Use this to synchronize timers with logging timestamps (easier debug) 215 return talk_base::TimeSince(StartTime()); 216 #else 217 return talk_base::Time(); 218 #endif 219 } 220 221 PseudoTcp::PseudoTcp(IPseudoTcpNotify* notify, uint32 conv) 222 : m_notify(notify), m_shutdown(SD_NONE), m_error(0) { 223 224 // Sanity check on buffer sizes (needed for OnTcpWriteable notification logic) 225 ASSERT(sizeof(m_rbuf) + MIN_PACKET < sizeof(m_sbuf)); 226 227 uint32 now = Now(); 228 229 m_state = TCP_LISTEN; 230 m_conv = conv; 231 m_rcv_wnd = sizeof(m_rbuf); 232 m_snd_nxt = m_slen = 0; 233 m_snd_wnd = 1; 234 m_snd_una = m_rcv_nxt = m_rlen = 0; 235 m_bReadEnable = true; 236 m_bWriteEnable = false; 237 m_t_ack = 0; 238 239 m_msslevel = 0; 240 m_largest = 0; 241 ASSERT(MIN_PACKET > PACKET_OVERHEAD); 242 m_mss = MIN_PACKET - PACKET_OVERHEAD; 243 m_mtu_advise = MAX_PACKET; 244 245 m_rto_base = 0; 246 247 m_cwnd = 2 * m_mss; 248 m_ssthresh = sizeof(m_rbuf); 249 m_lastrecv = m_lastsend = m_lasttraffic = now; 250 m_bOutgoing = false; 251 252 m_dup_acks = 0; 253 m_recover = 0; 254 255 m_ts_recent = m_ts_lastack = 0; 256 257 m_rx_rto = DEF_RTO; 258 m_rx_srtt = m_rx_rttvar = 0; 259 260 m_use_nagling = true; 261 m_ack_delay = DEF_ACK_DELAY; 262 } 263 264 PseudoTcp::~PseudoTcp() { 265 } 266 267 int PseudoTcp::Connect() { 268 
if (m_state != TCP_LISTEN) { 269 m_error = EINVAL; 270 return -1; 271 } 272 273 m_state = TCP_SYN_SENT; 274 LOG(LS_INFO) << "State: TCP_SYN_SENT"; 275 276 char buffer[1]; 277 buffer[0] = CTL_CONNECT; 278 queue(buffer, 1, true); 279 attemptSend(); 280 281 return 0; 282 } 283 284 void PseudoTcp::NotifyMTU(uint16 mtu) { 285 m_mtu_advise = mtu; 286 if (m_state == TCP_ESTABLISHED) { 287 adjustMTU(); 288 } 289 } 290 291 void PseudoTcp::NotifyClock(uint32 now) { 292 if (m_state == TCP_CLOSED) 293 return; 294 295 // Check if it's time to retransmit a segment 296 if (m_rto_base && (talk_base::TimeDiff(m_rto_base + m_rx_rto, now) <= 0)) { 297 if (m_slist.empty()) { 298 ASSERT(false); 299 } else { 300 // Note: (m_slist.front().xmit == 0)) { 301 // retransmit segments 302 #if _DEBUGMSG >= _DBG_NORMAL 303 LOG(LS_INFO) << "timeout retransmit (rto: " << m_rx_rto 304 << ") (rto_base: " << m_rto_base 305 << ") (now: " << now 306 << ") (dup_acks: " << static_cast<unsigned>(m_dup_acks) 307 << ")"; 308 #endif // _DEBUGMSG 309 if (!transmit(m_slist.begin(), now)) { 310 closedown(ECONNABORTED); 311 return; 312 } 313 314 uint32 nInFlight = m_snd_nxt - m_snd_una; 315 m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss); 316 //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << " nInFlight: " << nInFlight << " m_mss: " << m_mss; 317 m_cwnd = m_mss; 318 319 // Back off retransmit timer. Note: the limit is lower when connecting. 320 uint32 rto_limit = (m_state < TCP_ESTABLISHED) ? 
DEF_RTO : MAX_RTO; 321 m_rx_rto = talk_base::_min(rto_limit, m_rx_rto * 2); 322 m_rto_base = now; 323 } 324 } 325 326 // Check if it's time to probe closed windows 327 if ((m_snd_wnd == 0) 328 && (talk_base::TimeDiff(m_lastsend + m_rx_rto, now) <= 0)) { 329 if (talk_base::TimeDiff(now, m_lastrecv) >= 15000) { 330 closedown(ECONNABORTED); 331 return; 332 } 333 334 // probe the window 335 packet(m_snd_nxt - 1, 0, 0, 0); 336 m_lastsend = now; 337 338 // back off retransmit timer 339 m_rx_rto = talk_base::_min(MAX_RTO, m_rx_rto * 2); 340 } 341 342 // Check if it's time to send delayed acks 343 if (m_t_ack && (talk_base::TimeDiff(m_t_ack + m_ack_delay, now) <= 0)) { 344 packet(m_snd_nxt, 0, 0, 0); 345 } 346 347 #if PSEUDO_KEEPALIVE 348 // Check for idle timeout 349 if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lastrecv + IDLE_TIMEOUT, now) <= 0)) { 350 closedown(ECONNABORTED); 351 return; 352 } 353 354 // Check for ping timeout (to keep udp mapping open) 355 if ((m_state == TCP_ESTABLISHED) && (TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now) <= 0)) { 356 packet(m_snd_nxt, 0, 0, 0); 357 } 358 #endif // PSEUDO_KEEPALIVE 359 } 360 361 bool PseudoTcp::NotifyPacket(const char* buffer, size_t len) { 362 if (len > MAX_PACKET) { 363 LOG_F(WARNING) << "packet too large"; 364 return false; 365 } 366 return parse(reinterpret_cast<const uint8 *>(buffer), uint32(len)); 367 } 368 369 bool PseudoTcp::GetNextClock(uint32 now, long& timeout) { 370 return clock_check(now, timeout); 371 } 372 373 void PseudoTcp::GetOption(Option opt, int* value) { 374 if (opt == OPT_NODELAY) { 375 *value = m_use_nagling ? 
0 : 1; 376 } else if (opt == OPT_ACKDELAY) { 377 *value = m_ack_delay; 378 } else { 379 ASSERT(false); 380 } 381 } 382 383 void PseudoTcp::SetOption(Option opt, int value) { 384 if (opt == OPT_NODELAY) { 385 m_use_nagling = value == 0; 386 } else if (opt == OPT_ACKDELAY) { 387 m_ack_delay = value; 388 } else { 389 ASSERT(false); 390 } 391 } 392 393 // 394 // IPStream Implementation 395 // 396 397 int PseudoTcp::Recv(char* buffer, size_t len) { 398 if (m_state != TCP_ESTABLISHED) { 399 m_error = ENOTCONN; 400 return SOCKET_ERROR; 401 } 402 403 if (m_rlen == 0) { 404 m_bReadEnable = true; 405 m_error = EWOULDBLOCK; 406 return SOCKET_ERROR; 407 } 408 409 uint32 read = talk_base::_min(uint32(len), m_rlen); 410 memcpy(buffer, m_rbuf, read); 411 m_rlen -= read; 412 413 // !?! until we create a circular buffer, we need to move all of the rest of the buffer up! 414 memmove(m_rbuf, m_rbuf + read, sizeof(m_rbuf) - read/*m_rlen*/); 415 416 if ((sizeof(m_rbuf) - m_rlen - m_rcv_wnd) 417 >= talk_base::_min<uint32>(sizeof(m_rbuf) / 2, m_mss)) { 418 bool bWasClosed = (m_rcv_wnd == 0); // !?! Not sure about this was closed business 419 420 m_rcv_wnd = sizeof(m_rbuf) - m_rlen; 421 422 if (bWasClosed) { 423 attemptSend(sfImmediateAck); 424 } 425 } 426 427 return read; 428 } 429 430 int PseudoTcp::Send(const char* buffer, size_t len) { 431 if (m_state != TCP_ESTABLISHED) { 432 m_error = ENOTCONN; 433 return SOCKET_ERROR; 434 } 435 436 if (m_slen == sizeof(m_sbuf)) { 437 m_bWriteEnable = true; 438 m_error = EWOULDBLOCK; 439 return SOCKET_ERROR; 440 } 441 442 int written = queue(buffer, uint32(len), false); 443 attemptSend(); 444 return written; 445 } 446 447 void PseudoTcp::Close(bool force) { 448 LOG_F(LS_VERBOSE) << "(" << (force ? "true" : "false") << ")"; 449 m_shutdown = force ? 
SD_FORCEFUL : SD_GRACEFUL; 450 } 451 452 int PseudoTcp::GetError() { 453 return m_error; 454 } 455 456 // 457 // Internal Implementation 458 // 459 460 uint32 PseudoTcp::queue(const char* data, uint32 len, bool bCtrl) { 461 if (len > sizeof(m_sbuf) - m_slen) { 462 ASSERT(!bCtrl); 463 len = sizeof(m_sbuf) - m_slen; 464 } 465 466 // We can concatenate data if the last segment is the same type 467 // (control v. regular data), and has not been transmitted yet 468 if (!m_slist.empty() && (m_slist.back().bCtrl == bCtrl) && (m_slist.back().xmit == 0)) { 469 m_slist.back().len += len; 470 } else { 471 SSegment sseg(m_snd_una + m_slen, len, bCtrl); 472 m_slist.push_back(sseg); 473 } 474 475 memcpy(m_sbuf + m_slen, data, len); 476 m_slen += len; 477 //LOG(LS_INFO) << "PseudoTcp::queue - m_slen = " << m_slen; 478 return len; 479 } 480 481 IPseudoTcpNotify::WriteResult PseudoTcp::packet(uint32 seq, uint8 flags, 482 const char* data, uint32 len) { 483 ASSERT(HEADER_SIZE + len <= MAX_PACKET); 484 485 uint32 now = Now(); 486 487 uint8 buffer[MAX_PACKET]; 488 long_to_bytes(m_conv, buffer); 489 long_to_bytes(seq, buffer + 4); 490 long_to_bytes(m_rcv_nxt, buffer + 8); 491 buffer[12] = 0; 492 buffer[13] = flags; 493 short_to_bytes(uint16(m_rcv_wnd), buffer + 14); 494 495 // Timestamp computations 496 long_to_bytes(now, buffer + 16); 497 long_to_bytes(m_ts_recent, buffer + 20); 498 m_ts_lastack = m_rcv_nxt; 499 500 memcpy(buffer + HEADER_SIZE, data, len); 501 502 #if _DEBUGMSG >= _DBG_VERBOSE 503 LOG(LS_INFO) << "<-- <CONV=" << m_conv 504 << "><FLG=" << static_cast<unsigned>(flags) 505 << "><SEQ=" << seq << ":" << seq + len 506 << "><ACK=" << m_rcv_nxt 507 << "><WND=" << m_rcv_wnd 508 << "><TS=" << (now % 10000) 509 << "><TSR=" << (m_ts_recent % 10000) 510 << "><LEN=" << len << ">"; 511 #endif // _DEBUGMSG 512 513 IPseudoTcpNotify::WriteResult wres = m_notify->TcpWritePacket(this, reinterpret_cast<char *>(buffer), len + HEADER_SIZE); 514 // Note: When data is NULL, this is an ACK 
packet. We don't read the return value for those, 515 // and thus we won't retry. So go ahead and treat the packet as a success (basically simulate 516 // as if it were dropped), which will prevent our timers from being messed up. 517 if ((wres != IPseudoTcpNotify::WR_SUCCESS) && (NULL != data)) 518 return wres; 519 520 m_t_ack = 0; 521 if (len > 0) { 522 m_lastsend = now; 523 } 524 m_lasttraffic = now; 525 m_bOutgoing = true; 526 527 return IPseudoTcpNotify::WR_SUCCESS; 528 } 529 530 bool PseudoTcp::parse(const uint8* buffer, uint32 size) { 531 if (size < 12) 532 return false; 533 534 Segment seg; 535 seg.conv = bytes_to_long(buffer); 536 seg.seq = bytes_to_long(buffer + 4); 537 seg.ack = bytes_to_long(buffer + 8); 538 seg.flags = buffer[13]; 539 seg.wnd = bytes_to_short(buffer + 14); 540 541 seg.tsval = bytes_to_long(buffer + 16); 542 seg.tsecr = bytes_to_long(buffer + 20); 543 544 seg.data = reinterpret_cast<const char *>(buffer) + HEADER_SIZE; 545 seg.len = size - HEADER_SIZE; 546 547 #if _DEBUGMSG >= _DBG_VERBOSE 548 LOG(LS_INFO) << "--> <CONV=" << seg.conv 549 << "><FLG=" << static_cast<unsigned>(seg.flags) 550 << "><SEQ=" << seg.seq << ":" << seg.seq + seg.len 551 << "><ACK=" << seg.ack 552 << "><WND=" << seg.wnd 553 << "><TS=" << (seg.tsval % 10000) 554 << "><TSR=" << (seg.tsecr % 10000) 555 << "><LEN=" << seg.len << ">"; 556 #endif // _DEBUGMSG 557 558 return process(seg); 559 } 560 561 bool PseudoTcp::clock_check(uint32 now, long& nTimeout) { 562 if (m_shutdown == SD_FORCEFUL) 563 return false; 564 565 if ((m_shutdown == SD_GRACEFUL) 566 && ((m_state != TCP_ESTABLISHED) 567 || ((m_slen == 0) && (m_t_ack == 0)))) { 568 return false; 569 } 570 571 if (m_state == TCP_CLOSED) { 572 nTimeout = CLOSED_TIMEOUT; 573 return true; 574 } 575 576 nTimeout = DEFAULT_TIMEOUT; 577 578 if (m_t_ack) { 579 nTimeout = talk_base::_min<int32>(nTimeout, 580 talk_base::TimeDiff(m_t_ack + m_ack_delay, now)); 581 } 582 if (m_rto_base) { 583 nTimeout = 
talk_base::_min<int32>(nTimeout, 584 talk_base::TimeDiff(m_rto_base + m_rx_rto, now)); 585 } 586 if (m_snd_wnd == 0) { 587 nTimeout = talk_base::_min<int32>(nTimeout, talk_base::TimeDiff(m_lastsend + m_rx_rto, now)); 588 } 589 #if PSEUDO_KEEPALIVE 590 if (m_state == TCP_ESTABLISHED) { 591 nTimeout = talk_base::_min<int32>(nTimeout, 592 talk_base::TimeDiff(m_lasttraffic + (m_bOutgoing ? IDLE_PING * 3/2 : IDLE_PING), now)); 593 } 594 #endif // PSEUDO_KEEPALIVE 595 return true; 596 } 597 598 bool PseudoTcp::process(Segment& seg) { 599 // If this is the wrong conversation, send a reset!?! (with the correct conversation?) 600 if (seg.conv != m_conv) { 601 //if ((seg.flags & FLAG_RST) == 0) { 602 // packet(tcb, seg.ack, 0, FLAG_RST, 0, 0); 603 //} 604 LOG_F(LS_ERROR) << "wrong conversation"; 605 return false; 606 } 607 608 uint32 now = Now(); 609 m_lasttraffic = m_lastrecv = now; 610 m_bOutgoing = false; 611 612 if (m_state == TCP_CLOSED) { 613 // !?! send reset? 614 LOG_F(LS_ERROR) << "closed"; 615 return false; 616 } 617 618 // Check if this is a reset segment 619 if (seg.flags & FLAG_RST) { 620 closedown(ECONNRESET); 621 return false; 622 } 623 624 // Check for control data 625 bool bConnect = false; 626 if (seg.flags & FLAG_CTL) { 627 if (seg.len == 0) { 628 LOG_F(LS_ERROR) << "Missing control code"; 629 return false; 630 } else if (seg.data[0] == CTL_CONNECT) { 631 bConnect = true; 632 if (m_state == TCP_LISTEN) { 633 m_state = TCP_SYN_RECEIVED; 634 LOG(LS_INFO) << "State: TCP_SYN_RECEIVED"; 635 //m_notify->associate(addr); 636 char buffer[1]; 637 buffer[0] = CTL_CONNECT; 638 queue(buffer, 1, true); 639 } else if (m_state == TCP_SYN_SENT) { 640 m_state = TCP_ESTABLISHED; 641 LOG(LS_INFO) << "State: TCP_ESTABLISHED"; 642 adjustMTU(); 643 if (m_notify) { 644 m_notify->OnTcpOpen(this); 645 } 646 //notify(evOpen); 647 } 648 } else { 649 LOG_F(LS_WARNING) << "Unknown control code: " << seg.data[0]; 650 return false; 651 } 652 } 653 654 // Update timestamp 655 if 
((seg.seq <= m_ts_lastack) && (m_ts_lastack < seg.seq + seg.len)) { 656 m_ts_recent = seg.tsval; 657 } 658 659 // Check if this is a valuable ack 660 if ((seg.ack > m_snd_una) && (seg.ack <= m_snd_nxt)) { 661 // Calculate round-trip time 662 if (seg.tsecr) { 663 long rtt = talk_base::TimeDiff(now, seg.tsecr); 664 if (rtt >= 0) { 665 if (m_rx_srtt == 0) { 666 m_rx_srtt = rtt; 667 m_rx_rttvar = rtt / 2; 668 } else { 669 m_rx_rttvar = (3 * m_rx_rttvar + abs(long(rtt - m_rx_srtt))) / 4; 670 m_rx_srtt = (7 * m_rx_srtt + rtt) / 8; 671 } 672 m_rx_rto = bound(MIN_RTO, m_rx_srtt + 673 talk_base::_max<uint32>(1, 4 * m_rx_rttvar), MAX_RTO); 674 #if _DEBUGMSG >= _DBG_VERBOSE 675 LOG(LS_INFO) << "rtt: " << rtt 676 << " srtt: " << m_rx_srtt 677 << " rto: " << m_rx_rto; 678 #endif // _DEBUGMSG 679 } else { 680 ASSERT(false); 681 } 682 } 683 684 m_snd_wnd = seg.wnd; 685 686 uint32 nAcked = seg.ack - m_snd_una; 687 m_snd_una = seg.ack; 688 689 m_rto_base = (m_snd_una == m_snd_nxt) ? 0 : now; 690 691 m_slen -= nAcked; 692 memmove(m_sbuf, m_sbuf + nAcked, m_slen); 693 //LOG(LS_INFO) << "PseudoTcp::process - m_slen = " << m_slen; 694 695 for (uint32 nFree = nAcked; nFree > 0; ) { 696 ASSERT(!m_slist.empty()); 697 if (nFree < m_slist.front().len) { 698 m_slist.front().len -= nFree; 699 nFree = 0; 700 } else { 701 if (m_slist.front().len > m_largest) { 702 m_largest = m_slist.front().len; 703 } 704 nFree -= m_slist.front().len; 705 m_slist.pop_front(); 706 } 707 } 708 709 if (m_dup_acks >= 3) { 710 if (m_snd_una >= m_recover) { // NewReno 711 uint32 nInFlight = m_snd_nxt - m_snd_una; 712 m_cwnd = talk_base::_min(m_ssthresh, nInFlight + m_mss); // (Fast Retransmit) 713 #if _DEBUGMSG >= _DBG_NORMAL 714 LOG(LS_INFO) << "exit recovery"; 715 #endif // _DEBUGMSG 716 m_dup_acks = 0; 717 } else { 718 #if _DEBUGMSG >= _DBG_NORMAL 719 LOG(LS_INFO) << "recovery retransmit"; 720 #endif // _DEBUGMSG 721 if (!transmit(m_slist.begin(), now)) { 722 closedown(ECONNABORTED); 723 return false; 724 } 725 
m_cwnd += m_mss - talk_base::_min(nAcked, m_cwnd); 726 } 727 } else { 728 m_dup_acks = 0; 729 // Slow start, congestion avoidance 730 if (m_cwnd < m_ssthresh) { 731 m_cwnd += m_mss; 732 } else { 733 m_cwnd += talk_base::_max<uint32>(1, m_mss * m_mss / m_cwnd); 734 } 735 } 736 737 // !?! A bit hacky 738 if ((m_state == TCP_SYN_RECEIVED) && !bConnect) { 739 m_state = TCP_ESTABLISHED; 740 LOG(LS_INFO) << "State: TCP_ESTABLISHED"; 741 adjustMTU(); 742 if (m_notify) { 743 m_notify->OnTcpOpen(this); 744 } 745 //notify(evOpen); 746 } 747 748 // If we make room in the send queue, notify the user 749 // The goal it to make sure we always have at least enough data to fill the 750 // window. We'd like to notify the app when we are halfway to that point. 751 const uint32 kIdealRefillSize = (sizeof(m_sbuf) + sizeof(m_rbuf)) / 2; 752 if (m_bWriteEnable && (m_slen < kIdealRefillSize)) { 753 m_bWriteEnable = false; 754 if (m_notify) { 755 m_notify->OnTcpWriteable(this); 756 } 757 //notify(evWrite); 758 } 759 } else if (seg.ack == m_snd_una) { 760 // !?! Note, tcp says don't do this... but otherwise how does a closed window become open? 
761 m_snd_wnd = seg.wnd; 762 763 // Check duplicate acks 764 if (seg.len > 0) { 765 // it's a dup ack, but with a data payload, so don't modify m_dup_acks 766 } else if (m_snd_una != m_snd_nxt) { 767 m_dup_acks += 1; 768 if (m_dup_acks == 3) { // (Fast Retransmit) 769 #if _DEBUGMSG >= _DBG_NORMAL 770 LOG(LS_INFO) << "enter recovery"; 771 LOG(LS_INFO) << "recovery retransmit"; 772 #endif // _DEBUGMSG 773 if (!transmit(m_slist.begin(), now)) { 774 closedown(ECONNABORTED); 775 return false; 776 } 777 m_recover = m_snd_nxt; 778 uint32 nInFlight = m_snd_nxt - m_snd_una; 779 m_ssthresh = talk_base::_max(nInFlight / 2, 2 * m_mss); 780 //LOG(LS_INFO) << "m_ssthresh: " << m_ssthresh << " nInFlight: " << nInFlight << " m_mss: " << m_mss; 781 m_cwnd = m_ssthresh + 3 * m_mss; 782 } else if (m_dup_acks > 3) { 783 m_cwnd += m_mss; 784 } 785 } else { 786 m_dup_acks = 0; 787 } 788 } 789 790 // Conditions were acks must be sent: 791 // 1) Segment is too old (they missed an ACK) (immediately) 792 // 2) Segment is too new (we missed a segment) (immediately) 793 // 3) Segment has data (so we need to ACK!) (delayed) 794 // ... so the only time we don't need to ACK, is an empty segment that points to rcv_nxt! 
795 796 SendFlags sflags = sfNone; 797 if (seg.seq != m_rcv_nxt) { 798 sflags = sfImmediateAck; // (Fast Recovery) 799 } else if (seg.len != 0) { 800 if (m_ack_delay == 0) { 801 sflags = sfImmediateAck; 802 } else { 803 sflags = sfDelayedAck; 804 } 805 } 806 #if _DEBUGMSG >= _DBG_NORMAL 807 if (sflags == sfImmediateAck) { 808 if (seg.seq > m_rcv_nxt) { 809 LOG_F(LS_INFO) << "too new"; 810 } else if (seg.seq + seg.len <= m_rcv_nxt) { 811 LOG_F(LS_INFO) << "too old"; 812 } 813 } 814 #endif // _DEBUGMSG 815 816 // Adjust the incoming segment to fit our receive buffer 817 if (seg.seq < m_rcv_nxt) { 818 uint32 nAdjust = m_rcv_nxt - seg.seq; 819 if (nAdjust < seg.len) { 820 seg.seq += nAdjust; 821 seg.data += nAdjust; 822 seg.len -= nAdjust; 823 } else { 824 seg.len = 0; 825 } 826 } 827 if ((seg.seq + seg.len - m_rcv_nxt) > (sizeof(m_rbuf) - m_rlen)) { 828 uint32 nAdjust = seg.seq + seg.len - m_rcv_nxt - (sizeof(m_rbuf) - m_rlen); 829 if (nAdjust < seg.len) { 830 seg.len -= nAdjust; 831 } else { 832 seg.len = 0; 833 } 834 } 835 836 bool bIgnoreData = (seg.flags & FLAG_CTL) || (m_shutdown != SD_NONE); 837 bool bNewData = false; 838 839 if (seg.len > 0) { 840 if (bIgnoreData) { 841 if (seg.seq == m_rcv_nxt) { 842 m_rcv_nxt += seg.len; 843 } 844 } else { 845 uint32 nOffset = seg.seq - m_rcv_nxt; 846 memcpy(m_rbuf + m_rlen + nOffset, seg.data, seg.len); 847 if (seg.seq == m_rcv_nxt) { 848 m_rlen += seg.len; 849 m_rcv_nxt += seg.len; 850 m_rcv_wnd -= seg.len; 851 bNewData = true; 852 853 RList::iterator it = m_rlist.begin(); 854 while ((it != m_rlist.end()) && (it->seq <= m_rcv_nxt)) { 855 if (it->seq + it->len > m_rcv_nxt) { 856 sflags = sfImmediateAck; // (Fast Recovery) 857 uint32 nAdjust = (it->seq + it->len) - m_rcv_nxt; 858 #if _DEBUGMSG >= _DBG_NORMAL 859 LOG(LS_INFO) << "Recovered " << nAdjust << " bytes (" << m_rcv_nxt << " -> " << m_rcv_nxt + nAdjust << ")"; 860 #endif // _DEBUGMSG 861 m_rlen += nAdjust; 862 m_rcv_nxt += nAdjust; 863 m_rcv_wnd -= nAdjust; 864 } 865 
it = m_rlist.erase(it); 866 } 867 } else { 868 #if _DEBUGMSG >= _DBG_NORMAL 869 LOG(LS_INFO) << "Saving " << seg.len << " bytes (" << seg.seq << " -> " << seg.seq + seg.len << ")"; 870 #endif // _DEBUGMSG 871 RSegment rseg; 872 rseg.seq = seg.seq; 873 rseg.len = seg.len; 874 RList::iterator it = m_rlist.begin(); 875 while ((it != m_rlist.end()) && (it->seq < rseg.seq)) { 876 ++it; 877 } 878 m_rlist.insert(it, rseg); 879 } 880 } 881 } 882 883 attemptSend(sflags); 884 885 // If we have new data, notify the user 886 if (bNewData && m_bReadEnable) { 887 m_bReadEnable = false; 888 if (m_notify) { 889 m_notify->OnTcpReadable(this); 890 } 891 //notify(evRead); 892 } 893 894 return true; 895 } 896 897 bool PseudoTcp::transmit(const SList::iterator& seg, uint32 now) { 898 if (seg->xmit >= ((m_state == TCP_ESTABLISHED) ? 15 : 30)) { 899 LOG_F(LS_VERBOSE) << "too many retransmits"; 900 return false; 901 } 902 903 uint32 nTransmit = talk_base::_min(seg->len, m_mss); 904 905 while (true) { 906 uint32 seq = seg->seq; 907 uint8 flags = (seg->bCtrl ? FLAG_CTL : 0); 908 const char* buffer = m_sbuf + (seg->seq - m_snd_una); 909 IPseudoTcpNotify::WriteResult wres = this->packet(seq, flags, buffer, nTransmit); 910 911 if (wres == IPseudoTcpNotify::WR_SUCCESS) 912 break; 913 914 if (wres == IPseudoTcpNotify::WR_FAIL) { 915 LOG_F(LS_VERBOSE) << "packet failed"; 916 return false; 917 } 918 919 ASSERT(wres == IPseudoTcpNotify::WR_TOO_LARGE); 920 921 while (true) { 922 if (PACKET_MAXIMUMS[m_msslevel + 1] == 0) { 923 LOG_F(LS_VERBOSE) << "MTU too small"; 924 return false; 925 } 926 // !?! We need to break up all outstanding and pending packets and then retransmit!?! 927 928 m_mss = PACKET_MAXIMUMS[++m_msslevel] - PACKET_OVERHEAD; 929 m_cwnd = 2 * m_mss; // I added this... 
haven't researched actual formula 930 if (m_mss < nTransmit) { 931 nTransmit = m_mss; 932 break; 933 } 934 } 935 #if _DEBUGMSG >= _DBG_NORMAL 936 LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes"; 937 #endif // _DEBUGMSG 938 } 939 940 if (nTransmit < seg->len) { 941 LOG_F(LS_VERBOSE) << "mss reduced to " << m_mss; 942 943 SSegment subseg(seg->seq + nTransmit, seg->len - nTransmit, seg->bCtrl); 944 //subseg.tstamp = seg->tstamp; 945 subseg.xmit = seg->xmit; 946 seg->len = nTransmit; 947 948 SList::iterator next = seg; 949 m_slist.insert(++next, subseg); 950 } 951 952 if (seg->xmit == 0) { 953 m_snd_nxt += seg->len; 954 } 955 seg->xmit += 1; 956 //seg->tstamp = now; 957 if (m_rto_base == 0) { 958 m_rto_base = now; 959 } 960 961 return true; 962 } 963 964 void PseudoTcp::attemptSend(SendFlags sflags) { 965 uint32 now = Now(); 966 967 if (talk_base::TimeDiff(now, m_lastsend) > static_cast<long>(m_rx_rto)) { 968 m_cwnd = m_mss; 969 } 970 971 #if _DEBUGMSG 972 bool bFirst = true; 973 UNUSED(bFirst); 974 #endif // _DEBUGMSG 975 976 while (true) { 977 uint32 cwnd = m_cwnd; 978 if ((m_dup_acks == 1) || (m_dup_acks == 2)) { // Limited Transmit 979 cwnd += m_dup_acks * m_mss; 980 } 981 uint32 nWindow = talk_base::_min(m_snd_wnd, cwnd); 982 uint32 nInFlight = m_snd_nxt - m_snd_una; 983 uint32 nUseable = (nInFlight < nWindow) ? 
(nWindow - nInFlight) : 0; 984 985 uint32 nAvailable = talk_base::_min(m_slen - nInFlight, m_mss); 986 987 if (nAvailable > nUseable) { 988 if (nUseable * 4 < nWindow) { 989 // RFC 813 - avoid SWS 990 nAvailable = 0; 991 } else { 992 nAvailable = nUseable; 993 } 994 } 995 996 #if _DEBUGMSG >= _DBG_VERBOSE 997 if (bFirst) { 998 bFirst = false; 999 LOG(LS_INFO) << "[cwnd: " << m_cwnd 1000 << " nWindow: " << nWindow 1001 << " nInFlight: " << nInFlight 1002 << " nAvailable: " << nAvailable 1003 << " nQueued: " << m_slen - nInFlight 1004 << " nEmpty: " << sizeof(m_sbuf) - m_slen 1005 << " ssthresh: " << m_ssthresh << "]"; 1006 } 1007 #endif // _DEBUGMSG 1008 1009 if (nAvailable == 0) { 1010 if (sflags == sfNone) 1011 return; 1012 1013 // If this is an immediate ack, or the second delayed ack 1014 if ((sflags == sfImmediateAck) || m_t_ack) { 1015 packet(m_snd_nxt, 0, 0, 0); 1016 } else { 1017 m_t_ack = Now(); 1018 } 1019 return; 1020 } 1021 1022 // Nagle's algorithm. 1023 // If there is data already in-flight, and we haven't a full segment of 1024 // data ready to send then hold off until we get more to send, or the 1025 // in-flight data is acknowledged. 
1026 if (m_use_nagling && (m_snd_nxt > m_snd_una) && (nAvailable < m_mss)) { 1027 return; 1028 } 1029 1030 // Find the next segment to transmit 1031 SList::iterator it = m_slist.begin(); 1032 while (it->xmit > 0) { 1033 ++it; 1034 ASSERT(it != m_slist.end()); 1035 } 1036 SList::iterator seg = it; 1037 1038 // If the segment is too large, break it into two 1039 if (seg->len > nAvailable) { 1040 SSegment subseg(seg->seq + nAvailable, seg->len - nAvailable, seg->bCtrl); 1041 seg->len = nAvailable; 1042 m_slist.insert(++it, subseg); 1043 } 1044 1045 if (!transmit(seg, now)) { 1046 LOG_F(LS_VERBOSE) << "transmit failed"; 1047 // TODO: consider closing socket 1048 return; 1049 } 1050 1051 sflags = sfNone; 1052 } 1053 } 1054 1055 void 1056 PseudoTcp::closedown(uint32 err) { 1057 m_slen = 0; 1058 1059 LOG(LS_INFO) << "State: TCP_CLOSED"; 1060 m_state = TCP_CLOSED; 1061 if (m_notify) { 1062 m_notify->OnTcpClosed(this, err); 1063 } 1064 //notify(evClose, err); 1065 } 1066 1067 void 1068 PseudoTcp::adjustMTU() { 1069 // Determine our current mss level, so that we can adjust appropriately later 1070 for (m_msslevel = 0; PACKET_MAXIMUMS[m_msslevel + 1] > 0; ++m_msslevel) { 1071 if (static_cast<uint16>(PACKET_MAXIMUMS[m_msslevel]) <= m_mtu_advise) { 1072 break; 1073 } 1074 } 1075 m_mss = m_mtu_advise - PACKET_OVERHEAD; 1076 // !?! Should we reset m_largest here? 1077 #if _DEBUGMSG >= _DBG_NORMAL 1078 LOG(LS_INFO) << "Adjusting mss to " << m_mss << " bytes"; 1079 #endif // _DEBUGMSG 1080 // Enforce minimums on ssthresh and cwnd 1081 m_ssthresh = talk_base::_max(m_ssthresh, 2 * m_mss); 1082 m_cwnd = talk_base::_max(m_cwnd, m_mss); 1083 } 1084 1085 } // namespace cricket 1086