sys/netinet/tcp_timer.c

   1 /*-
   2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
   3  *      The Regents of the University of California.  All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  * 4. Neither the name of the University nor the names of its contributors
  14  *    may be used to endorse or promote products derived from this software
  15  *    without specific prior written permission.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
  18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
  21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  27  * SUCH DAMAGE.
  28  *
  29  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
  30  * $FreeBSD$
  31  */
  32
  33 #include "opt_inet6.h"
  34 #include "opt_tcpdebug.h"
  35 #include "opt_tcp_sack.h"
  36
  37 #include <sys/param.h>
  38 #include <sys/kernel.h>
  39 #include <sys/lock.h>
  40 #include <sys/mbuf.h>
  41 #include <sys/mutex.h>
  42 #include <sys/protosw.h>
  43 #include <sys/socket.h>
  44 #include <sys/socketvar.h>
  45 #include <sys/sysctl.h>
  46 #include <sys/systm.h>
  47
  48 #include <net/route.h>
  49
  50 #include <netinet/in.h>
  51 #include <netinet/in_pcb.h>
  52 #include <netinet/in_systm.h>
  53 #ifdef INET6
  54 #include <netinet6/in6_pcb.h>
  55 #endif
  56 #include <netinet/ip_var.h>
  57 #include <netinet/tcp.h>
  58 #include <netinet/tcp_fsm.h>
  59 #include <netinet/tcp_timer.h>
  60 #include <netinet/tcp_var.h>
  61 #include <netinet/tcpip.h>
  62 #ifdef TCPDEBUG
  63 #include <netinet/tcp_debug.h>
  64 #endif
  65
  66 int     tcp_keepinit;
  67 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
  68     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "");
  69
  70 int     tcp_keepidle;
  71 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
  72     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "");
  73
  74 int     tcp_keepintvl;
  75 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
  76     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "");
  77
  78 int     tcp_delacktime;
  79 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime,
  80     CTLTYPE_INT|CTLFLAG_RW, &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
  81     "Time before a delayed ACK is sent");
  82
  83 int     tcp_msl;
  84 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
  85     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
  86
  87 int     tcp_rexmit_min;
  88 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
  89     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I", "Minimum Retransmission Timeout");
  90
  91 int     tcp_rexmit_slop;
  92 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
  93     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I", "Retransmission Timer Slop");
  94
  95 static int      always_keepalive = 1;
  96 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
  97     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
  98
  99 static int      tcp_keepcnt = TCPTV_KEEPCNT;
 100         /* max idle probes */
 101 int     tcp_maxpersistidle;
 102         /* max idle time in persist */
 103 int     tcp_maxidle;
 104
 105 /*
 106  * Tcp protocol timeout routine called every 500 ms.
 107  * Updates timestamps used for TCP
 108  * causes finite state machine actions if timers expire.
 109  */
 110 void
 111 tcp_slowtimo()
 112 {
 113
 114         tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
 115         INP_INFO_WLOCK(&tcbinfo);
 116         (void) tcp_timer_2msl_tw(0);
 117         INP_INFO_WUNLOCK(&tcbinfo);
 118 }
 119
 120 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
 121     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
 122
 123 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
 124     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
 125
 126 static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
 127
 128 static int tcp_timer_race;
 129 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
 130     0, "Count of t_inpcb races on tcp_discardcb");
 131
 132 /*
 133  * TCP timer processing.
 134  */
 135
 136 void
 137 tcp_timer_delack(xtp)
 138         void *xtp;
 139 {
 140         struct tcpcb *tp = xtp;
 141         struct inpcb *inp;
 142
 143         INP_INFO_RLOCK(&tcbinfo);
 144         inp = tp->t_inpcb;
 145         /*
 146          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 147          * tear-down mean we need it as a work-around for races between
 148          * timers and tcp_discardcb().
 149          *
 150          * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
 151          */
 152         if (inp == NULL) {
 153                 tcp_timer_race++;
 154                 INP_INFO_RUNLOCK(&tcbinfo);
 155                 return;
 156         }
 157         INP_LOCK(inp);
 158         INP_INFO_RUNLOCK(&tcbinfo);
 159         if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_delack)
 160             || !callout_active(tp->tt_delack)) {
 161                 INP_UNLOCK(inp);
 162                 return;
 163         }
 164         callout_deactivate(tp->tt_delack);
 165
 166         tp->t_flags |= TF_ACKNOW;
 167         tcpstat.tcps_delack++;
 168         (void) tcp_output(tp);
 169         INP_UNLOCK(inp);
 170 }
 171
 172 void
 173 tcp_timer_2msl(xtp)
 174         void *xtp;
 175 {
 176         struct tcpcb *tp = xtp;
 177         struct inpcb *inp;
 178 #ifdef TCPDEBUG
 179         int ostate;
 180
 181         ostate = tp->t_state;
 182 #endif
 183         /*
 184          * XXXRW: Does this actually happen?
 185          */
 186         INP_INFO_WLOCK(&tcbinfo);
 187         inp = tp->t_inpcb;
 188         /*
 189          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 190          * tear-down mean we need it as a work-around for races between
 191          * timers and tcp_discardcb().
 192          *
 193          * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
 194          */
 195         if (inp == NULL) {
 196                 tcp_timer_race++;
 197                 INP_INFO_WUNLOCK(&tcbinfo);
 198                 return;
 199         }
 200         INP_LOCK(inp);
 201         tcp_free_sackholes(tp);
 202         if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_2msl) ||
 203             !callout_active(tp->tt_2msl)) {
 204                 INP_UNLOCK(tp->t_inpcb);
 205                 INP_INFO_WUNLOCK(&tcbinfo);
 206                 return;
 207         }
 208         callout_deactivate(tp->tt_2msl);
 209         /*
 210          * 2 MSL timeout in shutdown went off.  If we're closed but
 211          * still waiting for peer to close and connection has been idle
 212          * too long, or if 2MSL time is up from TIME_WAIT, delete connection
 213          * control block.  Otherwise, check again in a bit.
 214          */
 215         if (tp->t_state != TCPS_TIME_WAIT &&
 216             (ticks - tp->t_rcvtime) <= tcp_maxidle)
 217                 callout_reset(tp->tt_2msl, tcp_keepintvl,
 218                               tcp_timer_2msl, tp);
 219         else
 220                 tp = tcp_close(tp);
 221
 222 #ifdef TCPDEBUG
 223         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 224                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 225                           PRU_SLOWTIMO);
 226 #endif
 227         if (tp != NULL)
 228                 INP_UNLOCK(inp);
 229         INP_INFO_WUNLOCK(&tcbinfo);
 230 }
 231
 232 /*
 233  * The timed wait lists contain references to each of the TCP sessions
 234  * currently TIME_WAIT state.  The list pointers, including the list pointers
 235  * in each tcptw structure, are protected using the global tcbinfo lock,
 236  * which must be held over list iteration and modification.
 237  */
 238 struct twlist {
 239         LIST_HEAD(, tcptw)      tw_list;
 240         struct tcptw    tw_tail;
 241 };
 242 #define TWLIST_NLISTS   2
 243 static struct twlist twl_2msl[TWLIST_NLISTS];
 244 static struct twlist *tw_2msl_list[] = { &twl_2msl[0], &twl_2msl[1], NULL };
 245
 246 void
 247 tcp_timer_init(void)
 248 {
 249         int i;
 250         struct twlist *twl;
 251
 252         for (i = 0; i < TWLIST_NLISTS; i++) {
 253                 twl = &twl_2msl[i];
 254                 LIST_INIT(&twl->tw_list);
 255                 LIST_INSERT_HEAD(&twl->tw_list, &twl->tw_tail, tw_2msl);
 256         }
 257 }
 258
 259 void
 260 tcp_timer_2msl_reset(struct tcptw *tw, int timeo, int rearm)
 261 {
 262         int i;
 263         struct tcptw *tw_tail;
 264
 265         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 266         INP_LOCK_ASSERT(tw->tw_inpcb);
 267         if (rearm)
 268                 LIST_REMOVE(tw, tw_2msl);
 269         tw->tw_time = timeo + ticks;
 270         i = timeo > tcp_msl ? 1 : 0;
 271         tw_tail = &twl_2msl[i].tw_tail;
 272         LIST_INSERT_BEFORE(tw_tail, tw, tw_2msl);
 273 }
 274
 275 void
 276 tcp_timer_2msl_stop(struct tcptw *tw)
 277 {
 278
 279         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 280         LIST_REMOVE(tw, tw_2msl);
 281 }
 282
 283 struct tcptw *
 284 tcp_timer_2msl_tw(int reuse)
 285 {
 286         struct tcptw *tw, *tw_tail;
 287         struct twlist *twl;
 288         int i;
 289
 290         INP_INFO_WLOCK_ASSERT(&tcbinfo);
 291         for (i = 0; i < TWLIST_NLISTS; i++) {
 292                 twl = tw_2msl_list[i];
 293                 tw_tail = &twl->tw_tail;
 294
 295                 for (;;) {
 296                         tw = LIST_FIRST(&twl->tw_list);
 297                         if (tw == tw_tail || (!reuse && tw->tw_time > ticks))
 298                                 break;
 299                         INP_LOCK(tw->tw_inpcb);
 300                         tcp_twclose(tw, reuse);
 301                         if (reuse)
 302                                 return (tw);
 303                 }
 304
 305         }
 306         return (NULL);
 307 }
 308
 309 void
 310 tcp_timer_keep(xtp)
 311         void *xtp;
 312 {
 313         struct tcpcb *tp = xtp;
 314         struct tcptemp *t_template;
 315         struct inpcb *inp;
 316 #ifdef TCPDEBUG
 317         int ostate;
 318
 319         ostate = tp->t_state;
 320 #endif
 321         INP_INFO_WLOCK(&tcbinfo);
 322         inp = tp->t_inpcb;
 323         /*
 324          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 325          * tear-down mean we need it as a work-around for races between
 326          * timers and tcp_discardcb().
 327          *
 328          * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
 329          */
 330         if (inp == NULL) {
 331                 tcp_timer_race++;
 332                 INP_INFO_WUNLOCK(&tcbinfo);
 333                 return;
 334         }
 335         INP_LOCK(inp);
 336         if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_keep)
 337             || !callout_active(tp->tt_keep)) {
 338                 INP_UNLOCK(inp);
 339                 INP_INFO_WUNLOCK(&tcbinfo);
 340                 return;
 341         }
 342         callout_deactivate(tp->tt_keep);
 343         /*
 344          * Keep-alive timer went off; send something
 345          * or drop connection if idle for too long.
 346          */
 347         tcpstat.tcps_keeptimeo++;
 348         if (tp->t_state < TCPS_ESTABLISHED)
 349                 goto dropit;
 350         if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
 351             tp->t_state <= TCPS_CLOSING) {
 352                 if ((ticks - tp->t_rcvtime) >= tcp_keepidle + tcp_maxidle)
 353                         goto dropit;
 354                 /*
 355                  * Send a packet designed to force a response
 356                  * if the peer is up and reachable:
 357                  * either an ACK if the connection is still alive,
 358                  * or an RST if the peer has closed the connection
 359                  * due to timeout or reboot.
 360                  * Using sequence number tp->snd_una-1
 361                  * causes the transmitted zero-length segment
 362                  * to lie outside the receive window;
 363                  * by the protocol spec, this requires the
 364                  * correspondent TCP to respond.
 365                  */
 366                 tcpstat.tcps_keepprobe++;
 367                 t_template = tcpip_maketemplate(inp);
 368                 if (t_template) {
 369                         tcp_respond(tp, t_template->tt_ipgen,
 370                                     &t_template->tt_t, (struct mbuf *)NULL,
 371                                     tp->rcv_nxt, tp->snd_una - 1, 0);
 372                         (void) m_free(dtom(t_template));
 373                 }
 374                 callout_reset(tp->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
 375         } else
 376                 callout_reset(tp->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
 377
 378 #ifdef TCPDEBUG
 379         if (inp->inp_socket->so_options & SO_DEBUG)
 380                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 381                           PRU_SLOWTIMO);
 382 #endif
 383         INP_UNLOCK(inp);
 384         INP_INFO_WUNLOCK(&tcbinfo);
 385         return;
 386
 387 dropit:
 388         tcpstat.tcps_keepdrops++;
 389         tp = tcp_drop(tp, ETIMEDOUT);
 390
 391 #ifdef TCPDEBUG
 392         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 393                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 394                           PRU_SLOWTIMO);
 395 #endif
 396         if (tp != NULL)
 397                 INP_UNLOCK(tp->t_inpcb);
 398         INP_INFO_WUNLOCK(&tcbinfo);
 399 }
 400
 401 void
 402 tcp_timer_persist(xtp)
 403         void *xtp;
 404 {
 405         struct tcpcb *tp = xtp;
 406         struct inpcb *inp;
 407 #ifdef TCPDEBUG
 408         int ostate;
 409
 410         ostate = tp->t_state;
 411 #endif
 412         INP_INFO_WLOCK(&tcbinfo);
 413         inp = tp->t_inpcb;
 414         /*
 415          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 416          * tear-down mean we need it as a work-around for races between
 417          * timers and tcp_discardcb().
 418          *
 419          * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
 420          */
 421         if (inp == NULL) {
 422                 tcp_timer_race++;
 423                 INP_INFO_WUNLOCK(&tcbinfo);
 424                 return;
 425         }
 426         INP_LOCK(inp);
 427         if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_persist)
 428             || !callout_active(tp->tt_persist)) {
 429                 INP_UNLOCK(inp);
 430                 INP_INFO_WUNLOCK(&tcbinfo);
 431                 return;
 432         }
 433         callout_deactivate(tp->tt_persist);
 434         /*
 435          * Persistance timer into zero window.
 436          * Force a byte to be output, if possible.
 437          */
 438         tcpstat.tcps_persisttimeo++;
 439         /*
 440          * Hack: if the peer is dead/unreachable, we do not
 441          * time out if the window is closed.  After a full
 442          * backoff, drop the connection if the idle time
 443          * (no responses to probes) reaches the maximum
 444          * backoff that we would use if retransmitting.
 445          */
 446         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
 447             ((ticks - tp->t_rcvtime) >= tcp_maxpersistidle ||
 448              (ticks - tp->t_rcvtime) >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
 449                 tcpstat.tcps_persistdrop++;
 450                 tp = tcp_drop(tp, ETIMEDOUT);
 451                 goto out;
 452         }
 453         tcp_setpersist(tp);
 454         tp->t_flags |= TF_FORCEDATA;
 455         (void) tcp_output(tp);
 456         tp->t_flags &= ~TF_FORCEDATA;
 457
 458 out:
 459 #ifdef TCPDEBUG
 460         if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
 461                 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
 462 #endif
 463         if (tp != NULL)
 464                 INP_UNLOCK(inp);
 465         INP_INFO_WUNLOCK(&tcbinfo);
 466 }
 467
 468 void
 469 tcp_timer_rexmt(xtp)
 470         void *xtp;
 471 {
 472         struct tcpcb *tp = xtp;
 473         int rexmt;
 474         int headlocked;
 475         struct inpcb *inp;
 476 #ifdef TCPDEBUG
 477         int ostate;
 478
 479         ostate = tp->t_state;
 480 #endif
 481         INP_INFO_WLOCK(&tcbinfo);
 482         headlocked = 1;
 483         inp = tp->t_inpcb;
 484         /*
 485          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
 486          * tear-down mean we need it as a work-around for races between
 487          * timers and tcp_discardcb().
 488          *
 489          * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
 490          */
 491         if (inp == NULL) {
 492                 tcp_timer_race++;
 493                 INP_INFO_WUNLOCK(&tcbinfo);
 494                 return;
 495         }
 496         INP_LOCK(inp);
 497         if ((inp->inp_vflag & INP_DROPPED) || callout_pending(tp->tt_rexmt)
 498             || !callout_active(tp->tt_rexmt)) {
 499                 INP_UNLOCK(inp);
 500                 INP_INFO_WUNLOCK(&tcbinfo);
 501                 return;
 502         }
 503         callout_deactivate(tp->tt_rexmt);
 504         tcp_free_sackholes(tp);
 505         /*
 506          * Retransmission timer went off.  Message has not
 507          * been acked within retransmit interval.  Back off
 508          * to a longer retransmit interval and retransmit one segment.
 509          */
 510         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
 511                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
 512                 tcpstat.tcps_timeoutdrop++;
 513                 tp = tcp_drop(tp, tp->t_softerror ?
 514                               tp->t_softerror : ETIMEDOUT);
 515                 goto out;
 516         }
 517         INP_INFO_WUNLOCK(&tcbinfo);
 518         headlocked = 0;
 519         if (tp->t_rxtshift == 1) {
 520                 /*
 521                  * first retransmit; record ssthresh and cwnd so they can
 522                  * be recovered if this turns out to be a "bad" retransmit.
 523                  * A retransmit is considered "bad" if an ACK for this
 524                  * segment is received within RTT/2 interval; the assumption
 525                  * here is that the ACK was already in flight.  See
 526                  * "On Estimating End-to-End Network Path Properties" by
 527                  * Allman and Paxson for more details.
 528                  */
 529                 tp->snd_cwnd_prev = tp->snd_cwnd;
 530                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
 531                 tp->snd_recover_prev = tp->snd_recover;
 532                 if (IN_FASTRECOVERY(tp))
 533                   tp->t_flags |= TF_WASFRECOVERY;
 534                 else
 535                   tp->t_flags &= ~TF_WASFRECOVERY;
 536                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
 537         }
 538         tcpstat.tcps_rexmttimeo++;
 539         if (tp->t_state == TCPS_SYN_SENT)
 540                 rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
 541         else
 542                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
 543         TCPT_RANGESET(tp->t_rxtcur, rexmt,
 544                       tp->t_rttmin, TCPTV_REXMTMAX);
 545         /*
 546          * Disable rfc1323 if we havn't got any response to
 547          * our third SYN to work-around some broken terminal servers
 548          * (most of which have hopefully been retired) that have bad VJ
 549          * header compression code which trashes TCP segments containing
 550          * unknown-to-them TCP options.
 551          */
 552         if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
 553                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
 554         /*
 555          * If we backed off this far, our srtt estimate is probably bogus.
 556          * Clobber it so we'll take the next rtt measurement as our srtt;
 557          * move the current srtt into rttvar to keep the current
 558          * retransmit times until then.
 559          */
 560         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
 561 #ifdef INET6
 562                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
 563                         in6_losing(tp->t_inpcb);
 564                 else
 565 #endif
 566                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
 567                 tp->t_srtt = 0;
 568         }
 569         tp->snd_nxt = tp->snd_una;
 570         tp->snd_recover = tp->snd_max;
 571         /*
 572          * Force a segment to be sent.
 573          */
 574         tp->t_flags |= TF_ACKNOW;
 575         /*
 576          * If timing a segment in this window, stop the timer.
 577          */
 578         tp->t_rtttime = 0;
 579         /*
 580          * Close the congestion window down to one segment
 581          * (we'll open it by one segment for each ack we get).
 582          * Since we probably have a window's worth of unacked
 583          * data accumulated, this "slow start" keeps us from
 584          * dumping all that data as back-to-back packets (which
 585          * might overwhelm an intermediate gateway).
 586          *
 587          * There are two phases to the opening: Initially we
 588          * open by one mss on each ack.  This makes the window
 589          * size increase exponentially with time.  If the
 590          * window is larger than the path can handle, this
 591          * exponential growth results in dropped packet(s)
 592          * almost immediately.  To get more time between
 593          * drops but still "push" the network to take advantage
 594          * of improving conditions, we switch from exponential
 595          * to linear window opening at some threshhold size.
 596          * For a threshhold, we use half the current window
 597          * size, truncated to a multiple of the mss.
 598          *
 599          * (the minimum cwnd that will give us exponential
 600          * growth is 2 mss.  We don't allow the threshhold
 601          * to go below this.)
 602          */
 603         {
 604                 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
 605                 if (win < 2)
 606                         win = 2;
 607                 tp->snd_cwnd = tp->t_maxseg;
 608                 tp->snd_ssthresh = win * tp->t_maxseg;
 609                 tp->t_dupacks = 0;
 610         }
 611         EXIT_FASTRECOVERY(tp);
 612         (void) tcp_output(tp);
 613
 614 out:
 615 #ifdef TCPDEBUG
 616         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
 617                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
 618                           PRU_SLOWTIMO);
 619 #endif
 620         if (tp != NULL)
 621                 INP_UNLOCK(inp);
 622         if (headlocked)
 623                 INP_INFO_WUNLOCK(&tcbinfo);
 624 }