]> CyberLeo.Net >> Repos - FreeBSD/releng/8.1.git/blob - sys/netinet/tcp_timer.c
Copy stable/8 to releng/8.1 in preparation for 8.1-RC1.
[FreeBSD/releng/8.1.git] / sys / netinet / tcp_timer.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
3  *      The Regents of the University of California.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  * 4. Neither the name of the University nor the names of its contributors
14  *    may be used to endorse or promote products derived from this software
15  *    without specific prior written permission.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include "opt_inet6.h"
36 #include "opt_tcpdebug.h"
37
38 #include <sys/param.h>
39 #include <sys/kernel.h>
40 #include <sys/lock.h>
41 #include <sys/mbuf.h>
42 #include <sys/mutex.h>
43 #include <sys/protosw.h>
44 #include <sys/socket.h>
45 #include <sys/socketvar.h>
46 #include <sys/sysctl.h>
47 #include <sys/systm.h>
48
49 #include <net/if.h>
50 #include <net/route.h>
51 #include <net/vnet.h>
52
53 #include <netinet/in.h>
54 #include <netinet/in_pcb.h>
55 #include <netinet/in_systm.h>
56 #ifdef INET6
57 #include <netinet6/in6_pcb.h>
58 #endif
59 #include <netinet/ip_var.h>
60 #include <netinet/tcp.h>
61 #include <netinet/tcp_fsm.h>
62 #include <netinet/tcp_timer.h>
63 #include <netinet/tcp_var.h>
64 #include <netinet/tcpip.h>
65 #ifdef TCPDEBUG
66 #include <netinet/tcp_debug.h>
67 #endif
68
69 int     tcp_keepinit;
70 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
71     &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");
72
73 int     tcp_keepidle;
74 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
75     &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");
76
77 int     tcp_keepintvl;
78 SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
79     &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");
80
81 int     tcp_delacktime;
82 SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
83     &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
84     "Time before a delayed ACK is sent");
85
86 int     tcp_msl;
87 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
88     &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");
89
90 int     tcp_rexmit_min;
91 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
92     &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
93     "Minimum Retransmission Timeout");
94
95 int     tcp_rexmit_slop;
96 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
97     &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
98     "Retransmission Timer Slop");
99
100 static int      always_keepalive = 1;
101 SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
102     &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");
103
104 int    tcp_fast_finwait2_recycle = 0;
105 SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW, 
106     &tcp_fast_finwait2_recycle, 0,
107     "Recycle closed FIN_WAIT_2 connections faster");
108
109 int    tcp_finwait2_timeout;
110 SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
111     &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");
112
113
114 static int      tcp_keepcnt = TCPTV_KEEPCNT;
115         /* max idle probes */
116 int     tcp_maxpersistidle;
117         /* max idle time in persist */
118 int     tcp_maxidle;
119
120 /*
121  * Tcp protocol timeout routine called every 500 ms.
122  * Updates timestamps used for TCP
123  * causes finite state machine actions if timers expire.
124  */
125 void
126 tcp_slowtimo(void)
127 {
128         VNET_ITERATOR_DECL(vnet_iter);
129
130         VNET_LIST_RLOCK_NOSLEEP();
131         VNET_FOREACH(vnet_iter) {
132                 CURVNET_SET(vnet_iter);
133                 tcp_maxidle = tcp_keepcnt * tcp_keepintvl;
134                 INP_INFO_WLOCK(&V_tcbinfo);
135                 (void) tcp_tw_2msl_scan(0);
136                 INP_INFO_WUNLOCK(&V_tcbinfo);
137                 CURVNET_RESTORE();
138         }
139         VNET_LIST_RUNLOCK_NOSLEEP();
140 }
141
142 int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
143     { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };
144
145 int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
146     { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };
147
148 static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
149
150 static int tcp_timer_race;
151 SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
152     0, "Count of t_inpcb races on tcp_discardcb");
153
154 /*
155  * TCP timer processing.
156  */
157
158 void
159 tcp_timer_delack(void *xtp)
160 {
161         struct tcpcb *tp = xtp;
162         struct inpcb *inp;
163         CURVNET_SET(tp->t_vnet);
164
165         inp = tp->t_inpcb;
166         /*
167          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
168          * tear-down mean we need it as a work-around for races between
169          * timers and tcp_discardcb().
170          *
171          * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
172          */
173         if (inp == NULL) {
174                 tcp_timer_race++;
175                 CURVNET_RESTORE();
176                 return;
177         }
178         INP_WLOCK(inp);
179         if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_delack)
180             || !callout_active(&tp->t_timers->tt_delack)) {
181                 INP_WUNLOCK(inp);
182                 CURVNET_RESTORE();
183                 return;
184         }
185         callout_deactivate(&tp->t_timers->tt_delack);
186
187         tp->t_flags |= TF_ACKNOW;
188         TCPSTAT_INC(tcps_delack);
189         (void) tcp_output(tp);
190         INP_WUNLOCK(inp);
191         CURVNET_RESTORE();
192 }
193
194 void
195 tcp_timer_2msl(void *xtp)
196 {
197         struct tcpcb *tp = xtp;
198         struct inpcb *inp;
199         CURVNET_SET(tp->t_vnet);
200 #ifdef TCPDEBUG
201         int ostate;
202
203         ostate = tp->t_state;
204 #endif
205         /*
206          * XXXRW: Does this actually happen?
207          */
208         INP_INFO_WLOCK(&V_tcbinfo);
209         inp = tp->t_inpcb;
210         /*
211          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
212          * tear-down mean we need it as a work-around for races between
213          * timers and tcp_discardcb().
214          *
215          * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
216          */
217         if (inp == NULL) {
218                 tcp_timer_race++;
219                 INP_INFO_WUNLOCK(&V_tcbinfo);
220                 CURVNET_RESTORE();
221                 return;
222         }
223         INP_WLOCK(inp);
224         tcp_free_sackholes(tp);
225         if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_2msl) ||
226             !callout_active(&tp->t_timers->tt_2msl)) {
227                 INP_WUNLOCK(tp->t_inpcb);
228                 INP_INFO_WUNLOCK(&V_tcbinfo);
229                 CURVNET_RESTORE();
230                 return;
231         }
232         callout_deactivate(&tp->t_timers->tt_2msl);
233         /*
234          * 2 MSL timeout in shutdown went off.  If we're closed but
235          * still waiting for peer to close and connection has been idle
236          * too long, or if 2MSL time is up from TIME_WAIT, delete connection
237          * control block.  Otherwise, check again in a bit.
238          *
239          * If fastrecycle of FIN_WAIT_2, in FIN_WAIT_2 and receiver has closed, 
240          * there's no point in hanging onto FIN_WAIT_2 socket. Just close it. 
241          * Ignore fact that there were recent incoming segments.
242          */
243         if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
244             tp->t_inpcb && tp->t_inpcb->inp_socket && 
245             (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
246                 TCPSTAT_INC(tcps_finwait2_drops);
247                 tp = tcp_close(tp);             
248         } else {
249                 if (tp->t_state != TCPS_TIME_WAIT &&
250                    ticks - tp->t_rcvtime <= tcp_maxidle)
251                        callout_reset(&tp->t_timers->tt_2msl, tcp_keepintvl,
252                                      tcp_timer_2msl, tp);
253                else
254                        tp = tcp_close(tp);
255        }
256
257 #ifdef TCPDEBUG
258         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
259                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
260                           PRU_SLOWTIMO);
261 #endif
262         if (tp != NULL)
263                 INP_WUNLOCK(inp);
264         INP_INFO_WUNLOCK(&V_tcbinfo);
265         CURVNET_RESTORE();
266 }
267
268 void
269 tcp_timer_keep(void *xtp)
270 {
271         struct tcpcb *tp = xtp;
272         struct tcptemp *t_template;
273         struct inpcb *inp;
274         CURVNET_SET(tp->t_vnet);
275 #ifdef TCPDEBUG
276         int ostate;
277
278         ostate = tp->t_state;
279 #endif
280         INP_INFO_WLOCK(&V_tcbinfo);
281         inp = tp->t_inpcb;
282         /*
283          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
284          * tear-down mean we need it as a work-around for races between
285          * timers and tcp_discardcb().
286          *
287          * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
288          */
289         if (inp == NULL) {
290                 tcp_timer_race++;
291                 INP_INFO_WUNLOCK(&V_tcbinfo);
292                 CURVNET_RESTORE();
293                 return;
294         }
295         INP_WLOCK(inp);
296         if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_keep)
297             || !callout_active(&tp->t_timers->tt_keep)) {
298                 INP_WUNLOCK(inp);
299                 INP_INFO_WUNLOCK(&V_tcbinfo);
300                 CURVNET_RESTORE();
301                 return;
302         }
303         callout_deactivate(&tp->t_timers->tt_keep);
304         /*
305          * Keep-alive timer went off; send something
306          * or drop connection if idle for too long.
307          */
308         TCPSTAT_INC(tcps_keeptimeo);
309         if (tp->t_state < TCPS_ESTABLISHED)
310                 goto dropit;
311         if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
312             tp->t_state <= TCPS_CLOSING) {
313                 if (ticks - tp->t_rcvtime >= tcp_keepidle + tcp_maxidle)
314                         goto dropit;
315                 /*
316                  * Send a packet designed to force a response
317                  * if the peer is up and reachable:
318                  * either an ACK if the connection is still alive,
319                  * or an RST if the peer has closed the connection
320                  * due to timeout or reboot.
321                  * Using sequence number tp->snd_una-1
322                  * causes the transmitted zero-length segment
323                  * to lie outside the receive window;
324                  * by the protocol spec, this requires the
325                  * correspondent TCP to respond.
326                  */
327                 TCPSTAT_INC(tcps_keepprobe);
328                 t_template = tcpip_maketemplate(inp);
329                 if (t_template) {
330                         tcp_respond(tp, t_template->tt_ipgen,
331                                     &t_template->tt_t, (struct mbuf *)NULL,
332                                     tp->rcv_nxt, tp->snd_una - 1, 0);
333                         free(t_template, M_TEMP);
334                 }
335                 callout_reset(&tp->t_timers->tt_keep, tcp_keepintvl, tcp_timer_keep, tp);
336         } else
337                 callout_reset(&tp->t_timers->tt_keep, tcp_keepidle, tcp_timer_keep, tp);
338
339 #ifdef TCPDEBUG
340         if (inp->inp_socket->so_options & SO_DEBUG)
341                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
342                           PRU_SLOWTIMO);
343 #endif
344         INP_WUNLOCK(inp);
345         INP_INFO_WUNLOCK(&V_tcbinfo);
346         CURVNET_RESTORE();
347         return;
348
349 dropit:
350         TCPSTAT_INC(tcps_keepdrops);
351         tp = tcp_drop(tp, ETIMEDOUT);
352
353 #ifdef TCPDEBUG
354         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
355                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
356                           PRU_SLOWTIMO);
357 #endif
358         if (tp != NULL)
359                 INP_WUNLOCK(tp->t_inpcb);
360         INP_INFO_WUNLOCK(&V_tcbinfo);
361         CURVNET_RESTORE();
362 }
363
364 void
365 tcp_timer_persist(void *xtp)
366 {
367         struct tcpcb *tp = xtp;
368         struct inpcb *inp;
369         CURVNET_SET(tp->t_vnet);
370 #ifdef TCPDEBUG
371         int ostate;
372
373         ostate = tp->t_state;
374 #endif
375         INP_INFO_WLOCK(&V_tcbinfo);
376         inp = tp->t_inpcb;
377         /*
378          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
379          * tear-down mean we need it as a work-around for races between
380          * timers and tcp_discardcb().
381          *
382          * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
383          */
384         if (inp == NULL) {
385                 tcp_timer_race++;
386                 INP_INFO_WUNLOCK(&V_tcbinfo);
387                 CURVNET_RESTORE();
388                 return;
389         }
390         INP_WLOCK(inp);
391         if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_persist)
392             || !callout_active(&tp->t_timers->tt_persist)) {
393                 INP_WUNLOCK(inp);
394                 INP_INFO_WUNLOCK(&V_tcbinfo);
395                 CURVNET_RESTORE();
396                 return;
397         }
398         callout_deactivate(&tp->t_timers->tt_persist);
399         /*
400          * Persistance timer into zero window.
401          * Force a byte to be output, if possible.
402          */
403         TCPSTAT_INC(tcps_persisttimeo);
404         /*
405          * Hack: if the peer is dead/unreachable, we do not
406          * time out if the window is closed.  After a full
407          * backoff, drop the connection if the idle time
408          * (no responses to probes) reaches the maximum
409          * backoff that we would use if retransmitting.
410          */
411         if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
412             (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
413              ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
414                 TCPSTAT_INC(tcps_persistdrop);
415                 tp = tcp_drop(tp, ETIMEDOUT);
416                 goto out;
417         }
418         tcp_setpersist(tp);
419         tp->t_flags |= TF_FORCEDATA;
420         (void) tcp_output(tp);
421         tp->t_flags &= ~TF_FORCEDATA;
422
423 out:
424 #ifdef TCPDEBUG
425         if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
426                 tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
427 #endif
428         if (tp != NULL)
429                 INP_WUNLOCK(inp);
430         INP_INFO_WUNLOCK(&V_tcbinfo);
431         CURVNET_RESTORE();
432 }
433
434 void
435 tcp_timer_rexmt(void * xtp)
436 {
437         struct tcpcb *tp = xtp;
438         CURVNET_SET(tp->t_vnet);
439         int rexmt;
440         int headlocked;
441         struct inpcb *inp;
442 #ifdef TCPDEBUG
443         int ostate;
444
445         ostate = tp->t_state;
446 #endif
447         INP_INFO_WLOCK(&V_tcbinfo);
448         headlocked = 1;
449         inp = tp->t_inpcb;
450         /*
451          * XXXRW: While this assert is in fact correct, bugs in the tcpcb
452          * tear-down mean we need it as a work-around for races between
453          * timers and tcp_discardcb().
454          *
455          * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
456          */
457         if (inp == NULL) {
458                 tcp_timer_race++;
459                 INP_INFO_WUNLOCK(&V_tcbinfo);
460                 CURVNET_RESTORE();
461                 return;
462         }
463         INP_WLOCK(inp);
464         if ((inp->inp_flags & INP_DROPPED) || callout_pending(&tp->t_timers->tt_rexmt)
465             || !callout_active(&tp->t_timers->tt_rexmt)) {
466                 INP_WUNLOCK(inp);
467                 INP_INFO_WUNLOCK(&V_tcbinfo);
468                 CURVNET_RESTORE();
469                 return;
470         }
471         callout_deactivate(&tp->t_timers->tt_rexmt);
472         tcp_free_sackholes(tp);
473         /*
474          * Retransmission timer went off.  Message has not
475          * been acked within retransmit interval.  Back off
476          * to a longer retransmit interval and retransmit one segment.
477          */
478         if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
479                 tp->t_rxtshift = TCP_MAXRXTSHIFT;
480                 TCPSTAT_INC(tcps_timeoutdrop);
481                 tp = tcp_drop(tp, tp->t_softerror ?
482                               tp->t_softerror : ETIMEDOUT);
483                 goto out;
484         }
485         INP_INFO_WUNLOCK(&V_tcbinfo);
486         headlocked = 0;
487         if (tp->t_rxtshift == 1) {
488                 /*
489                  * first retransmit; record ssthresh and cwnd so they can
490                  * be recovered if this turns out to be a "bad" retransmit.
491                  * A retransmit is considered "bad" if an ACK for this
492                  * segment is received within RTT/2 interval; the assumption
493                  * here is that the ACK was already in flight.  See
494                  * "On Estimating End-to-End Network Path Properties" by
495                  * Allman and Paxson for more details.
496                  */
497                 tp->snd_cwnd_prev = tp->snd_cwnd;
498                 tp->snd_ssthresh_prev = tp->snd_ssthresh;
499                 tp->snd_recover_prev = tp->snd_recover;
500                 if (IN_FASTRECOVERY(tp))
501                   tp->t_flags |= TF_WASFRECOVERY;
502                 else
503                   tp->t_flags &= ~TF_WASFRECOVERY;
504                 tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
505         }
506         TCPSTAT_INC(tcps_rexmttimeo);
507         if (tp->t_state == TCPS_SYN_SENT)
508                 rexmt = TCP_REXMTVAL(tp) * tcp_syn_backoff[tp->t_rxtshift];
509         else
510                 rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
511         TCPT_RANGESET(tp->t_rxtcur, rexmt,
512                       tp->t_rttmin, TCPTV_REXMTMAX);
513         /*
514          * Disable rfc1323 if we havn't got any response to
515          * our third SYN to work-around some broken terminal servers
516          * (most of which have hopefully been retired) that have bad VJ
517          * header compression code which trashes TCP segments containing
518          * unknown-to-them TCP options.
519          */
520         if ((tp->t_state == TCPS_SYN_SENT) && (tp->t_rxtshift == 3))
521                 tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP);
522         /*
523          * If we backed off this far, our srtt estimate is probably bogus.
524          * Clobber it so we'll take the next rtt measurement as our srtt;
525          * move the current srtt into rttvar to keep the current
526          * retransmit times until then.
527          */
528         if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
529 #ifdef INET6
530                 if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
531                         in6_losing(tp->t_inpcb);
532                 else
533 #endif
534                 tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
535                 tp->t_srtt = 0;
536         }
537         tp->snd_nxt = tp->snd_una;
538         tp->snd_recover = tp->snd_max;
539         /*
540          * Force a segment to be sent.
541          */
542         tp->t_flags |= TF_ACKNOW;
543         /*
544          * If timing a segment in this window, stop the timer.
545          */
546         tp->t_rtttime = 0;
547         /*
548          * Close the congestion window down to one segment
549          * (we'll open it by one segment for each ack we get).
550          * Since we probably have a window's worth of unacked
551          * data accumulated, this "slow start" keeps us from
552          * dumping all that data as back-to-back packets (which
553          * might overwhelm an intermediate gateway).
554          *
555          * There are two phases to the opening: Initially we
556          * open by one mss on each ack.  This makes the window
557          * size increase exponentially with time.  If the
558          * window is larger than the path can handle, this
559          * exponential growth results in dropped packet(s)
560          * almost immediately.  To get more time between
561          * drops but still "push" the network to take advantage
562          * of improving conditions, we switch from exponential
563          * to linear window opening at some threshhold size.
564          * For a threshhold, we use half the current window
565          * size, truncated to a multiple of the mss.
566          *
567          * (the minimum cwnd that will give us exponential
568          * growth is 2 mss.  We don't allow the threshhold
569          * to go below this.)
570          */
571         {
572                 u_int win = min(tp->snd_wnd, tp->snd_cwnd) / 2 / tp->t_maxseg;
573                 if (win < 2)
574                         win = 2;
575                 tp->snd_cwnd = tp->t_maxseg;
576                 tp->snd_ssthresh = win * tp->t_maxseg;
577                 tp->t_dupacks = 0;
578         }
579         EXIT_FASTRECOVERY(tp);
580         tp->t_bytes_acked = 0;
581         (void) tcp_output(tp);
582
583 out:
584 #ifdef TCPDEBUG
585         if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
586                 tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
587                           PRU_SLOWTIMO);
588 #endif
589         if (tp != NULL)
590                 INP_WUNLOCK(inp);
591         if (headlocked)
592                 INP_INFO_WUNLOCK(&V_tcbinfo);
593         CURVNET_RESTORE();
594 }
595
596 void
597 tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
598 {
599         struct callout *t_callout;
600         void *f_callout;
601
602         switch (timer_type) {
603                 case TT_DELACK:
604                         t_callout = &tp->t_timers->tt_delack;
605                         f_callout = tcp_timer_delack;
606                         break;
607                 case TT_REXMT:
608                         t_callout = &tp->t_timers->tt_rexmt;
609                         f_callout = tcp_timer_rexmt;
610                         break;
611                 case TT_PERSIST:
612                         t_callout = &tp->t_timers->tt_persist;
613                         f_callout = tcp_timer_persist;
614                         break;
615                 case TT_KEEP:
616                         t_callout = &tp->t_timers->tt_keep;
617                         f_callout = tcp_timer_keep;
618                         break;
619                 case TT_2MSL:
620                         t_callout = &tp->t_timers->tt_2msl;
621                         f_callout = tcp_timer_2msl;
622                         break;
623                 default:
624                         panic("bad timer_type");
625                 }
626         if (delta == 0) {
627                 callout_stop(t_callout);
628         } else {
629                 callout_reset(t_callout, delta, f_callout, tp);
630         }
631 }
632
633 int
634 tcp_timer_active(struct tcpcb *tp, int timer_type)
635 {
636         struct callout *t_callout;
637
638         switch (timer_type) {
639                 case TT_DELACK:
640                         t_callout = &tp->t_timers->tt_delack;
641                         break;
642                 case TT_REXMT:
643                         t_callout = &tp->t_timers->tt_rexmt;
644                         break;
645                 case TT_PERSIST:
646                         t_callout = &tp->t_timers->tt_persist;
647                         break;
648                 case TT_KEEP:
649                         t_callout = &tp->t_timers->tt_keep;
650                         break;
651                 case TT_2MSL:
652                         t_callout = &tp->t_timers->tt_2msl;
653                         break;
654                 default:
655                         panic("bad timer_type");
656                 }
657         return callout_active(t_callout);
658 }