/*-
 * Copyright (c) 1982, 1986, 1988, 1990, 1993, 1995
 *      The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)tcp_timer.c 8.2 (Berkeley) 5/24/95
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_tcpdebug.h"
#include "opt_rss.h"

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mbuf.h>
#include <sys/mutex.h>
#include <sys/protosw.h>
#include <sys/smp.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/sysctl.h>
#include <sys/systm.h>

#include <net/if.h>
#include <net/route.h>
#include <net/vnet.h>
#include <net/netisr.h>

#include <netinet/cc.h>
#include <netinet/in.h>
#include <netinet/in_pcb.h>
#include <netinet/in_rss.h>
#include <netinet/in_systm.h>
#ifdef INET6
#include <netinet6/in6_pcb.h>
#endif
#include <netinet/ip_var.h>
#include <netinet/tcp_fsm.h>
#include <netinet/tcp_timer.h>
#include <netinet/tcp_var.h>
#include <netinet/tcpip.h>
#ifdef TCPDEBUG
#include <netinet/tcp_debug.h>
#endif

int     tcp_keepinit;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection");

int     tcp_keepidle;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPIDLE, keepidle, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepidle, 0, sysctl_msec_to_ticks, "I", "time before keepalive probes begin");

int     tcp_keepintvl;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINTVL, keepintvl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_keepintvl, 0, sysctl_msec_to_ticks, "I", "time between keepalive probes");

int     tcp_delacktime;
SYSCTL_PROC(_net_inet_tcp, TCPCTL_DELACKTIME, delacktime, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_delacktime, 0, sysctl_msec_to_ticks, "I",
    "Time before a delayed ACK is sent");

int     tcp_msl;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, msl, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_msl, 0, sysctl_msec_to_ticks, "I", "Maximum segment lifetime");

int     tcp_rexmit_min;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_min, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_min, 0, sysctl_msec_to_ticks, "I",
    "Minimum Retransmission Timeout");

int     tcp_rexmit_slop;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, rexmit_slop, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_rexmit_slop, 0, sysctl_msec_to_ticks, "I",
    "Retransmission Timer Slop");

static int      always_keepalive = 1;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, always_keepalive, CTLFLAG_RW,
    &always_keepalive , 0, "Assume SO_KEEPALIVE on all TCP connections");

int    tcp_fast_finwait2_recycle = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, fast_finwait2_recycle, CTLFLAG_RW,
    &tcp_fast_finwait2_recycle, 0,
    "Recycle closed FIN_WAIT_2 connections faster");

int    tcp_finwait2_timeout;
SYSCTL_PROC(_net_inet_tcp, OID_AUTO, finwait2_timeout, CTLTYPE_INT|CTLFLAG_RW,
    &tcp_finwait2_timeout, 0, sysctl_msec_to_ticks, "I", "FIN-WAIT2 timeout");

int     tcp_keepcnt = TCPTV_KEEPCNT;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, keepcnt, CTLFLAG_RW, &tcp_keepcnt, 0,
    "Number of keepalive probes to send");

        /* max idle probes */
int     tcp_maxpersistidle;

static int      tcp_rexmit_drop_options = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, rexmit_drop_options, CTLFLAG_RW,
    &tcp_rexmit_drop_options, 0,
    "Drop TCP options from 3rd and later retransmitted SYN");

#ifdef  RSS
static int      per_cpu_timers = 1;
#else
static int      per_cpu_timers = 0;
#endif
SYSCTL_INT(_net_inet_tcp, OID_AUTO, per_cpu_timers, CTLFLAG_RW,
    &per_cpu_timers , 0, "run tcp timers on all cpus");

#if 0
#define INP_CPU(inp)    (per_cpu_timers ? (!CPU_ABSENT(((inp)->inp_flowid % (mp_maxid+1))) ? \
                ((inp)->inp_flowid % (mp_maxid+1)) : curcpu) : 0)
#endif

/*
 * Map the given inp to a CPU id.
 *
 * This queries RSS if it's compiled in, else it defaults to the current
 * CPU ID.
 */
static inline int
inp_to_cpuid(struct inpcb *inp)
{
        u_int cpuid;

#ifdef  RSS
        if (per_cpu_timers) {
                cpuid = rss_hash2cpuid(inp->inp_flowid, inp->inp_flowtype);
                if (cpuid == NETISR_CPUID_NONE)
                        return (curcpu);        /* XXX */
                else
                        return (cpuid);
        }
#else
        /* Legacy, pre-RSS behaviour */
        if (per_cpu_timers) {
                /*
                 * We don't have a flowid -> cpuid mapping, so cheat and
                 * just map unknown cpuids to curcpu.  Not the best, but
                 * apparently better than defaulting to swi 0.
                 */
                cpuid = inp->inp_flowid % (mp_maxid + 1);
                if (! CPU_ABSENT(cpuid))
                        return (cpuid);
                return (curcpu);
        }
#endif
        /* Default for RSS and non-RSS - cpuid 0 */
        else {
                return (0);
        }
}
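
/*
 * Illustrative use (a sketch; the real consumers are the timer handlers
 * below and tcp_timer_activate()): the CPU id returned above is only ever
 * passed to callout_reset_on(), e.g.
 *
 *      callout_reset_on(&tp->t_timers->tt_delack, tcp_delacktime,
 *          tcp_timer_delack, tp, inp_to_cpuid(inp));
 *
 * so that, with RSS, a connection's timers run on the CPU its inbound
 * traffic hashes to.
 */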

/*
 * TCP protocol timeout routine called every 500 ms.
 * Updates timestamps used for TCP and causes finite state machine
 * actions if timers expire.
 */
void
tcp_slowtimo(void)
{
        VNET_ITERATOR_DECL(vnet_iter);

        VNET_LIST_RLOCK_NOSLEEP();
        VNET_FOREACH(vnet_iter) {
                CURVNET_SET(vnet_iter);
                tcp_tw_2msl_scan();
                CURVNET_RESTORE();
        }
        VNET_LIST_RUNLOCK_NOSLEEP();
}

int     tcp_syn_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 1, 1, 1, 1, 2, 4, 8, 16, 32, 64, 64, 64 };

int     tcp_backoff[TCP_MAXRXTSHIFT + 1] =
    { 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 512, 512, 512 };

static int tcp_totbackoff = 2559;       /* sum of tcp_backoff[] */
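
/*
 * Sanity check on the constant above: the first ten tcp_backoff[] entries
 * sum to 1 + 2 + 4 + ... + 512 = 1023, and the remaining three 512s add
 * 1536, giving 1023 + 1536 = 2559 = tcp_totbackoff.
 */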

static int tcp_timer_race;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, timer_race, CTLFLAG_RD, &tcp_timer_race,
    0, "Count of t_inpcb races on tcp_discardcb");

/*
 * TCP timer processing.
 */

void
tcp_timer_delack(void *xtp)
{
        struct tcpcb *tp = xtp;
        struct inpcb *inp;
        CURVNET_SET(tp->t_vnet);

        inp = tp->t_inpcb;
        /*
         * XXXRW: While this assert is in fact correct, bugs in the tcpcb
         * tear-down mean we need it as a work-around for races between
         * timers and tcp_discardcb().
         *
         * KASSERT(inp != NULL, ("tcp_timer_delack: inp == NULL"));
         */
        if (inp == NULL) {
                tcp_timer_race++;
                CURVNET_RESTORE();
                return;
        }
        INP_WLOCK(inp);
        if (callout_pending(&tp->t_timers->tt_delack) ||
            !callout_active(&tp->t_timers->tt_delack)) {
                INP_WUNLOCK(inp);
                CURVNET_RESTORE();
                return;
        }
        callout_deactivate(&tp->t_timers->tt_delack);
        if ((inp->inp_flags & INP_DROPPED) != 0) {
                INP_WUNLOCK(inp);
                CURVNET_RESTORE();
                return;
        }

        tp->t_flags |= TF_ACKNOW;
        TCPSTAT_INC(tcps_delack);
        (void) tcp_output(tp);
        INP_WUNLOCK(inp);
        CURVNET_RESTORE();
}
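
/*
 * For reference (a sketch; the call sites live in tcp_input.c, not here):
 * the delayed-ACK timer serviced above is normally armed from the input
 * path with
 *
 *      tcp_timer_activate(tp, TT_DELACK, tcp_delacktime);
 *
 * and tcp_output() clears TF_ACKNOW/TF_DELACK once the ACK has been sent.
 */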

void
tcp_timer_2msl(void *xtp)
{
        struct tcpcb *tp = xtp;
        struct inpcb *inp;
        CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
        int ostate;

        ostate = tp->t_state;
#endif
        /*
         * XXXRW: Does this actually happen?
         */
        INP_INFO_WLOCK(&V_tcbinfo);
        inp = tp->t_inpcb;
        /*
         * XXXRW: While this assert is in fact correct, bugs in the tcpcb
         * tear-down mean we need it as a work-around for races between
         * timers and tcp_discardcb().
         *
         * KASSERT(inp != NULL, ("tcp_timer_2msl: inp == NULL"));
         */
        if (inp == NULL) {
                tcp_timer_race++;
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        INP_WLOCK(inp);
        tcp_free_sackholes(tp);
        if (callout_pending(&tp->t_timers->tt_2msl) ||
            !callout_active(&tp->t_timers->tt_2msl)) {
                INP_WUNLOCK(tp->t_inpcb);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        callout_deactivate(&tp->t_timers->tt_2msl);
        if ((inp->inp_flags & INP_DROPPED) != 0) {
                INP_WUNLOCK(inp);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        /*
         * 2 MSL timeout in shutdown went off.  If we're closed but
         * still waiting for peer to close and connection has been idle
         * too long, or if 2MSL time is up from TIME_WAIT, delete connection
         * control block.  Otherwise, check again in a bit.
         *
         * If fast recycling of FIN_WAIT_2 connections is enabled, we are in
         * FIN_WAIT_2 and the receiver has closed, there's no point in hanging
         * onto this socket: just close it and ignore the fact that there
         * were recent incoming segments.
         */
        if (tcp_fast_finwait2_recycle && tp->t_state == TCPS_FIN_WAIT_2 &&
            tp->t_inpcb && tp->t_inpcb->inp_socket &&
            (tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
                TCPSTAT_INC(tcps_finwait2_drops);
                tp = tcp_close(tp);
        } else {
                if (tp->t_state != TCPS_TIME_WAIT &&
                    ticks - tp->t_rcvtime <= TP_MAXIDLE(tp))
                        callout_reset_on(&tp->t_timers->tt_2msl,
                            TP_KEEPINTVL(tp), tcp_timer_2msl, tp,
                            inp_to_cpuid(inp));
                else
                        tp = tcp_close(tp);
        }

#ifdef TCPDEBUG
        if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
                          PRU_SLOWTIMO);
#endif
        if (tp != NULL)
                INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        CURVNET_RESTORE();
}
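
/*
 * Worked example (assuming the stock defaults: keepintvl 75 s, keepcnt 8,
 * so TP_MAXIDLE(tp) = 8 * 75 s = 600 s): a FIN_WAIT_2 connection that is
 * not fast-recycled is re-examined every 75 s and closed once it has been
 * idle for more than ten minutes, while a connection already in TIME_WAIT
 * is closed as soon as this timer fires.
 */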

void
tcp_timer_keep(void *xtp)
{
        struct tcpcb *tp = xtp;
        struct tcptemp *t_template;
        struct inpcb *inp;
        CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
        int ostate;

        ostate = tp->t_state;
#endif
        INP_INFO_WLOCK(&V_tcbinfo);
        inp = tp->t_inpcb;
        /*
         * XXXRW: While this assert is in fact correct, bugs in the tcpcb
         * tear-down mean we need it as a work-around for races between
         * timers and tcp_discardcb().
         *
         * KASSERT(inp != NULL, ("tcp_timer_keep: inp == NULL"));
         */
        if (inp == NULL) {
                tcp_timer_race++;
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        INP_WLOCK(inp);
        if (callout_pending(&tp->t_timers->tt_keep) ||
            !callout_active(&tp->t_timers->tt_keep)) {
                INP_WUNLOCK(inp);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        callout_deactivate(&tp->t_timers->tt_keep);
        if ((inp->inp_flags & INP_DROPPED) != 0) {
                INP_WUNLOCK(inp);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        /*
         * Keep-alive timer went off; send something
         * or drop connection if idle for too long.
         */
        TCPSTAT_INC(tcps_keeptimeo);
        if (tp->t_state < TCPS_ESTABLISHED)
                goto dropit;
        if ((always_keepalive || inp->inp_socket->so_options & SO_KEEPALIVE) &&
            tp->t_state <= TCPS_CLOSING) {
                if (ticks - tp->t_rcvtime >= TP_KEEPIDLE(tp) + TP_MAXIDLE(tp))
                        goto dropit;
                /*
                 * Send a packet designed to force a response
                 * if the peer is up and reachable:
                 * either an ACK if the connection is still alive,
                 * or an RST if the peer has closed the connection
                 * due to timeout or reboot.
                 * Using sequence number tp->snd_una-1
                 * causes the transmitted zero-length segment
                 * to lie outside the receive window;
                 * by the protocol spec, this requires the
                 * correspondent TCP to respond.
                 */
                TCPSTAT_INC(tcps_keepprobe);
                t_template = tcpip_maketemplate(inp);
                if (t_template) {
                        tcp_respond(tp, t_template->tt_ipgen,
                                    &t_template->tt_t, (struct mbuf *)NULL,
                                    tp->rcv_nxt, tp->snd_una - 1, 0);
                        free(t_template, M_TEMP);
                }
                callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPINTVL(tp),
                    tcp_timer_keep, tp, inp_to_cpuid(inp));
        } else
                callout_reset_on(&tp->t_timers->tt_keep, TP_KEEPIDLE(tp),
                    tcp_timer_keep, tp, inp_to_cpuid(inp));

#ifdef TCPDEBUG
        if (inp->inp_socket->so_options & SO_DEBUG)
                tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
                          PRU_SLOWTIMO);
#endif
        INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        CURVNET_RESTORE();
        return;

dropit:
        TCPSTAT_INC(tcps_keepdrops);
        tp = tcp_drop(tp, ETIMEDOUT);

#ifdef TCPDEBUG
        if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
                          PRU_SLOWTIMO);
#endif
        if (tp != NULL)
                INP_WUNLOCK(tp->t_inpcb);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        CURVNET_RESTORE();
}
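
/*
 * Worked timeline (assuming the stock defaults: keepidle 7200 s,
 * keepintvl 75 s, keepcnt 8): the first probe is sent after two hours of
 * idleness, further probes follow every 75 s, and the connection is
 * dropped once it has been idle for TP_KEEPIDLE + TP_MAXIDLE =
 * 7200 s + 600 s, i.e. about two hours and ten minutes with no response.
 */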

void
tcp_timer_persist(void *xtp)
{
        struct tcpcb *tp = xtp;
        struct inpcb *inp;
        CURVNET_SET(tp->t_vnet);
#ifdef TCPDEBUG
        int ostate;

        ostate = tp->t_state;
#endif
        INP_INFO_WLOCK(&V_tcbinfo);
        inp = tp->t_inpcb;
        /*
         * XXXRW: While this assert is in fact correct, bugs in the tcpcb
         * tear-down mean we need it as a work-around for races between
         * timers and tcp_discardcb().
         *
         * KASSERT(inp != NULL, ("tcp_timer_persist: inp == NULL"));
         */
        if (inp == NULL) {
                tcp_timer_race++;
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        INP_WLOCK(inp);
        if (callout_pending(&tp->t_timers->tt_persist) ||
            !callout_active(&tp->t_timers->tt_persist)) {
                INP_WUNLOCK(inp);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        callout_deactivate(&tp->t_timers->tt_persist);
        if ((inp->inp_flags & INP_DROPPED) != 0) {
                INP_WUNLOCK(inp);
                INP_INFO_WUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        /*
         * Persistence timer into zero window.
         * Force a byte to be output, if possible.
         */
        TCPSTAT_INC(tcps_persisttimeo);
        /*
         * Hack: if the peer is dead/unreachable, we do not
         * time out if the window is closed.  After a full
         * backoff, drop the connection if the idle time
         * (no responses to probes) reaches the maximum
         * backoff that we would use if retransmitting.
         */
        if (tp->t_rxtshift == TCP_MAXRXTSHIFT &&
            (ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
             ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
                TCPSTAT_INC(tcps_persistdrop);
                tp = tcp_drop(tp, ETIMEDOUT);
                goto out;
        }
        /*
         * If the user has closed the socket then drop a persisting
         * connection after a much reduced timeout.
         */
        if (tp->t_state > TCPS_CLOSE_WAIT &&
            (ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
                TCPSTAT_INC(tcps_persistdrop);
                tp = tcp_drop(tp, ETIMEDOUT);
                goto out;
        }
        tcp_setpersist(tp);
        tp->t_flags |= TF_FORCEDATA;
        (void) tcp_output(tp);
        tp->t_flags &= ~TF_FORCEDATA;

out:
#ifdef TCPDEBUG
        if (tp != NULL && tp->t_inpcb->inp_socket->so_options & SO_DEBUG)
                tcp_trace(TA_USER, ostate, tp, NULL, NULL, PRU_SLOWTIMO);
#endif
        if (tp != NULL)
                INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        CURVNET_RESTORE();
}
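
/*
 * Illustration (hypothetical numbers): with TCP_REXMTVAL(tp) around
 * 200 ms, the second drop condition above works out to
 * 0.2 s * tcp_totbackoff (2559) ~= 512 s, so a fully backed-off persist
 * connection whose window probes go unanswered for roughly 8.5 minutes is
 * dropped even if tcp_maxpersistidle is configured higher.
 */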

void
tcp_timer_rexmt(void * xtp)
{
        struct tcpcb *tp = xtp;
        CURVNET_SET(tp->t_vnet);
        int rexmt;
        int headlocked;
        struct inpcb *inp;
#ifdef TCPDEBUG
        int ostate;

        ostate = tp->t_state;
#endif
        INP_INFO_RLOCK(&V_tcbinfo);
        inp = tp->t_inpcb;
        /*
         * XXXRW: While this assert is in fact correct, bugs in the tcpcb
         * tear-down mean we need it as a work-around for races between
         * timers and tcp_discardcb().
         *
         * KASSERT(inp != NULL, ("tcp_timer_rexmt: inp == NULL"));
         */
        if (inp == NULL) {
                tcp_timer_race++;
                INP_INFO_RUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        INP_WLOCK(inp);
        if (callout_pending(&tp->t_timers->tt_rexmt) ||
            !callout_active(&tp->t_timers->tt_rexmt)) {
                INP_WUNLOCK(inp);
                INP_INFO_RUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        callout_deactivate(&tp->t_timers->tt_rexmt);
        if ((inp->inp_flags & INP_DROPPED) != 0) {
                INP_WUNLOCK(inp);
                INP_INFO_RUNLOCK(&V_tcbinfo);
                CURVNET_RESTORE();
                return;
        }
        tcp_free_sackholes(tp);
        /*
         * Retransmission timer went off.  Message has not
         * been acked within retransmit interval.  Back off
         * to a longer retransmit interval and retransmit one segment.
         */
        if (++tp->t_rxtshift > TCP_MAXRXTSHIFT) {
                tp->t_rxtshift = TCP_MAXRXTSHIFT;
                TCPSTAT_INC(tcps_timeoutdrop);
                in_pcbref(inp);
                INP_INFO_RUNLOCK(&V_tcbinfo);
                INP_WUNLOCK(inp);
                INP_INFO_WLOCK(&V_tcbinfo);
                INP_WLOCK(inp);
                if (in_pcbrele_wlocked(inp)) {
                        INP_INFO_WUNLOCK(&V_tcbinfo);
                        CURVNET_RESTORE();
                        return;
                }
                if (inp->inp_flags & INP_DROPPED) {
                        INP_WUNLOCK(inp);
                        INP_INFO_WUNLOCK(&V_tcbinfo);
                        CURVNET_RESTORE();
                        return;
                }

                tp = tcp_drop(tp, tp->t_softerror ?
                              tp->t_softerror : ETIMEDOUT);
                headlocked = 1;
                goto out;
        }
        INP_INFO_RUNLOCK(&V_tcbinfo);
        headlocked = 0;
        if (tp->t_state == TCPS_SYN_SENT) {
                /*
                 * If the SYN was retransmitted, indicate CWND to be
                 * limited to 1 segment in cc_conn_init().
                 */
                tp->snd_cwnd = 1;
        } else if (tp->t_rxtshift == 1) {
                /*
                 * first retransmit; record ssthresh and cwnd so they can
                 * be recovered if this turns out to be a "bad" retransmit.
                 * A retransmit is considered "bad" if an ACK for this
                 * segment is received within RTT/2 interval; the assumption
                 * here is that the ACK was already in flight.  See
                 * "On Estimating End-to-End Network Path Properties" by
                 * Allman and Paxson for more details.
                 */
                tp->snd_cwnd_prev = tp->snd_cwnd;
                tp->snd_ssthresh_prev = tp->snd_ssthresh;
                tp->snd_recover_prev = tp->snd_recover;
                if (IN_FASTRECOVERY(tp->t_flags))
                        tp->t_flags |= TF_WASFRECOVERY;
                else
                        tp->t_flags &= ~TF_WASFRECOVERY;
                if (IN_CONGRECOVERY(tp->t_flags))
                        tp->t_flags |= TF_WASCRECOVERY;
                else
                        tp->t_flags &= ~TF_WASCRECOVERY;
                tp->t_badrxtwin = ticks + (tp->t_srtt >> (TCP_RTT_SHIFT + 1));
                tp->t_flags |= TF_PREVVALID;
        } else
                tp->t_flags &= ~TF_PREVVALID;
        TCPSTAT_INC(tcps_rexmttimeo);
        if (tp->t_state == TCPS_SYN_SENT)
                rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift];
        else
                rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift];
        TCPT_RANGESET(tp->t_rxtcur, rexmt,
                      tp->t_rttmin, TCPTV_REXMTMAX);
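        /*
         * Worked example (hypothetical values): with TCP_REXMTVAL(tp) at
         * 400 ms, t_rxtshift == 3 gives rexmt = 400 ms * 8 = 3.2 s; by
         * shift 8 the raw value (400 ms * 256 = 102.4 s) would exceed
         * TCPTV_REXMTMAX (64 s), so TCPT_RANGESET clamps t_rxtcur into
         * [t_rttmin, TCPTV_REXMTMAX].
         */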
        /*
         * Disable RFC1323 and SACK if we haven't got any response to
         * our third SYN to work-around some broken terminal servers
         * (most of which have hopefully been retired) that have bad VJ
         * header compression code which trashes TCP segments containing
         * unknown-to-them TCP options.
         */
        if (tcp_rexmit_drop_options && (tp->t_state == TCPS_SYN_SENT) &&
            (tp->t_rxtshift == 3))
                tp->t_flags &= ~(TF_REQ_SCALE|TF_REQ_TSTMP|TF_SACK_PERMIT);
        /*
         * If we backed off this far, our srtt estimate is probably bogus.
         * Clobber it so we'll take the next rtt measurement as our srtt;
         * move the current srtt into rttvar to keep the current
         * retransmit times until then.
         */
        if (tp->t_rxtshift > TCP_MAXRXTSHIFT / 4) {
#ifdef INET6
                if ((tp->t_inpcb->inp_vflag & INP_IPV6) != 0)
                        in6_losing(tp->t_inpcb);
#endif
                tp->t_rttvar += (tp->t_srtt >> TCP_RTT_SHIFT);
                tp->t_srtt = 0;
        }
        tp->snd_nxt = tp->snd_una;
        tp->snd_recover = tp->snd_max;
        /*
         * Force a segment to be sent.
         */
        tp->t_flags |= TF_ACKNOW;
        /*
         * If timing a segment in this window, stop the timer.
         */
        tp->t_rtttime = 0;

        cc_cong_signal(tp, NULL, CC_RTO);

        (void) tcp_output(tp);

out:
#ifdef TCPDEBUG
        if (tp != NULL && (tp->t_inpcb->inp_socket->so_options & SO_DEBUG))
                tcp_trace(TA_USER, ostate, tp, (void *)0, (struct tcphdr *)0,
                          PRU_SLOWTIMO);
#endif
        if (tp != NULL)
                INP_WUNLOCK(inp);
        if (headlocked)
                INP_INFO_WUNLOCK(&V_tcbinfo);
        CURVNET_RESTORE();
}

void
tcp_timer_activate(struct tcpcb *tp, int timer_type, u_int delta)
{
        struct callout *t_callout;
        void *f_callout;
        struct inpcb *inp = tp->t_inpcb;
        int cpu = inp_to_cpuid(inp);

#ifdef TCP_OFFLOAD
        if (tp->t_flags & TF_TOE)
                return;
#endif

        switch (timer_type) {
                case TT_DELACK:
                        t_callout = &tp->t_timers->tt_delack;
                        f_callout = tcp_timer_delack;
                        break;
                case TT_REXMT:
                        t_callout = &tp->t_timers->tt_rexmt;
                        f_callout = tcp_timer_rexmt;
                        break;
                case TT_PERSIST:
                        t_callout = &tp->t_timers->tt_persist;
                        f_callout = tcp_timer_persist;
                        break;
                case TT_KEEP:
                        t_callout = &tp->t_timers->tt_keep;
                        f_callout = tcp_timer_keep;
                        break;
                case TT_2MSL:
                        t_callout = &tp->t_timers->tt_2msl;
                        f_callout = tcp_timer_2msl;
                        break;
                default:
                        panic("bad timer_type");
                }
        if (delta == 0) {
                callout_stop(t_callout);
        } else {
                callout_reset_on(t_callout, delta, f_callout, tp, cpu);
        }
}
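
/*
 * Typical use (a sketch of call sites that live in tcp_output.c and
 * tcp_input.c rather than in this file):
 *
 *      tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); (start/restart)
 *      tcp_timer_activate(tp, TT_REXMT, 0);            (stop)
 *
 * A delta of 0 cancels the callout; any other value, in ticks,
 * (re)schedules it on the CPU chosen by inp_to_cpuid().
 */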

int
tcp_timer_active(struct tcpcb *tp, int timer_type)
{
        struct callout *t_callout;

        switch (timer_type) {
                case TT_DELACK:
                        t_callout = &tp->t_timers->tt_delack;
                        break;
                case TT_REXMT:
                        t_callout = &tp->t_timers->tt_rexmt;
                        break;
                case TT_PERSIST:
                        t_callout = &tp->t_timers->tt_persist;
                        break;
                case TT_KEEP:
                        t_callout = &tp->t_timers->tt_keep;
                        break;
                case TT_2MSL:
                        t_callout = &tp->t_timers->tt_2msl;
                        break;
                default:
                        panic("bad timer_type");
                }
        return callout_active(t_callout);
}

#define ticks_to_msecs(t)       (1000*(t) / hz)

void
tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
    struct xtcp_timer *xtimer)
{
        sbintime_t now;

        bzero(xtimer, sizeof(*xtimer));
        if (timer == NULL)
                return;
        now = getsbinuptime();
        if (callout_active(&timer->tt_delack))
                xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_rexmt))
                xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_persist))
                xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_keep))
                xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
        if (callout_active(&timer->tt_2msl))
                xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
        xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}