]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/tcp_usrreq.c
zfs: merge openzfs/zfs@2163cde45
[FreeBSD/FreeBSD.git] / sys / netinet / tcp_usrreq.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1993
5  *      The Regents of the University of California.
6  * Copyright (c) 2006-2007 Robert N. M. Watson
7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Robert N. M. Watson under
11  * contract to Juniper Networks, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 #include "opt_ddb.h"
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46 #include "opt_ipsec.h"
47 #include "opt_kern_tls.h"
48 #include "opt_tcpdebug.h"
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/arb.h>
53 #include <sys/limits.h>
54 #include <sys/malloc.h>
55 #include <sys/refcount.h>
56 #include <sys/kernel.h>
57 #include <sys/ktls.h>
58 #include <sys/qmath.h>
59 #include <sys/sysctl.h>
60 #include <sys/mbuf.h>
61 #ifdef INET6
62 #include <sys/domain.h>
63 #endif /* INET6 */
64 #include <sys/socket.h>
65 #include <sys/socketvar.h>
66 #include <sys/protosw.h>
67 #include <sys/proc.h>
68 #include <sys/jail.h>
69 #include <sys/stats.h>
70
71 #ifdef DDB
72 #include <ddb/ddb.h>
73 #endif
74
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/route.h>
78 #include <net/vnet.h>
79
80 #include <netinet/in.h>
81 #include <netinet/in_kdtrace.h>
82 #include <netinet/in_pcb.h>
83 #include <netinet/in_systm.h>
84 #include <netinet/in_var.h>
85 #include <netinet/ip.h>
86 #include <netinet/ip_var.h>
87 #ifdef INET6
88 #include <netinet/ip6.h>
89 #include <netinet6/in6_pcb.h>
90 #include <netinet6/ip6_var.h>
91 #include <netinet6/scope6_var.h>
92 #endif
93 #include <netinet/tcp.h>
94 #include <netinet/tcp_fsm.h>
95 #include <netinet/tcp_seq.h>
96 #include <netinet/tcp_timer.h>
97 #include <netinet/tcp_var.h>
98 #include <netinet/tcp_log_buf.h>
99 #include <netinet/tcpip.h>
100 #include <netinet/cc/cc.h>
101 #include <netinet/tcp_fastopen.h>
102 #include <netinet/tcp_hpts.h>
103 #ifdef TCPPCAP
104 #include <netinet/tcp_pcap.h>
105 #endif
106 #include <netinet/tcp_debug.h>
107 #ifdef TCP_OFFLOAD
108 #include <netinet/tcp_offload.h>
109 #endif
110 #include <netipsec/ipsec_support.h>
111
112 #include <vm/vm.h>
113 #include <vm/vm_param.h>
114 #include <vm/pmap.h>
115 #include <vm/vm_extern.h>
116 #include <vm/vm_map.h>
117 #include <vm/vm_page.h>
118
119 /*
120  * TCP protocol interface to socket abstraction.
121  */
/*
 * Forward declarations of the file-local (static) helpers referenced by the
 * user-request handlers in this file.  The two connect helpers are only
 * declared when the matching address family (INET / INET6) is compiled in.
 */
#ifdef INET
static int	tcp_connect(struct tcpcb *, struct sockaddr *,
		    struct thread *td);
#endif /* INET */
#ifdef INET6
static int	tcp6_connect(struct tcpcb *, struct sockaddr *,
		    struct thread *td);
#endif /* INET6 */
static void	tcp_disconnect(struct tcpcb *);
static void	tcp_usrclosed(struct tcpcb *);
static void	tcp_fill_info(struct tcpcb *, struct tcp_info *);

static int	tcp_pru_options_support(struct tcpcb *tp, int flags);
135
/*
 * Tracing hooks used by the user-request handlers in this file.  They rely
 * on locals named 'tp' and 'so' being in scope at the point of use.
 *
 * TCPDEBUG0		declares the local 'ostate', initialised to 0; it must
 *			be placed where a declaration is legal.
 * TCPDEBUG1()		stores tp->t_state in 'ostate', or 0 when tp is NULL.
 * TCPDEBUG2(req)	calls tcp_trace(TA_USER, ...) for request 'req' when
 *			tp is non-NULL and SO_DEBUG is set in so->so_options.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it always expands to a
 * single statement; a bare "if" would capture a following "else" at the
 * call site.  Without TCPDEBUG all three macros expand to nothing.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
	if (tp && (so->so_options & SO_DEBUG))				\
		tcp_trace(TA_USER, ostate, tp, 0, 0, req);		\
} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
146
/*
 * tcp_require_unique_port requires a globally-unique source port for each
 * outgoing connection.  The default (0) is to require only the 4-tuple to
 * be unique.  The variable is per-VNET and is exported read-write as
 * "require_unique_port" under the _net_inet_tcp sysctl node.
 */
VNET_DEFINE(int, tcp_require_unique_port) = 0;
SYSCTL_INT(_net_inet_tcp, OID_AUTO, require_unique_port,
    CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_require_unique_port), 0,
    "Require globally-unique ephemeral port for outgoing connections");
/* Shorthand for the current VNET's instance of the variable. */
#define	V_tcp_require_unique_port	VNET(tcp_require_unique_port)
156
157 /*
158  * TCP attaches to socket via pru_attach(), reserving space,
159  * and an internet control block.
160  */
161 static int
162 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
163 {
164         struct inpcb *inp;
165         struct tcpcb *tp = NULL;
166         int error;
167         TCPDEBUG0;
168
169         inp = sotoinpcb(so);
170         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
171         TCPDEBUG1();
172
173         error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
174         if (error)
175                 goto out;
176
177         so->so_rcv.sb_flags |= SB_AUTOSIZE;
178         so->so_snd.sb_flags |= SB_AUTOSIZE;
179         error = in_pcballoc(so, &V_tcbinfo);
180         if (error)
181                 goto out;
182         inp = sotoinpcb(so);
183         tp = tcp_newtcpcb(inp);
184         if (tp == NULL) {
185                 error = ENOBUFS;
186                 in_pcbdetach(inp);
187                 in_pcbfree(inp);
188                 goto out;
189         }
190         tp->t_state = TCPS_CLOSED;
191         INP_WUNLOCK(inp);
192         TCPSTATES_INC(TCPS_CLOSED);
193 out:
194         TCPDEBUG2(PRU_ATTACH);
195         TCP_PROBE2(debug__user, tp, PRU_ATTACH);
196         return (error);
197 }
198
199 /*
200  * tcp_usr_detach is called when the socket layer loses its final reference
201  * to the socket, be it a file descriptor reference, a reference from TCP,
202  * etc.  At this point, there is only one case in which we will keep around
203  * inpcb state: time wait.
204  */
205 static void
206 tcp_usr_detach(struct socket *so)
207 {
208         struct inpcb *inp;
209         struct tcpcb *tp;
210
211         inp = sotoinpcb(so);
212         KASSERT(inp != NULL, ("%s: inp == NULL", __func__));
213         INP_WLOCK(inp);
214         KASSERT(so->so_pcb == inp && inp->inp_socket == so,
215                 ("%s: socket %p inp %p mismatch", __func__, so, inp));
216
217         tp = intotcpcb(inp);
218
219         KASSERT(inp->inp_flags & INP_DROPPED ||
220             tp->t_state < TCPS_SYN_SENT,
221             ("%s: inp %p not dropped or embryonic", __func__, inp));
222
223         tcp_discardcb(tp);
224         in_pcbdetach(inp);
225         in_pcbfree(inp);
226 }
227
#ifdef INET
/*
 * Assign a local IPv4 address to the socket.
 *
 * 'nam' must be a full-sized sockaddr_in.  For compatibility with old
 * programs an AF_UNSPEC address that is long enough and carries INADDR_ANY
 * is accepted and rewritten in place to AF_INET.  Multicast addresses are
 * refused.
 *
 * Returns 0 on success, EAFNOSUPPORT for a wrong family or a multicast
 * address, EINVAL for a wrong length or a dropped inpcb, or the error
 * reported by in_pcbbind().
 */
static int
tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
	struct inpcb *inp;
#ifdef KDTRACE_HOOKS
	struct tcpcb *tp = NULL;
#endif
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_family != AF_INET) {
		/*
		 * Old programs may pass AF_UNSPEC together with INADDR_ANY;
		 * the length is checked before the address is looked at.
		 */
		if (nam->sa_family == AF_UNSPEC &&
		    nam->sa_len >= offsetof(struct sockaddr_in, sin_zero) &&
		    sinp->sin_addr.s_addr == INADDR_ANY)
			nam->sa_family = AF_INET;
		else
			return (EAFNOSUPPORT);
	}
	if (nam->sa_len != sizeof(*sinp))
		return (EINVAL);

	/* Binding to a multicast address is not allowed. */
	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) == 0) {
#ifdef KDTRACE_HOOKS
		tp = intotcpcb(inp);
#endif
		TCPDEBUG1();
		/* The bind itself runs under the pcb hash write lock. */
		INP_HASH_WLOCK(&V_tcbinfo);
		error = in_pcbbind(inp, nam, td->td_ucred);
		INP_HASH_WUNLOCK(&V_tcbinfo);
	} else {
		error = EINVAL;
	}

	TCPDEBUG2(PRU_BIND);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);

	return (error);
}
#endif /* INET */
286
#ifdef INET6
/*
 * Assign a local address to an IPv6 TCP socket.
 *
 * 'nam' must be a full-sized AF_INET6 sockaddr_in6 and must not be a
 * multicast address.  When INET is compiled in and the socket is not
 * V6ONLY, the unspecified address keeps INP_IPV4 set alongside INP_IPV6,
 * and a v4-mapped address is converted and bound as a plain IPv4 address.
 *
 * If anything fails, inp_vflag is put back to the value it had on entry.
 * Returns 0, EAFNOSUPPORT, EINVAL, or the error from the pcb bind routine.
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct inpcb *inp;
#ifdef KDTRACE_HOOKS
	struct tcpcb *tp = NULL;
#endif
	u_char vflagsav;
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_family != AF_INET6)
		return (EAFNOSUPPORT);
	if (nam->sa_len != sizeof(*sin6))
		return (EINVAL);

	/* Binding to a multicast address is not allowed. */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
	INP_WLOCK(inp);
	vflagsav = inp->inp_vflag;
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = EINVAL;
		goto done;
	}
#ifdef KDTRACE_HOOKS
	tp = intotcpcb(inp);
#endif
	TCPDEBUG1();
	INP_HASH_WLOCK(&V_tcbinfo);
	/* Start out as IPv6-only; the INET cases below may widen this. */
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
#ifdef INET
	if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
		if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
			inp->inp_vflag |= INP_IPV4;
		} else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
			struct sockaddr_in sin;

			in6_sin6_2_sin(&sin, sin6);
			if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
				error = EAFNOSUPPORT;
			} else {
				/* Bind as an IPv4 socket. */
				inp->inp_vflag |= INP_IPV4;
				inp->inp_vflag &= ~INP_IPV6;
				error = in_pcbbind(inp,
				    (struct sockaddr *)&sin, td->td_ucred);
			}
			INP_HASH_WUNLOCK(&V_tcbinfo);
			goto done;
		}
	}
#endif
	error = in6_pcbbind(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
done:
	if (error != 0)
		inp->inp_vflag = vflagsav;
	TCPDEBUG2(PRU_BIND);
	TCP_PROBE2(debug__user, tp, PRU_BIND);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
361
#ifdef INET
/*
 * Get the socket ready to accept connections.
 *
 * After solisten_proto_check() approves, a socket with no local port yet
 * is bound with a NULL address.  On success the tcpcb moves to TCPS_LISTEN
 * and solisten_proto() is called (plus the TOE listen hook, unless
 * SO_NO_OFFLOAD is set); on a bind failure the listen is aborted.
 *
 * Returns 0, EINVAL for a dropped inpcb, or the error from
 * solisten_proto_check() / in_pcbbind().
 */
static int
tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	int error = 0;

	TCPDEBUG0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = EINVAL;
		goto done;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();

	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error != 0) {
		SOCK_UNLOCK(so);
		goto done;
	}

	/* No local port yet: bind implicitly. */
	if (inp->inp_lport == 0) {
		INP_HASH_WLOCK(&V_tcbinfo);
		error = in_pcbbind(inp, NULL, td->td_ucred);
		INP_HASH_WUNLOCK(&V_tcbinfo);
	}
	if (error != 0) {
		solisten_proto_abort(so);
	} else {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	}
	SOCK_UNLOCK(so);

	/* Done whether or not the bind above succeeded. */
	if (IS_FASTOPEN(tp->t_flags))
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

done:
	TCPDEBUG2(PRU_LISTEN);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET */
416
#ifdef INET6
/*
 * IPv6 counterpart of tcp_usr_listen().
 *
 * For a socket with no local port, INP_IPV4 in inp_vflag is set or cleared
 * to mirror the V6ONLY setting before the implicit in6_pcbbind().  If the
 * bind fails the listen is aborted and inp_vflag is restored to its value
 * on entry.
 *
 * Returns 0, EINVAL for a dropped inpcb, or the error from
 * solisten_proto_check() / in6_pcbbind().
 */
static int
tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	u_char vflagsav;
	int error = 0;

	TCPDEBUG0;
	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = EINVAL;
		goto done;
	}
	vflagsav = inp->inp_vflag;
	tp = intotcpcb(inp);
	TCPDEBUG1();

	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error != 0) {
		SOCK_UNLOCK(so);
		goto done;
	}

	INP_HASH_WLOCK(&V_tcbinfo);
	if (inp->inp_lport == 0) {
		/* IPv4 stays enabled only when the socket is not V6ONLY. */
		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0)
			inp->inp_vflag &= ~INP_IPV4;
		else
			inp->inp_vflag |= INP_IPV4;
		error = in6_pcbbind(inp, NULL, td->td_ucred);
	}
	INP_HASH_WUNLOCK(&V_tcbinfo);

	if (error != 0) {
		solisten_proto_abort(so);
	} else {
		tcp_state_change(tp, TCPS_LISTEN);
		solisten_proto(so, backlog);
#ifdef TCP_OFFLOAD
		if ((so->so_options & SO_NO_OFFLOAD) == 0)
			tcp_offload_listen_start(tp);
#endif
	}
	SOCK_UNLOCK(so);

	/* Done whether or not the bind above succeeded. */
	if (IS_FASTOPEN(tp->t_flags))
		tp->t_tfo_pending = tcp_fastopen_alloc_counter();

	/* A failed implicit bind must not leave the flags modified. */
	if (error != 0)
		inp->inp_vflag = vflagsav;

done:
	TCPDEBUG2(PRU_LISTEN);
	TCP_PROBE2(debug__user, tp, PRU_LISTEN);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
476
#ifdef INET
/*
 * Begin an active open towards the IPv4 peer named by 'nam'.
 *
 * The address is validated first: it must be a full-sized AF_INET address,
 * neither multicast nor the limited broadcast address, and acceptable to
 * prison_remote_ip4().  tcp_connect() then sets up the connection; unless a
 * TOE device takes it over, the keep timer is armed with TP_KEEPINIT() and
 * tcp_output() sends the first segment.
 *
 * Returns 0 on success, otherwise EAFNOSUPPORT, EINVAL, EACCES,
 * ECONNREFUSED (inpcb dropped), EOPNOTSUPP (listening socket) or the error
 * from one of the helpers.
 */
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
	struct epoch_tracker et;
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_family != AF_INET)
		return (EAFNOSUPPORT);
	if (nam->sa_len != sizeof(*sinp))
		return (EINVAL);

	/* TCP cannot "connect" to a multicast or broadcast address. */
	if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
		return (EAFNOSUPPORT);
	if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST)
		return (EACCES);
	error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr);
	if (error != 0)
		return (error);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = ECONNREFUSED;
	} else if (SOLISTENING(so)) {
		error = EOPNOTSUPP;
	} else {
		tp = intotcpcb(inp);
		TCPDEBUG1();
		NET_EPOCH_ENTER(et);
		error = tcp_connect(tp, nam, td);
		if (error != 0)
			goto leave_epoch;
#ifdef TCP_OFFLOAD
		/* A successful offloaded connect needs nothing more here. */
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto leave_epoch;
#endif
		tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
		error = tcp_output(tp);
		KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
		    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
leave_epoch:
		NET_EPOCH_EXIT(et);
	}

	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET */
546
#ifdef INET6
/*
 * Begin an active open on an IPv6 TCP socket.
 *
 * 'nam' must be a full-sized AF_INET6 address and must not be multicast.
 * When INET is compiled in, a v4-mapped destination is converted to a
 * sockaddr_in and connected over IPv4, provided the socket is not V6ONLY
 * and still has INP_IPV4 set; the IPv4 multicast/broadcast and jail checks
 * are applied to the converted address.  Any other destination is connected
 * over IPv6.
 *
 * On both paths, unless a TOE device takes over the connection, the keep
 * timer is armed with TP_KEEPINIT() before tcp_output() sends the first
 * segment, matching tcp_usr_connect().
 *
 * inp_vflag and inc_flags are saved on entry and put back if the call fails
 * while the socket still has no local port.
 *
 * Returns 0 on success, otherwise EAFNOSUPPORT, EINVAL, EACCES,
 * ECONNREFUSED (inpcb dropped) or the error from one of the helpers.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct epoch_tracker et;
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	struct sockaddr_in6 *sin6;
	u_int8_t incflagsav;
	u_char vflagsav;

	TCPDEBUG0;

	sin6 = (struct sockaddr_in6 *)nam;
	if (nam->sa_family != AF_INET6)
		return (EAFNOSUPPORT);
	if (nam->sa_len != sizeof (*sin6))
		return (EINVAL);

	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	/* Remember the flags so a failed implicit bind can be undone. */
	vflagsav = inp->inp_vflag;
	incflagsav = inp->inp_inc.inc_flags;
	if (inp->inp_flags & INP_DROPPED) {
		error = ECONNREFUSED;
		goto out;
	}
	if (SOLISTENING(so)) {
		error = EINVAL;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
#ifdef INET
	/*
	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
	 * therefore probably require the hash lock, which isn't held here.
	 * Is this a significant problem?
	 */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if ((inp->inp_vflag & INP_IPV4) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6);
		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto out;
		}
		if (ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) {
			error = EACCES;
			goto out;
		}
		if ((error = prison_remote_ip4(td->td_ucred,
		    &sin.sin_addr)) != 0)
			goto out;
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		NET_EPOCH_ENTER(et);
		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
			goto out_in_epoch;
#ifdef TCP_OFFLOAD
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto out_in_epoch;
#endif
		/*
		 * Arm the keep timer for connection establishment, exactly
		 * as the native IPv4 and IPv6 connect paths do.
		 */
		tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
		error = tcp_output(tp);
		goto out_in_epoch;
	} else {
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}
	}
#endif
	if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
		goto out;
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_flags |= INC_ISIPV6;
	NET_EPOCH_ENTER(et);
	if ((error = tcp6_connect(tp, nam, td)) != 0)
		goto out_in_epoch;
#ifdef TCP_OFFLOAD
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out_in_epoch;
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tcp_output(tp);
out_in_epoch:
	NET_EPOCH_EXIT(et);
out:
	KASSERT(error >= 0, ("TCP stack %s requested tcp_drop(%p) at connect()"
	    ", error code %d", tp->t_fb->tfb_tcp_block_name, tp, -error));
	/*
	 * If the implicit bind in the connect call fails, restore
	 * the flags we modified.
	 */
	if (error != 0 && inp->inp_lport == 0) {
		inp->inp_vflag = vflagsav;
		inp->inp_inc.inc_flags = incflagsav;
	}

	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
#endif /* INET6 */
674
675 /*
676  * Initiate disconnect from peer.
677  * If connection never passed embryonic stage, just drop;
678  * else if don't need to let data drain, then can just drop anyways,
679  * else have to begin TCP shutdown process: mark socket disconnecting,
680  * drain unread data, state switch to reflect user close, and
681  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
682  * when peer sends FIN and acks ours.
683  *
684  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
685  */
686 static int
687 tcp_usr_disconnect(struct socket *so)
688 {
689         struct inpcb *inp;
690         struct tcpcb *tp = NULL;
691         struct epoch_tracker et;
692         int error = 0;
693
694         TCPDEBUG0;
695         NET_EPOCH_ENTER(et);
696         inp = sotoinpcb(so);
697         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
698         INP_WLOCK(inp);
699         if (inp->inp_flags & INP_DROPPED) {
700                 error = ECONNRESET;
701                 goto out;
702         }
703         tp = intotcpcb(inp);
704         TCPDEBUG1();
705         tcp_disconnect(tp);
706 out:
707         TCPDEBUG2(PRU_DISCONNECT);
708         TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
709         INP_WUNLOCK(inp);
710         NET_EPOCH_EXIT(et);
711         return (error);
712 }
713
#ifdef INET
/*
 * Finish accept(2) on an IPv4 TCP socket.  The real work happens in the
 * layers above; all that is left is to hand back the peer's address
 * through 'nam'.
 *
 * Returns 0 with *nam set, or ECONNABORTED when the socket is already
 * disconnected or its inpcb has been dropped (*nam is then untouched).
 */
static int
tcp_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct inpcb *inp = NULL;
#ifdef KDTRACE_HOOKS
	struct tcpcb *tp = NULL;
#endif
	struct in_addr addr;
	in_port_t port = 0;
	int error = 0;
	TCPDEBUG0;

	if (so->so_state & SS_ISDISCONNECTED)
		return (ECONNABORTED);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = ECONNABORTED;
	} else {
#ifdef KDTRACE_HOOKS
		tp = intotcpcb(inp);
#endif
		TCPDEBUG1();

		/*
		 * in_getpeeraddr and COMMON_END are open-coded here: the
		 * foreign port and address are copied while the lock is
		 * held, and the sockaddr is only allocated once the lock
		 * has been released.
		 */
		port = inp->inp_fport;
		addr = inp->inp_faddr;
	}

	TCPDEBUG2(PRU_ACCEPT);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);
	if (error != 0)
		return (error);
	*nam = in_sockaddr(port, &addr);
	return (0);
}
#endif /* INET */
763
#ifdef INET6
/*
 * Finish accept(2) on an IPv6 TCP socket by returning the peer's address
 * through 'nam'.  A connection whose inpcb has INP_IPV4 set is reported as
 * a v4-mapped IPv6 address; any other as a native IPv6 address.
 *
 * Returns 0 with *nam set, or ECONNABORTED when the socket is already
 * disconnected or its inpcb has been dropped (*nam is then untouched).
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
	struct epoch_tracker et;
	struct inpcb *inp = NULL;
#ifdef KDTRACE_HOOKS
	struct tcpcb *tp = NULL;
#endif
	struct in6_addr addr6;
	struct in_addr addr;
	in_port_t port = 0;
	int error = 0;
	int v4 = 0;
	TCPDEBUG0;

	if (so->so_state & SS_ISDISCONNECTED)
		return (ECONNABORTED);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
	NET_EPOCH_ENTER(et);
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = ECONNABORTED;
	} else {
#ifdef KDTRACE_HOOKS
		tp = intotcpcb(inp);
#endif
		TCPDEBUG1();

		/*
		 * in6_mapped_peeraddr and COMMON_END are open-coded here:
		 * the foreign port and address are copied while the lock is
		 * held, and the sockaddr is only allocated once the lock
		 * has been released.
		 */
		port = inp->inp_fport;
		if (inp->inp_vflag & INP_IPV4) {
			v4 = 1;
			addr = inp->inp_faddr;
		} else {
			addr6 = inp->in6p_faddr;
		}
	}

	TCPDEBUG2(PRU_ACCEPT);
	TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
	INP_WUNLOCK(inp);
	NET_EPOCH_EXIT(et);
	if (error != 0)
		return (error);
	if (v4)
		*nam = in6_v4mapsin6_sockaddr(port, &addr);
	else
		*nam = in6_sockaddr(port, &addr6);
	return (0);
}
#endif /* INET6 */
824
825 /*
826  * Mark the connection as being incapable of further output.
827  */
828 static int
829 tcp_usr_shutdown(struct socket *so)
830 {
831         int error = 0;
832         struct inpcb *inp;
833         struct tcpcb *tp = NULL;
834         struct epoch_tracker et;
835
836         TCPDEBUG0;
837         inp = sotoinpcb(so);
838         KASSERT(inp != NULL, ("inp == NULL"));
839         INP_WLOCK(inp);
840         if (inp->inp_flags & INP_DROPPED) {
841                 INP_WUNLOCK(inp);
842                 return (ECONNRESET);
843         }
844         tp = intotcpcb(inp);
845         NET_EPOCH_ENTER(et);
846         TCPDEBUG1();
847         socantsendmore(so);
848         tcp_usrclosed(tp);
849         if (!(inp->inp_flags & INP_DROPPED))
850                 error = tcp_output_nodrop(tp);
851         TCPDEBUG2(PRU_SHUTDOWN);
852         TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
853         error = tcp_unlock_or_drop(tp, error);
854         NET_EPOCH_EXIT(et);
855
856         return (error);
857 }
858
859 /*
860  * After a receive, possibly send window update to peer.
861  */
862 static int
863 tcp_usr_rcvd(struct socket *so, int flags)
864 {
865         struct epoch_tracker et;
866         struct inpcb *inp;
867         struct tcpcb *tp = NULL;
868         int outrv = 0, error = 0;
869
870         TCPDEBUG0;
871         inp = sotoinpcb(so);
872         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
873         INP_WLOCK(inp);
874         if (inp->inp_flags & INP_DROPPED) {
875                 INP_WUNLOCK(inp);
876                 return (ECONNRESET);
877         }
878         tp = intotcpcb(inp);
879         NET_EPOCH_ENTER(et);
880         TCPDEBUG1();
881         /*
882          * For passively-created TFO connections, don't attempt a window
883          * update while still in SYN_RECEIVED as this may trigger an early
884          * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
885          * application response data, or failing that, when the DELACK timer
886          * expires.
887          */
888         if (IS_FASTOPEN(tp->t_flags) &&
889             (tp->t_state == TCPS_SYN_RECEIVED))
890                 goto out;
891 #ifdef TCP_OFFLOAD
892         if (tp->t_flags & TF_TOE)
893                 tcp_offload_rcvd(tp);
894         else
895 #endif
896                 outrv = tcp_output_nodrop(tp);
897 out:
898         TCPDEBUG2(PRU_RCVD);
899         TCP_PROBE2(debug__user, tp, PRU_RCVD);
900         (void) tcp_unlock_or_drop(tp, outrv);
901         NET_EPOCH_EXIT(et);
902         return (error);
903 }
904
905 /*
906  * Do a send by putting data in output queue and updating urgent
907  * marker if URG set.  Possibly send more data.  Unlike the other
908  * pru_*() routines, the mbuf chains are our responsibility.  We
909  * must either enqueue them or free them.  The other pru_* routines
910  * generally are caller-frees.
911  */
912 static int
913 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
914     struct sockaddr *nam, struct mbuf *control, struct thread *td)
915 {
916         struct epoch_tracker et;
917         int error = 0;
918         struct inpcb *inp;
919         struct tcpcb *tp = NULL;
920 #ifdef INET
921 #ifdef INET6
922         struct sockaddr_in sin;
923 #endif
924         struct sockaddr_in *sinp;
925 #endif
926 #ifdef INET6
927         int isipv6;
928 #endif
929         u_int8_t incflagsav;
930         u_char vflagsav;
931         bool restoreflags;
932         TCPDEBUG0;
933
934         if (control != NULL) {
935                 /* TCP doesn't do control messages (rights, creds, etc) */
936                 if (control->m_len) {
937                         m_freem(control);
938                         return (EINVAL);
939                 }
940                 m_freem(control);       /* empty control, just free it */
941         }
942
943         inp = sotoinpcb(so);
944         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
945         INP_WLOCK(inp);
946         if (inp->inp_flags & INP_DROPPED) {
947                 if (m != NULL && (flags & PRUS_NOTREADY) == 0)
948                         m_freem(m);
949                 INP_WUNLOCK(inp);
950                 return (ECONNRESET);
951         }
952
953         vflagsav = inp->inp_vflag;
954         incflagsav = inp->inp_inc.inc_flags;
955         restoreflags = false;
956         tp = intotcpcb(inp);
957
958         NET_EPOCH_ENTER(et);
959         if ((flags & PRUS_OOB) != 0 &&
960             (error = tcp_pru_options_support(tp, PRUS_OOB)) != 0)
961                 goto out;
962
963         TCPDEBUG1();
964         if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
965                 if (tp->t_state == TCPS_LISTEN) {
966                         error = EINVAL;
967                         goto out;
968                 }
969                 switch (nam->sa_family) {
970 #ifdef INET
971                 case AF_INET:
972                         sinp = (struct sockaddr_in *)nam;
973                         if (sinp->sin_len != sizeof(struct sockaddr_in)) {
974                                 error = EINVAL;
975                                 goto out;
976                         }
977                         if ((inp->inp_vflag & INP_IPV6) != 0) {
978                                 error = EAFNOSUPPORT;
979                                 goto out;
980                         }
981                         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
982                                 error = EAFNOSUPPORT;
983                                 goto out;
984                         }
985                         if (ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) {
986                                 error = EACCES;
987                                 goto out;
988                         }
989                         if ((error = prison_remote_ip4(td->td_ucred,
990                             &sinp->sin_addr)))
991                                 goto out;
992 #ifdef INET6
993                         isipv6 = 0;
994 #endif
995                         break;
996 #endif /* INET */
997 #ifdef INET6
998                 case AF_INET6:
999                 {
1000                         struct sockaddr_in6 *sin6;
1001
1002                         sin6 = (struct sockaddr_in6 *)nam;
1003                         if (sin6->sin6_len != sizeof(*sin6)) {
1004                                 error = EINVAL;
1005                                 goto out;
1006                         }
1007                         if ((inp->inp_vflag & INP_IPV6PROTO) == 0) {
1008                                 error = EAFNOSUPPORT;
1009                                 goto out;
1010                         }
1011                         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
1012                                 error = EAFNOSUPPORT;
1013                                 goto out;
1014                         }
1015                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1016 #ifdef INET
1017                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
1018                                         error = EINVAL;
1019                                         goto out;
1020                                 }
1021                                 if ((inp->inp_vflag & INP_IPV4) == 0) {
1022                                         error = EAFNOSUPPORT;
1023                                         goto out;
1024                                 }
1025                                 restoreflags = true;
1026                                 inp->inp_vflag &= ~INP_IPV6;
1027                                 sinp = &sin;
1028                                 in6_sin6_2_sin(sinp, sin6);
1029                                 if (IN_MULTICAST(
1030                                     ntohl(sinp->sin_addr.s_addr))) {
1031                                         error = EAFNOSUPPORT;
1032                                         goto out;
1033                                 }
1034                                 if ((error = prison_remote_ip4(td->td_ucred,
1035                                     &sinp->sin_addr)))
1036                                         goto out;
1037                                 isipv6 = 0;
1038 #else /* !INET */
1039                                 error = EAFNOSUPPORT;
1040                                 goto out;
1041 #endif /* INET */
1042                         } else {
1043                                 if ((inp->inp_vflag & INP_IPV6) == 0) {
1044                                         error = EAFNOSUPPORT;
1045                                         goto out;
1046                                 }
1047                                 restoreflags = true;
1048                                 inp->inp_vflag &= ~INP_IPV4;
1049                                 inp->inp_inc.inc_flags |= INC_ISIPV6;
1050                                 if ((error = prison_remote_ip6(td->td_ucred,
1051                                     &sin6->sin6_addr)))
1052                                         goto out;
1053                                 isipv6 = 1;
1054                         }
1055                         break;
1056                 }
1057 #endif /* INET6 */
1058                 default:
1059                         error = EAFNOSUPPORT;
1060                         goto out;
1061                 }
1062         }
1063         if (!(flags & PRUS_OOB)) {
1064                 if (tp->t_acktime == 0)
1065                         tp->t_acktime = ticks;
1066                 sbappendstream(&so->so_snd, m, flags);
1067                 m = NULL;
1068                 if (nam && tp->t_state < TCPS_SYN_SENT) {
1069                         KASSERT(tp->t_state == TCPS_CLOSED,
1070                             ("%s: tp %p is listening", __func__, tp));
1071
1072                         /*
1073                          * Do implied connect if not yet connected,
1074                          * initialize window to default value, and
1075                          * initialize maxseg using peer's cached MSS.
1076                          */
1077 #ifdef INET6
1078                         if (isipv6)
1079                                 error = tcp6_connect(tp, nam, td);
1080 #endif /* INET6 */
1081 #if defined(INET6) && defined(INET)
1082                         else
1083 #endif
1084 #ifdef INET
1085                                 error = tcp_connect(tp,
1086                                     (struct sockaddr *)sinp, td);
1087 #endif
1088                         /*
1089                          * The bind operation in tcp_connect succeeded. We
1090                          * no longer want to restore the flags if later
1091                          * operations fail.
1092                          */
1093                         if (error == 0 || inp->inp_lport != 0)
1094                                 restoreflags = false;
1095
1096                         if (error) {
1097                                 /* m is freed if PRUS_NOTREADY is unset. */
1098                                 sbflush(&so->so_snd);
1099                                 goto out;
1100                         }
1101                         if (IS_FASTOPEN(tp->t_flags))
1102                                 tcp_fastopen_connect(tp);
1103                         else {
1104                                 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1105                                 tcp_mss(tp, -1);
1106                         }
1107                 }
1108                 if (flags & PRUS_EOF) {
1109                         /*
1110                          * Close the send side of the connection after
1111                          * the data is sent.
1112                          */
1113                         socantsendmore(so);
1114                         tcp_usrclosed(tp);
1115                 }
1116                 if (TCPS_HAVEESTABLISHED(tp->t_state) &&
1117                     ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
1118                     (tp->t_fbyte_out == 0) &&
1119                     (so->so_snd.sb_ccc > 0)) {
1120                         tp->t_fbyte_out = ticks;
1121                         if (tp->t_fbyte_out == 0)
1122                                 tp->t_fbyte_out = 1;
1123                         if (tp->t_fbyte_out && tp->t_fbyte_in)
1124                                 tp->t_flags2 |= TF2_FBYTES_COMPLETE;
1125                 }
1126                 if (!(inp->inp_flags & INP_DROPPED) &&
1127                     !(flags & PRUS_NOTREADY)) {
1128                         if (flags & PRUS_MORETOCOME)
1129                                 tp->t_flags |= TF_MORETOCOME;
1130                         error = tcp_output_nodrop(tp);
1131                         if (flags & PRUS_MORETOCOME)
1132                                 tp->t_flags &= ~TF_MORETOCOME;
1133                 }
1134         } else {
1135                 /*
1136                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
1137                  */
1138                 SOCKBUF_LOCK(&so->so_snd);
1139                 if (sbspace(&so->so_snd) < -512) {
1140                         SOCKBUF_UNLOCK(&so->so_snd);
1141                         error = ENOBUFS;
1142                         goto out;
1143                 }
1144                 /*
1145                  * According to RFC961 (Assigned Protocols),
1146                  * the urgent pointer points to the last octet
1147                  * of urgent data.  We continue, however,
1148                  * to consider it to indicate the first octet
1149                  * of data past the urgent section.
1150                  * Otherwise, snd_up should be one lower.
1151                  */
1152                 if (tp->t_acktime == 0)
1153                         tp->t_acktime = ticks;
1154                 sbappendstream_locked(&so->so_snd, m, flags);
1155                 SOCKBUF_UNLOCK(&so->so_snd);
1156                 m = NULL;
1157                 if (nam && tp->t_state < TCPS_SYN_SENT) {
1158                         /*
1159                          * Do implied connect if not yet connected,
1160                          * initialize window to default value, and
1161                          * initialize maxseg using peer's cached MSS.
1162                          */
1163
1164                         /*
1165                          * Not going to contemplate SYN|URG
1166                          */
1167                         if (IS_FASTOPEN(tp->t_flags))
1168                                 tp->t_flags &= ~TF_FASTOPEN;
1169 #ifdef INET6
1170                         if (isipv6)
1171                                 error = tcp6_connect(tp, nam, td);
1172 #endif /* INET6 */
1173 #if defined(INET6) && defined(INET)
1174                         else
1175 #endif
1176 #ifdef INET
1177                                 error = tcp_connect(tp,
1178                                     (struct sockaddr *)sinp, td);
1179 #endif
1180                         /*
1181                          * The bind operation in tcp_connect succeeded. We
1182                          * no longer want to restore the flags if later
1183                          * operations fail.
1184                          */
1185                         if (error == 0 || inp->inp_lport != 0)
1186                                 restoreflags = false;
1187
1188                         if (error != 0) {
1189                                 /* m is freed if PRUS_NOTREADY is unset. */
1190                                 sbflush(&so->so_snd);
1191                                 goto out;
1192                         }
1193                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
1194                         tcp_mss(tp, -1);
1195                 }
1196                 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
1197                 if ((flags & PRUS_NOTREADY) == 0) {
1198                         tp->t_flags |= TF_FORCEDATA;
1199                         error = tcp_output_nodrop(tp);
1200                         tp->t_flags &= ~TF_FORCEDATA;
1201                 }
1202         }
1203         TCP_LOG_EVENT(tp, NULL,
1204             &inp->inp_socket->so_rcv,
1205             &inp->inp_socket->so_snd,
1206             TCP_LOG_USERSEND, error,
1207             0, NULL, false);
1208
1209 out:
1210         /*
1211          * In case of PRUS_NOTREADY, the caller or tcp_usr_ready() is
1212          * responsible for freeing memory.
1213          */
1214         if (m != NULL && (flags & PRUS_NOTREADY) == 0)
1215                 m_freem(m);
1216
1217         /*
1218          * If the request was unsuccessful and we changed flags,
1219          * restore the original flags.
1220          */
1221         if (error != 0 && restoreflags) {
1222                 inp->inp_vflag = vflagsav;
1223                 inp->inp_inc.inc_flags = incflagsav;
1224         }
1225         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
1226                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1227         TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1228                    ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1229         error = tcp_unlock_or_drop(tp, error);
1230         NET_EPOCH_EXIT(et);
1231         return (error);
1232 }
1233
1234 static int
1235 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
1236 {
1237         struct epoch_tracker et;
1238         struct inpcb *inp;
1239         struct tcpcb *tp;
1240         int error;
1241
1242         inp = sotoinpcb(so);
1243         INP_WLOCK(inp);
1244         if (inp->inp_flags & INP_DROPPED) {
1245                 INP_WUNLOCK(inp);
1246                 mb_free_notready(m, count);
1247                 return (ECONNRESET);
1248         }
1249         tp = intotcpcb(inp);
1250
1251         SOCKBUF_LOCK(&so->so_snd);
1252         error = sbready(&so->so_snd, m, count);
1253         SOCKBUF_UNLOCK(&so->so_snd);
1254         if (error) {
1255                 INP_WUNLOCK(inp);
1256                 return (error);
1257         }
1258         NET_EPOCH_ENTER(et);
1259         error = tcp_output_unlock(tp);
1260         NET_EPOCH_EXIT(et);
1261
1262         return (error);
1263 }
1264
1265 /*
1266  * Abort the TCP.  Drop the connection abruptly.
1267  */
1268 static void
1269 tcp_usr_abort(struct socket *so)
1270 {
1271         struct inpcb *inp;
1272         struct tcpcb *tp = NULL;
1273         struct epoch_tracker et;
1274         TCPDEBUG0;
1275
1276         inp = sotoinpcb(so);
1277         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
1278
1279         NET_EPOCH_ENTER(et);
1280         INP_WLOCK(inp);
1281         KASSERT(inp->inp_socket != NULL,
1282             ("tcp_usr_abort: inp_socket == NULL"));
1283
1284         /*
1285          * If we still have full TCP state, and we're not dropped, drop.
1286          */
1287         if (!(inp->inp_flags & INP_DROPPED)) {
1288                 tp = intotcpcb(inp);
1289                 TCPDEBUG1();
1290                 tp = tcp_drop(tp, ECONNABORTED);
1291                 if (tp == NULL)
1292                         goto dropped;
1293                 TCPDEBUG2(PRU_ABORT);
1294                 TCP_PROBE2(debug__user, tp, PRU_ABORT);
1295         }
1296         if (!(inp->inp_flags & INP_DROPPED)) {
1297                 soref(so);
1298                 inp->inp_flags |= INP_SOCKREF;
1299         }
1300         INP_WUNLOCK(inp);
1301 dropped:
1302         NET_EPOCH_EXIT(et);
1303 }
1304
1305 /*
1306  * TCP socket is closed.  Start friendly disconnect.
1307  */
1308 static void
1309 tcp_usr_close(struct socket *so)
1310 {
1311         struct inpcb *inp;
1312         struct tcpcb *tp = NULL;
1313         struct epoch_tracker et;
1314         TCPDEBUG0;
1315
1316         inp = sotoinpcb(so);
1317         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
1318
1319         NET_EPOCH_ENTER(et);
1320         INP_WLOCK(inp);
1321         KASSERT(inp->inp_socket != NULL,
1322             ("tcp_usr_close: inp_socket == NULL"));
1323
1324         /*
1325          * If we still have full TCP state, and we're not dropped, initiate
1326          * a disconnect.
1327          */
1328         if (!(inp->inp_flags & INP_DROPPED)) {
1329                 tp = intotcpcb(inp);
1330                 tp->t_flags |= TF_CLOSED;
1331                 TCPDEBUG1();
1332                 tcp_disconnect(tp);
1333                 TCPDEBUG2(PRU_CLOSE);
1334                 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
1335         }
1336         if (!(inp->inp_flags & INP_DROPPED)) {
1337                 soref(so);
1338                 inp->inp_flags |= INP_SOCKREF;
1339         }
1340         INP_WUNLOCK(inp);
1341         NET_EPOCH_EXIT(et);
1342 }
1343
1344 static int
1345 tcp_pru_options_support(struct tcpcb *tp, int flags)
1346 {
1347         /*
1348          * If the specific TCP stack has a pru_options
1349          * specified then it does not always support
1350          * all the PRU_XX options and we must ask it.
1351          * If the function is not specified then all
1352          * of the PRU_XX options are supported.
1353          */
1354         int ret = 0;
1355
1356         if (tp->t_fb->tfb_pru_options) {
1357                 ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
1358         }
1359         return (ret);
1360 }
1361
1362 /*
1363  * Receive out-of-band data.
1364  */
1365 static int
1366 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1367 {
1368         int error = 0;
1369         struct inpcb *inp;
1370         struct tcpcb *tp = NULL;
1371
1372         TCPDEBUG0;
1373         inp = sotoinpcb(so);
1374         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
1375         INP_WLOCK(inp);
1376         if (inp->inp_flags & INP_DROPPED) {
1377                 error = ECONNRESET;
1378                 goto out;
1379         }
1380         tp = intotcpcb(inp);
1381         error = tcp_pru_options_support(tp, PRUS_OOB);
1382         if (error) {
1383                 goto out;
1384         }
1385         TCPDEBUG1();
1386         if ((so->so_oobmark == 0 &&
1387              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1388             so->so_options & SO_OOBINLINE ||
1389             tp->t_oobflags & TCPOOB_HADDATA) {
1390                 error = EINVAL;
1391                 goto out;
1392         }
1393         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1394                 error = EWOULDBLOCK;
1395                 goto out;
1396         }
1397         m->m_len = 1;
1398         *mtod(m, caddr_t) = tp->t_iobc;
1399         if ((flags & MSG_PEEK) == 0)
1400                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1401
1402 out:
1403         TCPDEBUG2(PRU_RCVOOB);
1404         TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
1405         INP_WUNLOCK(inp);
1406         return (error);
1407 }
1408
#ifdef INET
/*
 * Protocol switch entry for TCP over IPv4.
 */
struct protosw tcp_protosw = {
	/* Identity and capabilities. */
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
				    PR_CAPATTACH,

	/* Socket life cycle. */
	.pr_attach =		tcp_usr_attach,
	.pr_detach =		tcp_usr_detach,
	.pr_close =		tcp_usr_close,
	.pr_abort =		tcp_usr_abort,

	/* Connection management. */
	.pr_bind =		tcp_usr_bind,
	.pr_listen =		tcp_usr_listen,
	.pr_accept =		tcp_usr_accept,
	.pr_connect =		tcp_usr_connect,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pr_send =		tcp_usr_send,
	.pr_ready =		tcp_usr_ready,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,

	/* Options, ioctls, addresses and labels. */
	.pr_ctloutput =		tcp_ctloutput,
	.pr_control =		in_control,
	.pr_peeraddr =		in_getpeeraddr,
	.pr_sockaddr =		in_getsockaddr,
	.pr_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET */
1436
#ifdef INET6
/*
 * Protocol switch entry for TCP over IPv6.  It shares the
 * address-family-neutral handlers with the IPv4 entry and substitutes
 * the tcp6_ / in6_ variants where the address family matters.
 */
struct protosw tcp6_protosw = {
	/* Identity and capabilities. */
	.pr_type =		SOCK_STREAM,
	.pr_protocol =		IPPROTO_TCP,
	.pr_flags =		PR_CONNREQUIRED | PR_IMPLOPCL | PR_WANTRCVD |
				    PR_CAPATTACH,

	/* Socket life cycle. */
	.pr_attach =		tcp_usr_attach,
	.pr_detach =		tcp_usr_detach,
	.pr_close =		tcp_usr_close,
	.pr_abort =		tcp_usr_abort,

	/* Connection management. */
	.pr_bind =		tcp6_usr_bind,
	.pr_listen =		tcp6_usr_listen,
	.pr_accept =		tcp6_usr_accept,
	.pr_connect =		tcp6_usr_connect,
	.pr_disconnect =	tcp_usr_disconnect,
	.pr_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pr_send =		tcp_usr_send,
	.pr_ready =		tcp_usr_ready,
	.pr_rcvd =		tcp_usr_rcvd,
	.pr_rcvoob =		tcp_usr_rcvoob,

	/* Options, ioctls, addresses and labels. */
	.pr_ctloutput =		tcp_ctloutput,
	.pr_control =		in6_control,
	.pr_peeraddr =		in6_mapped_peeraddr,
	.pr_sockaddr =		in6_mapped_sockaddr,
	.pr_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET6 */
1464
#ifdef INET
/*
 * Shared IPv4 helper that opens a TCP connection to the peer named by
 * 'nam'.  A local port is bound first when unique ports are required and
 * none is assigned yet; in_pcbconnect_setup() then picks the local
 * address/port and fills in the foreign address.  A clash with an
 * existing connection is reported as EADDRINUSE.  On success the
 * connection parameters are initialised and the tcpcb enters SYN-SENT.
 *
 * The caller must be inside the network epoch and hold the inpcb write
 * lock.  Returns 0 or an errno value.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tptoinpcb(tp), *oinp;
	struct socket *so = tptosocket(tp);
	struct in_addr laddr;
	u_short lport;
	int error;

	NET_EPOCH_ASSERT();
	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK(&V_tcbinfo);

	if (V_tcp_require_unique_port && inp->inp_lport == 0) {
		error = in_pcbbind(inp, NULL, td->td_ucred);
		if (error != 0)
			goto fail;
	}

	/*
	 * in_pcbconnect() cannot be used directly: an earlier incarnation
	 * of this very connection may still be in TIME_WAIT, which would
	 * produce an ADDRINUSE error.
	 */
	laddr = inp->inp_laddr;
	lport = inp->inp_lport;
	error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
	    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
	if (oinp != NULL) {
		/* A conflicting inpcb overrides whatever error came back. */
		error = EADDRINUSE;
		goto fail;
	}
	if (error != 0)
		goto fail;

	/* Perform the initial bind now if it was not done beforehand. */
	if (inp->inp_lport == 0) {
		inp->inp_lport = lport;
		if (in_pcbinshash(inp) != 0) {
			inp->inp_lport = 0;
			error = EAGAIN;
			goto fail;
		}
	}
	inp->inp_laddr = laddr;
	in_pcbrehash(inp);
	INP_HASH_WUNLOCK(&V_tcbinfo);

	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max) {
		tp->request_r_scale++;
	}

	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if ((tp->t_flags & TF_REQ_TSTMP) != 0)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);

fail:
	INP_HASH_WUNLOCK(&V_tcbinfo);
	return (error);
}
#endif /* INET */
1547
#ifdef INET6
/*
 * IPv6 counterpart of the connect helper.  A local port is bound first
 * when unique ports are required and none is assigned yet, then
 * in6_pcbconnect() attaches the peer address 'nam'.  On success the
 * connection parameters are initialised and the tcpcb enters SYN-SENT.
 *
 * The caller must hold the inpcb write lock.  Returns 0 or an errno
 * value.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tptoinpcb(tp);
	int error;

	INP_WLOCK_ASSERT(inp);

	/* Bind and connect under the pcb hash lock. */
	INP_HASH_WLOCK(&V_tcbinfo);
	error = 0;
	if (V_tcp_require_unique_port && inp->inp_lport == 0)
		error = in6_pcbbind(inp, NULL, td->td_ucred);
	if (error == 0)
		error = in6_pcbconnect(inp, nam, td->td_ucred);
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max) {
		tp->request_r_scale++;
	}

	soisconnecting(inp->inp_socket);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if ((tp->t_flags & TF_REQ_TSTMP) != 0)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1588
1589 /*
1590  * Export TCP internal state information via a struct tcp_info, based on the
1591  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
1592  * (TCP state machine, etc).  We export all information using FreeBSD-native
1593  * constants -- for example, the numeric values for tcpi_state will differ
1594  * from Linux.
1595  */
1596 static void
1597 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1598 {
1599
1600         INP_WLOCK_ASSERT(tptoinpcb(tp));
1601         bzero(ti, sizeof(*ti));
1602
1603         ti->tcpi_state = tp->t_state;
1604         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1605                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1606         if (tp->t_flags & TF_SACK_PERMIT)
1607                 ti->tcpi_options |= TCPI_OPT_SACK;
1608         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1609                 ti->tcpi_options |= TCPI_OPT_WSCALE;
1610                 ti->tcpi_snd_wscale = tp->snd_scale;
1611                 ti->tcpi_rcv_wscale = tp->rcv_scale;
1612         }
1613         if (tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
1614                 ti->tcpi_options |= TCPI_OPT_ECN;
1615
1616         ti->tcpi_rto = tp->t_rxtcur * tick;
1617         ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
1618         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1619         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1620
1621         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1622         ti->tcpi_snd_cwnd = tp->snd_cwnd;
1623
1624         /*
1625          * FreeBSD-specific extension fields for tcp_info.
1626          */
1627         ti->tcpi_rcv_space = tp->rcv_wnd;
1628         ti->tcpi_rcv_nxt = tp->rcv_nxt;
1629         ti->tcpi_snd_wnd = tp->snd_wnd;
1630         ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
1631         ti->tcpi_snd_nxt = tp->snd_nxt;
1632         ti->tcpi_snd_mss = tp->t_maxseg;
1633         ti->tcpi_rcv_mss = tp->t_maxseg;
1634         ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1635         ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1636         ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1637 #ifdef TCP_OFFLOAD
1638         if (tp->t_flags & TF_TOE) {
1639                 ti->tcpi_options |= TCPI_OPT_TOE;
1640                 tcp_offload_tcp_info(tp, ti);
1641         }
1642 #endif
1643         /*
1644          * AccECN related counters.
1645          */
1646         if ((tp->t_flags2 & (TF2_ECN_PERMIT | TF2_ACE_PERMIT)) ==
1647             (TF2_ECN_PERMIT | TF2_ACE_PERMIT))
1648                 /*
1649                  * Internal counter starts at 5 for AccECN
1650                  * but 0 for RFC3168 ECN.
1651                  */
1652                 ti->tcpi_delivered_ce = tp->t_scep - 5;
1653         else
1654                 ti->tcpi_delivered_ce = tp->t_scep;
1655         ti->tcpi_received_ce = tp->t_rcep;
1656 }
1657
/*
 * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 * socket option arguments.  When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
 *
 * INP_WLOCK_RECHECK_CLEANUP() write-locks 'inp'.  If the inpcb has been
 * dropped in the meantime it unlocks again, executes the 'cleanup'
 * statement and makes the *enclosing function* return ECONNRESET.
 * Otherwise it reloads the local variable 'tp', which must be in scope
 * at the point of use, and continues with the lock held.
 *
 * 'inp' is expanded several times and must therefore be free of side
 * effects; it is parenthesized wherever it is dereferenced so that an
 * expression argument is handled correctly.
 *
 * INP_WLOCK_RECHECK() is the same without a cleanup action.
 */
#define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
	INP_WLOCK(inp);							\
	if ((inp)->inp_flags & INP_DROPPED) {				\
		INP_WUNLOCK(inp);					\
		cleanup;						\
		return (ECONNRESET);					\
	}								\
	tp = intotcpcb(inp);						\
} while (0)
#define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
1674
1675 int
1676 tcp_ctloutput_set(struct inpcb *inp, struct sockopt *sopt)
1677 {
1678         struct socket *so = inp->inp_socket;
1679         struct tcpcb *tp = intotcpcb(inp);
1680         int error = 0;
1681
1682         MPASS(sopt->sopt_dir == SOPT_SET);
1683         INP_WLOCK_ASSERT(inp);
1684         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1685             ("inp_flags == %x", inp->inp_flags));
1686         KASSERT(so != NULL, ("inp_socket == NULL"));
1687
1688         if (sopt->sopt_level != IPPROTO_TCP) {
1689                 INP_WUNLOCK(inp);
1690 #ifdef INET6
1691                 if (inp->inp_vflag & INP_IPV6PROTO)
1692                         error = ip6_ctloutput(so, sopt);
1693 #endif
1694 #if defined(INET6) && defined(INET)
1695                 else
1696 #endif
1697 #ifdef INET
1698                         error = ip_ctloutput(so, sopt);
1699 #endif
1700                 /*
1701                  * When an IP-level socket option affects TCP, pass control
1702                  * down to stack tfb_tcp_ctloutput, otherwise return what
1703                  * IP level returned.
1704                  */
1705                 switch (sopt->sopt_level) {
1706 #ifdef INET6
1707                 case IPPROTO_IPV6:
1708                         if ((inp->inp_vflag & INP_IPV6PROTO) == 0)
1709                                 return (error);
1710                         switch (sopt->sopt_name) {
1711                         case IPV6_TCLASS:
1712                                 /* Notify tcp stacks that care (e.g. RACK). */
1713                                 break;
1714                         case IPV6_USE_MIN_MTU:
1715                                 /* Update t_maxseg accordingly. */
1716                                 break;
1717                         default:
1718                                 return (error);
1719                         }
1720                         break;
1721 #endif
1722 #ifdef INET
1723                 case IPPROTO_IP:
1724                         switch (sopt->sopt_name) {
1725                         case IP_TOS:
1726                                 inp->inp_ip_tos &= ~IPTOS_ECN_MASK;
1727                                 break;
1728                         case IP_TTL:
1729                                 /* Notify tcp stacks that care (e.g. RACK). */
1730                                 break;
1731                         default:
1732                                 return (error);
1733                         }
1734                         break;
1735 #endif
1736                 default:
1737                         return (error);
1738                 }
1739                 INP_WLOCK(inp);
1740                 if (inp->inp_flags & INP_DROPPED) {
1741                         INP_WUNLOCK(inp);
1742                         return (ECONNRESET);
1743                 }
1744         } else if (sopt->sopt_name == TCP_FUNCTION_BLK) {
1745                 /*
1746                  * Protect the TCP option TCP_FUNCTION_BLK so
1747                  * that a sub-function can *never* overwrite this.
1748                  */
1749                 struct tcp_function_set fsn;
1750                 struct tcp_function_block *blk;
1751
1752                 INP_WUNLOCK(inp);
1753                 error = sooptcopyin(sopt, &fsn, sizeof fsn, sizeof fsn);
1754                 if (error)
1755                         return (error);
1756
1757                 INP_WLOCK(inp);
1758                 if (inp->inp_flags & INP_DROPPED) {
1759                         INP_WUNLOCK(inp);
1760                         return (ECONNRESET);
1761                 }
1762                 tp = intotcpcb(inp);
1763
1764                 blk = find_and_ref_tcp_functions(&fsn);
1765                 if (blk == NULL) {
1766                         INP_WUNLOCK(inp);
1767                         return (ENOENT);
1768                 }
1769                 if (tp->t_fb == blk) {
1770                         /* You already have this */
1771                         refcount_release(&blk->tfb_refcnt);
1772                         INP_WUNLOCK(inp);
1773                         return (0);
1774                 }
1775                 if (tp->t_state != TCPS_CLOSED) {
1776                         /*
1777                          * The user has advanced the state
1778                          * past the initial point, we may not
1779                          * be able to switch.
1780                          */
1781                         if (blk->tfb_tcp_handoff_ok != NULL) {
1782                                 /*
1783                                  * Does the stack provide a
1784                                  * query mechanism, if so it may
1785                                  * still be possible?
1786                                  */
1787                                 error = (*blk->tfb_tcp_handoff_ok)(tp);
1788                         } else
1789                                 error = EINVAL;
1790                         if (error) {
1791                                 refcount_release(&blk->tfb_refcnt);
1792                                 INP_WUNLOCK(inp);
1793                                 return(error);
1794                         }
1795                 }
1796                 if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
1797                         refcount_release(&blk->tfb_refcnt);
1798                         INP_WUNLOCK(inp);
1799                         return (ENOENT);
1800                 }
1801                 /*
1802                  * Release the old refcnt, the
1803                  * lookup acquired a ref on the
1804                  * new one already.
1805                  */
1806                 if (tp->t_fb->tfb_tcp_fb_fini) {
1807                         struct epoch_tracker et;
1808                         /*
1809                          * Tell the stack to cleanup with 0 i.e.
1810                          * the tcb is not going away.
1811                          */
1812                         NET_EPOCH_ENTER(et);
1813                         (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
1814                         NET_EPOCH_EXIT(et);
1815                 }
1816 #ifdef TCPHPTS
1817                 /* Assure that we are not on any hpts */
1818                 tcp_hpts_remove(tptoinpcb(tp));
1819 #endif
1820                 if (blk->tfb_tcp_fb_init) {
1821                         error = (*blk->tfb_tcp_fb_init)(tp);
1822                         if (error) {
1823                                 refcount_release(&blk->tfb_refcnt);
1824                                 if (tp->t_fb->tfb_tcp_fb_init) {
1825                                         if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0)  {
1826                                                 /* Fall back failed, drop the connection */
1827                                                 INP_WUNLOCK(inp);
1828                                                 soabort(so);
1829                                                 return (error);
1830                                         }
1831                                 }
1832                                 goto err_out;
1833                         }
1834                 }
1835                 refcount_release(&tp->t_fb->tfb_refcnt);
1836                 tp->t_fb = blk;
1837 #ifdef TCP_OFFLOAD
1838                 if (tp->t_flags & TF_TOE) {
1839                         tcp_offload_ctloutput(tp, sopt->sopt_dir,
1840                              sopt->sopt_name);
1841                 }
1842 #endif
1843 err_out:
1844                 INP_WUNLOCK(inp);
1845                 return (error);
1846         }
1847
1848         /* Pass in the INP locked, callee must unlock it. */
1849         return (tp->t_fb->tfb_tcp_ctloutput(inp, sopt));
1850 }
1851
1852 static int
1853 tcp_ctloutput_get(struct inpcb *inp, struct sockopt *sopt)
1854 {
1855         struct socket *so = inp->inp_socket;
1856         struct tcpcb *tp = intotcpcb(inp);
1857         int error = 0;
1858
1859         MPASS(sopt->sopt_dir == SOPT_GET);
1860         INP_WLOCK_ASSERT(inp);
1861         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
1862             ("inp_flags == %x", inp->inp_flags));
1863         KASSERT(so != NULL, ("inp_socket == NULL"));
1864
1865         if (sopt->sopt_level != IPPROTO_TCP) {
1866                 INP_WUNLOCK(inp);
1867 #ifdef INET6
1868                 if (inp->inp_vflag & INP_IPV6PROTO)
1869                         error = ip6_ctloutput(so, sopt);
1870 #endif /* INET6 */
1871 #if defined(INET6) && defined(INET)
1872                 else
1873 #endif
1874 #ifdef INET
1875                         error = ip_ctloutput(so, sopt);
1876 #endif
1877                 return (error);
1878         }
1879         if (((sopt->sopt_name == TCP_FUNCTION_BLK) ||
1880              (sopt->sopt_name == TCP_FUNCTION_ALIAS))) {
1881                 struct tcp_function_set fsn;
1882
1883                 if (sopt->sopt_name == TCP_FUNCTION_ALIAS) {
1884                         memset(&fsn, 0, sizeof(fsn));
1885                         find_tcp_function_alias(tp->t_fb, &fsn);
1886                 } else {
1887                         strncpy(fsn.function_set_name,
1888                             tp->t_fb->tfb_tcp_block_name,
1889                             TCP_FUNCTION_NAME_LEN_MAX);
1890                         fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
1891                 }
1892                 fsn.pcbcnt = tp->t_fb->tfb_refcnt;
1893                 INP_WUNLOCK(inp);
1894                 error = sooptcopyout(sopt, &fsn, sizeof fsn);
1895                 return (error);
1896         }
1897
1898         /* Pass in the INP locked, callee must unlock it. */
1899         return (tp->t_fb->tfb_tcp_ctloutput(inp, sopt));
1900 }
1901
1902 int
1903 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1904 {
1905         struct  inpcb *inp;
1906
1907         inp = sotoinpcb(so);
1908         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1909
1910         INP_WLOCK(inp);
1911         if (inp->inp_flags & INP_DROPPED) {
1912                 INP_WUNLOCK(inp);
1913                 return (ECONNRESET);
1914         }
1915         if (sopt->sopt_dir == SOPT_SET)
1916                 return (tcp_ctloutput_set(inp, sopt));
1917         else if (sopt->sopt_dir == SOPT_GET)
1918                 return (tcp_ctloutput_get(inp, sopt));
1919         else
1920                 panic("%s: sopt_dir $%d", __func__, sopt->sopt_dir);
1921 }
1922
1923 /*
1924  * If either of these asserts becomes untrue, we need to change the size of
1925  * the buf variable in tcp_default_ctloutput().
1926  */
1927 #ifdef CTASSERT
1928 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
1929 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
1930 #endif
1931
1932 #ifdef KERN_TLS
1933 static int
1934 copyin_tls_enable(struct sockopt *sopt, struct tls_enable *tls)
1935 {
1936         struct tls_enable_v0 tls_v0;
1937         int error;
1938
1939         if (sopt->sopt_valsize == sizeof(tls_v0)) {
1940                 error = sooptcopyin(sopt, &tls_v0, sizeof(tls_v0),
1941                     sizeof(tls_v0));
1942                 if (error)
1943                         return (error);
1944                 memset(tls, 0, sizeof(*tls));
1945                 tls->cipher_key = tls_v0.cipher_key;
1946                 tls->iv = tls_v0.iv;
1947                 tls->auth_key = tls_v0.auth_key;
1948                 tls->cipher_algorithm = tls_v0.cipher_algorithm;
1949                 tls->cipher_key_len = tls_v0.cipher_key_len;
1950                 tls->iv_len = tls_v0.iv_len;
1951                 tls->auth_algorithm = tls_v0.auth_algorithm;
1952                 tls->auth_key_len = tls_v0.auth_key_len;
1953                 tls->flags = tls_v0.flags;
1954                 tls->tls_vmajor = tls_v0.tls_vmajor;
1955                 tls->tls_vminor = tls_v0.tls_vminor;
1956                 return (0);
1957         }
1958
1959         return (sooptcopyin(sopt, tls, sizeof(*tls), sizeof(*tls)));
1960 }
1961 #endif
1962
1963 extern struct cc_algo newreno_cc_algo;
1964
1965 static int
1966 tcp_set_cc_mod(struct inpcb *inp, struct sockopt *sopt)
1967 {
1968         struct cc_algo *algo;
1969         void *ptr = NULL;
1970         struct tcpcb *tp;
1971         struct cc_var cc_mem;
1972         char    buf[TCP_CA_NAME_MAX];
1973         size_t mem_sz;
1974         int error;
1975
1976         INP_WUNLOCK(inp);
1977         error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
1978         if (error)
1979                 return(error);
1980         buf[sopt->sopt_valsize] = '\0';
1981         CC_LIST_RLOCK();
1982         STAILQ_FOREACH(algo, &cc_list, entries) {
1983                 if (strncmp(buf, algo->name,
1984                             TCP_CA_NAME_MAX) == 0) {
1985                         if (algo->flags & CC_MODULE_BEING_REMOVED) {
1986                                 /* We can't "see" modules being unloaded */
1987                                 continue;
1988                         }
1989                         break;
1990                 }
1991         }
1992         if (algo == NULL) {
1993                 CC_LIST_RUNLOCK();
1994                 return(ESRCH);
1995         }
1996         /* 
1997          * With a reference the algorithm cannot be removed
1998          * so we hold a reference through the change process.
1999          */
2000         cc_refer(algo);
2001         CC_LIST_RUNLOCK();
2002         if (algo->cb_init != NULL) {
2003                 /* We can now pre-get the memory for the CC */
2004                 mem_sz = (*algo->cc_data_sz)();
2005                 if (mem_sz == 0) {
2006                         goto no_mem_needed;
2007                 }
2008                 ptr = malloc(mem_sz, M_CC_MEM, M_WAITOK);
2009         } else {
2010 no_mem_needed:
2011                 mem_sz = 0;
2012                 ptr = NULL;
2013         }
2014         /*
2015          * Make sure its all clean and zero and also get
2016          * back the inplock.
2017          */
2018         memset(&cc_mem, 0, sizeof(cc_mem));
2019         INP_WLOCK(inp);
2020         if (inp->inp_flags & INP_DROPPED) {
2021                 INP_WUNLOCK(inp);
2022                 if (ptr)
2023                         free(ptr, M_CC_MEM);
2024                 /* Release our temp reference */
2025                 CC_LIST_RLOCK();
2026                 cc_release(algo);
2027                 CC_LIST_RUNLOCK();
2028                 return (ECONNRESET);
2029         }
2030         tp = intotcpcb(inp);
2031         if (ptr != NULL)
2032                 memset(ptr, 0, mem_sz);
2033         cc_mem.ccvc.tcp = tp;
2034         /*
2035          * We once again hold a write lock over the tcb so it's
2036          * safe to do these things without ordering concerns.
2037          * Note here we init into stack memory.
2038          */
2039         if (algo->cb_init != NULL)
2040                 error = algo->cb_init(&cc_mem, ptr);
2041         else
2042                 error = 0;
2043         /*
2044          * The CC algorithms, when given their memory
2045          * should not fail we could in theory have a
2046          * KASSERT here.
2047          */
2048         if (error == 0) {
2049                 /*
2050                  * Touchdown, lets go ahead and move the
2051                  * connection to the new CC module by
2052                  * copying in the cc_mem after we call
2053                  * the old ones cleanup (if any).
2054                  */
2055                 if (CC_ALGO(tp)->cb_destroy != NULL)
2056                         CC_ALGO(tp)->cb_destroy(tp->ccv);
2057                 /* Detach the old CC from the tcpcb  */
2058                 cc_detach(tp);
2059                 /* Copy in our temp memory that was inited */
2060                 memcpy(tp->ccv, &cc_mem, sizeof(struct cc_var));
2061                 /* Now attach the new, which takes a reference */
2062                 cc_attach(tp, algo);
2063                 /* Ok now are we where we have gotten past any conn_init? */
2064                 if (TCPS_HAVEESTABLISHED(tp->t_state) && (CC_ALGO(tp)->conn_init != NULL)) {
2065                         /* Yep run the connection init for the new CC */
2066                         CC_ALGO(tp)->conn_init(tp->ccv);
2067                 }
2068         } else if (ptr)
2069                 free(ptr, M_CC_MEM);
2070         INP_WUNLOCK(inp);
2071         /* Now lets release our temp reference */
2072         CC_LIST_RLOCK();
2073         cc_release(algo);
2074         CC_LIST_RUNLOCK();
2075         return (error);
2076 }
2077
2078 int
2079 tcp_default_ctloutput(struct inpcb *inp, struct sockopt *sopt)
2080 {
2081         struct tcpcb *tp = intotcpcb(inp);
2082         int     error, opt, optval;
2083         u_int   ui;
2084         struct  tcp_info ti;
2085 #ifdef KERN_TLS
2086         struct tls_enable tls;
2087         struct socket *so = inp->inp_socket;
2088 #endif
2089         char    *pbuf, buf[TCP_LOG_ID_LEN];
2090 #ifdef STATS
2091         struct statsblob *sbp;
2092 #endif
2093         size_t  len;
2094
2095         INP_WLOCK_ASSERT(inp);
2096         KASSERT((inp->inp_flags & INP_DROPPED) == 0,
2097             ("inp_flags == %x", inp->inp_flags));
2098         KASSERT(inp->inp_socket != NULL, ("inp_socket == NULL"));
2099
2100         switch (sopt->sopt_level) {
2101 #ifdef INET6
2102         case IPPROTO_IPV6:
2103                 MPASS(inp->inp_vflag & INP_IPV6PROTO);
2104                 switch (sopt->sopt_name) {
2105                 case IPV6_USE_MIN_MTU:
2106                         tcp6_use_min_mtu(tp);
2107                         /* FALLTHROUGH */
2108                 }
2109                 INP_WUNLOCK(inp);
2110                 return (0);
2111 #endif
2112 #ifdef INET
2113         case IPPROTO_IP:
2114                 INP_WUNLOCK(inp);
2115                 return (0);
2116 #endif
2117         }
2118
2119         /*
2120          * For TCP_CCALGOOPT forward the control to CC module, for both
2121          * SOPT_SET and SOPT_GET.
2122          */
2123         switch (sopt->sopt_name) {
2124         case TCP_CCALGOOPT:
2125                 INP_WUNLOCK(inp);
2126                 if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
2127                         return (EINVAL);
2128                 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
2129                 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
2130                     sopt->sopt_valsize);
2131                 if (error) {
2132                         free(pbuf, M_TEMP);
2133                         return (error);
2134                 }
2135                 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
2136                 if (CC_ALGO(tp)->ctl_output != NULL)
2137                         error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
2138                 else
2139                         error = ENOENT;
2140                 INP_WUNLOCK(inp);
2141                 if (error == 0 && sopt->sopt_dir == SOPT_GET)
2142                         error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
2143                 free(pbuf, M_TEMP);
2144                 return (error);
2145         }
2146
2147         switch (sopt->sopt_dir) {
2148         case SOPT_SET:
2149                 switch (sopt->sopt_name) {
2150 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2151                 case TCP_MD5SIG:
2152                         INP_WUNLOCK(inp);
2153                         if (!TCPMD5_ENABLED())
2154                                 return (ENOPROTOOPT);
2155                         error = TCPMD5_PCBCTL(inp, sopt);
2156                         if (error)
2157                                 return (error);
2158                         INP_WLOCK_RECHECK(inp);
2159                         goto unlock_and_done;
2160 #endif /* IPSEC */
2161
2162                 case TCP_NODELAY:
2163                 case TCP_NOOPT:
2164                 case TCP_LRD:
2165                         INP_WUNLOCK(inp);
2166                         error = sooptcopyin(sopt, &optval, sizeof optval,
2167                             sizeof optval);
2168                         if (error)
2169                                 return (error);
2170
2171                         INP_WLOCK_RECHECK(inp);
2172                         switch (sopt->sopt_name) {
2173                         case TCP_NODELAY:
2174                                 opt = TF_NODELAY;
2175                                 break;
2176                         case TCP_NOOPT:
2177                                 opt = TF_NOOPT;
2178                                 break;
2179                         case TCP_LRD:
2180                                 opt = TF_LRD;
2181                                 break;
2182                         default:
2183                                 opt = 0; /* dead code to fool gcc */
2184                                 break;
2185                         }
2186
2187                         if (optval)
2188                                 tp->t_flags |= opt;
2189                         else
2190                                 tp->t_flags &= ~opt;
2191 unlock_and_done:
2192 #ifdef TCP_OFFLOAD
2193                         if (tp->t_flags & TF_TOE) {
2194                                 tcp_offload_ctloutput(tp, sopt->sopt_dir,
2195                                     sopt->sopt_name);
2196                         }
2197 #endif
2198                         INP_WUNLOCK(inp);
2199                         break;
2200
2201                 case TCP_NOPUSH:
2202                         INP_WUNLOCK(inp);
2203                         error = sooptcopyin(sopt, &optval, sizeof optval,
2204                             sizeof optval);
2205                         if (error)
2206                                 return (error);
2207
2208                         INP_WLOCK_RECHECK(inp);
2209                         if (optval)
2210                                 tp->t_flags |= TF_NOPUSH;
2211                         else if (tp->t_flags & TF_NOPUSH) {
2212                                 tp->t_flags &= ~TF_NOPUSH;
2213                                 if (TCPS_HAVEESTABLISHED(tp->t_state)) {
2214                                         struct epoch_tracker et;
2215
2216                                         NET_EPOCH_ENTER(et);
2217                                         error = tcp_output_nodrop(tp);
2218                                         NET_EPOCH_EXIT(et);
2219                                 }
2220                         }
2221                         goto unlock_and_done;
2222
2223                 case TCP_REMOTE_UDP_ENCAPS_PORT:
2224                         INP_WUNLOCK(inp);
2225                         error = sooptcopyin(sopt, &optval, sizeof optval,
2226                             sizeof optval);
2227                         if (error)
2228                                 return (error);
2229                         if ((optval < TCP_TUNNELING_PORT_MIN) ||
2230                             (optval > TCP_TUNNELING_PORT_MAX)) {
2231                                 /* Its got to be in range */
2232                                 return (EINVAL);
2233                         }
2234                         if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) {
2235                                 /* You have to have enabled a UDP tunneling port first */
2236                                 return (EINVAL);
2237                         }
2238                         INP_WLOCK_RECHECK(inp);
2239                         if (tp->t_state != TCPS_CLOSED) {
2240                                 /* You can't change after you are connected */
2241                                 error = EINVAL;
2242                         } else {
2243                                 /* Ok we are all good set the port */
2244                                 tp->t_port = htons(optval);
2245                         }
2246                         goto unlock_and_done;
2247
2248                 case TCP_MAXSEG:
2249                         INP_WUNLOCK(inp);
2250                         error = sooptcopyin(sopt, &optval, sizeof optval,
2251                             sizeof optval);
2252                         if (error)
2253                                 return (error);
2254
2255                         INP_WLOCK_RECHECK(inp);
2256                         if (optval > 0 && optval <= tp->t_maxseg &&
2257                             optval + 40 >= V_tcp_minmss)
2258                                 tp->t_maxseg = optval;
2259                         else
2260                                 error = EINVAL;
2261                         goto unlock_and_done;
2262
2263                 case TCP_INFO:
2264                         INP_WUNLOCK(inp);
2265                         error = EINVAL;
2266                         break;
2267
2268                 case TCP_STATS:
2269                         INP_WUNLOCK(inp);
2270 #ifdef STATS
2271                         error = sooptcopyin(sopt, &optval, sizeof optval,
2272                             sizeof optval);
2273                         if (error)
2274                                 return (error);
2275
2276                         if (optval > 0)
2277                                 sbp = stats_blob_alloc(
2278                                     V_tcp_perconn_stats_dflt_tpl, 0);
2279                         else
2280                                 sbp = NULL;
2281
2282                         INP_WLOCK_RECHECK(inp);
2283                         if ((tp->t_stats != NULL && sbp == NULL) ||
2284                             (tp->t_stats == NULL && sbp != NULL)) {
2285                                 struct statsblob *t = tp->t_stats;
2286                                 tp->t_stats = sbp;
2287                                 sbp = t;
2288                         }
2289                         INP_WUNLOCK(inp);
2290
2291                         stats_blob_destroy(sbp);
2292 #else
2293                         return (EOPNOTSUPP);
2294 #endif /* !STATS */
2295                         break;
2296
2297                 case TCP_CONGESTION:
2298                         error = tcp_set_cc_mod(inp, sopt);
2299                         break;
2300
2301                 case TCP_REUSPORT_LB_NUMA:
2302                         INP_WUNLOCK(inp);
2303                         error = sooptcopyin(sopt, &optval, sizeof(optval),
2304                             sizeof(optval));
2305                         INP_WLOCK_RECHECK(inp);
2306                         if (!error)
2307                                 error = in_pcblbgroup_numa(inp, optval);
2308                         INP_WUNLOCK(inp);
2309                         break;
2310
2311 #ifdef KERN_TLS
2312                 case TCP_TXTLS_ENABLE:
2313                         INP_WUNLOCK(inp);
2314                         error = copyin_tls_enable(sopt, &tls);
2315                         if (error)
2316                                 break;
2317                         error = ktls_enable_tx(so, &tls);
2318                         break;
2319                 case TCP_TXTLS_MODE:
2320                         INP_WUNLOCK(inp);
2321                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2322                         if (error)
2323                                 return (error);
2324
2325                         INP_WLOCK_RECHECK(inp);
2326                         error = ktls_set_tx_mode(so, ui);
2327                         INP_WUNLOCK(inp);
2328                         break;
2329                 case TCP_RXTLS_ENABLE:
2330                         INP_WUNLOCK(inp);
2331                         error = sooptcopyin(sopt, &tls, sizeof(tls),
2332                             sizeof(tls));
2333                         if (error)
2334                                 break;
2335                         error = ktls_enable_rx(so, &tls);
2336                         break;
2337 #endif
2338                 case TCP_MAXUNACKTIME:
2339                 case TCP_KEEPIDLE:
2340                 case TCP_KEEPINTVL:
2341                 case TCP_KEEPINIT:
2342                         INP_WUNLOCK(inp);
2343                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2344                         if (error)
2345                                 return (error);
2346
2347                         if (ui > (UINT_MAX / hz)) {
2348                                 error = EINVAL;
2349                                 break;
2350                         }
2351                         ui *= hz;
2352
2353                         INP_WLOCK_RECHECK(inp);
2354                         switch (sopt->sopt_name) {
2355                         case TCP_MAXUNACKTIME:
2356                                 tp->t_maxunacktime = ui;
2357                                 break;
2358
2359                         case TCP_KEEPIDLE:
2360                                 tp->t_keepidle = ui;
2361                                 /*
2362                                  * XXX: better check current remaining
2363                                  * timeout and "merge" it with new value.
2364                                  */
2365                                 if ((tp->t_state > TCPS_LISTEN) &&
2366                                     (tp->t_state <= TCPS_CLOSING))
2367                                         tcp_timer_activate(tp, TT_KEEP,
2368                                             TP_KEEPIDLE(tp));
2369                                 break;
2370                         case TCP_KEEPINTVL:
2371                                 tp->t_keepintvl = ui;
2372                                 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2373                                     (TP_MAXIDLE(tp) > 0))
2374                                         tcp_timer_activate(tp, TT_2MSL,
2375                                             TP_MAXIDLE(tp));
2376                                 break;
2377                         case TCP_KEEPINIT:
2378                                 tp->t_keepinit = ui;
2379                                 if (tp->t_state == TCPS_SYN_RECEIVED ||
2380                                     tp->t_state == TCPS_SYN_SENT)
2381                                         tcp_timer_activate(tp, TT_KEEP,
2382                                             TP_KEEPINIT(tp));
2383                                 break;
2384                         }
2385                         goto unlock_and_done;
2386
2387                 case TCP_KEEPCNT:
2388                         INP_WUNLOCK(inp);
2389                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2390                         if (error)
2391                                 return (error);
2392
2393                         INP_WLOCK_RECHECK(inp);
2394                         tp->t_keepcnt = ui;
2395                         if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2396                             (TP_MAXIDLE(tp) > 0))
2397                                 tcp_timer_activate(tp, TT_2MSL,
2398                                     TP_MAXIDLE(tp));
2399                         goto unlock_and_done;
2400
2401 #ifdef TCPPCAP
2402                 case TCP_PCAP_OUT:
2403                 case TCP_PCAP_IN:
2404                         INP_WUNLOCK(inp);
2405                         error = sooptcopyin(sopt, &optval, sizeof optval,
2406                             sizeof optval);
2407                         if (error)
2408                                 return (error);
2409
2410                         INP_WLOCK_RECHECK(inp);
2411                         if (optval >= 0)
2412                                 tcp_pcap_set_sock_max(TCP_PCAP_OUT ?
2413                                         &(tp->t_outpkts) : &(tp->t_inpkts),
2414                                         optval);
2415                         else
2416                                 error = EINVAL;
2417                         goto unlock_and_done;
2418 #endif
2419
2420                 case TCP_FASTOPEN: {
2421                         struct tcp_fastopen tfo_optval;
2422
2423                         INP_WUNLOCK(inp);
2424                         if (!V_tcp_fastopen_client_enable &&
2425                             !V_tcp_fastopen_server_enable)
2426                                 return (EPERM);
2427
2428                         error = sooptcopyin(sopt, &tfo_optval,
2429                                     sizeof(tfo_optval), sizeof(int));
2430                         if (error)
2431                                 return (error);
2432
2433                         INP_WLOCK_RECHECK(inp);
2434                         if ((tp->t_state != TCPS_CLOSED) &&
2435                             (tp->t_state != TCPS_LISTEN)) {
2436                                 error = EINVAL;
2437                                 goto unlock_and_done;
2438                         }
2439                         if (tfo_optval.enable) {
2440                                 if (tp->t_state == TCPS_LISTEN) {
2441                                         if (!V_tcp_fastopen_server_enable) {
2442                                                 error = EPERM;
2443                                                 goto unlock_and_done;
2444                                         }
2445
2446                                         if (tp->t_tfo_pending == NULL)
2447                                                 tp->t_tfo_pending =
2448                                                     tcp_fastopen_alloc_counter();
2449                                 } else {
2450                                         /*
2451                                          * If a pre-shared key was provided,
2452                                          * stash it in the client cookie
2453                                          * field of the tcpcb for use during
2454                                          * connect.
2455                                          */
2456                                         if (sopt->sopt_valsize ==
2457                                             sizeof(tfo_optval)) {
2458                                                 memcpy(tp->t_tfo_cookie.client,
2459                                                        tfo_optval.psk,
2460                                                        TCP_FASTOPEN_PSK_LEN);
2461                                                 tp->t_tfo_client_cookie_len =
2462                                                     TCP_FASTOPEN_PSK_LEN;
2463                                         }
2464                                 }
2465                                 tp->t_flags |= TF_FASTOPEN;
2466                         } else
2467                                 tp->t_flags &= ~TF_FASTOPEN;
2468                         goto unlock_and_done;
2469                 }
2470
2471 #ifdef TCP_BLACKBOX
2472                 case TCP_LOG:
2473                         INP_WUNLOCK(inp);
2474                         error = sooptcopyin(sopt, &optval, sizeof optval,
2475                             sizeof optval);
2476                         if (error)
2477                                 return (error);
2478
2479                         INP_WLOCK_RECHECK(inp);
2480                         error = tcp_log_state_change(tp, optval);
2481                         goto unlock_and_done;
2482
2483                 case TCP_LOGBUF:
2484                         INP_WUNLOCK(inp);
2485                         error = EINVAL;
2486                         break;
2487
2488                 case TCP_LOGID:
2489                         INP_WUNLOCK(inp);
2490                         error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
2491                         if (error)
2492                                 break;
2493                         buf[sopt->sopt_valsize] = '\0';
2494                         INP_WLOCK_RECHECK(inp);
2495                         error = tcp_log_set_id(tp, buf);
2496                         /* tcp_log_set_id() unlocks the INP. */
2497                         break;
2498
2499                 case TCP_LOGDUMP:
2500                 case TCP_LOGDUMPID:
2501                         INP_WUNLOCK(inp);
2502                         error =
2503                             sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
2504                         if (error)
2505                                 break;
2506                         buf[sopt->sopt_valsize] = '\0';
2507                         INP_WLOCK_RECHECK(inp);
2508                         if (sopt->sopt_name == TCP_LOGDUMP) {
2509                                 error = tcp_log_dump_tp_logbuf(tp, buf,
2510                                     M_WAITOK, true);
2511                                 INP_WUNLOCK(inp);
2512                         } else {
2513                                 tcp_log_dump_tp_bucket_logbufs(tp, buf);
2514                                 /*
2515                                  * tcp_log_dump_tp_bucket_logbufs() drops the
2516                                  * INP lock.
2517                                  */
2518                         }
2519                         break;
2520 #endif
2521
2522                 default:
2523                         INP_WUNLOCK(inp);
2524                         error = ENOPROTOOPT;
2525                         break;
2526                 }
2527                 break;
2528
2529         case SOPT_GET:
2530                 tp = intotcpcb(inp);
2531                 switch (sopt->sopt_name) {
2532 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2533                 case TCP_MD5SIG:
2534                         INP_WUNLOCK(inp);
2535                         if (!TCPMD5_ENABLED())
2536                                 return (ENOPROTOOPT);
2537                         error = TCPMD5_PCBCTL(inp, sopt);
2538                         break;
2539 #endif
2540
2541                 case TCP_NODELAY:
2542                         optval = tp->t_flags & TF_NODELAY;
2543                         INP_WUNLOCK(inp);
2544                         error = sooptcopyout(sopt, &optval, sizeof optval);
2545                         break;
2546                 case TCP_MAXSEG:
2547                         optval = tp->t_maxseg;
2548                         INP_WUNLOCK(inp);
2549                         error = sooptcopyout(sopt, &optval, sizeof optval);
2550                         break;
2551                 case TCP_REMOTE_UDP_ENCAPS_PORT:
2552                         optval = ntohs(tp->t_port);
2553                         INP_WUNLOCK(inp);
2554                         error = sooptcopyout(sopt, &optval, sizeof optval);
2555                         break;
2556                 case TCP_NOOPT:
2557                         optval = tp->t_flags & TF_NOOPT;
2558                         INP_WUNLOCK(inp);
2559                         error = sooptcopyout(sopt, &optval, sizeof optval);
2560                         break;
2561                 case TCP_NOPUSH:
2562                         optval = tp->t_flags & TF_NOPUSH;
2563                         INP_WUNLOCK(inp);
2564                         error = sooptcopyout(sopt, &optval, sizeof optval);
2565                         break;
2566                 case TCP_INFO:
2567                         tcp_fill_info(tp, &ti);
2568                         INP_WUNLOCK(inp);
2569                         error = sooptcopyout(sopt, &ti, sizeof ti);
2570                         break;
2571                 case TCP_STATS:
2572                         {
2573 #ifdef STATS
2574                         int nheld;
2575                         TYPEOF_MEMBER(struct statsblob, flags) sbflags = 0;
2576
2577                         error = 0;
2578                         socklen_t outsbsz = sopt->sopt_valsize;
2579                         if (tp->t_stats == NULL)
2580                                 error = ENOENT;
2581                         else if (outsbsz >= tp->t_stats->cursz)
2582                                 outsbsz = tp->t_stats->cursz;
2583                         else if (outsbsz >= sizeof(struct statsblob))
2584                                 outsbsz = sizeof(struct statsblob);
2585                         else
2586                                 error = EINVAL;
2587                         INP_WUNLOCK(inp);
2588                         if (error)
2589                                 break;
2590
2591                         sbp = sopt->sopt_val;
2592                         nheld = atop(round_page(((vm_offset_t)sbp) +
2593                             (vm_size_t)outsbsz) - trunc_page((vm_offset_t)sbp));
2594                         vm_page_t ma[nheld];
2595                         if (vm_fault_quick_hold_pages(
2596                             &curproc->p_vmspace->vm_map, (vm_offset_t)sbp,
2597                             outsbsz, VM_PROT_READ | VM_PROT_WRITE, ma,
2598                             nheld) < 0) {
2599                                 error = EFAULT;
2600                                 break;
2601                         }
2602
2603                         if ((error = copyin_nofault(&(sbp->flags), &sbflags,
2604                             SIZEOF_MEMBER(struct statsblob, flags))))
2605                                 goto unhold;
2606
2607                         INP_WLOCK_RECHECK(inp);
2608                         error = stats_blob_snapshot(&sbp, outsbsz, tp->t_stats,
2609                             sbflags | SB_CLONE_USRDSTNOFAULT);
2610                         INP_WUNLOCK(inp);
2611                         sopt->sopt_valsize = outsbsz;
2612 unhold:
2613                         vm_page_unhold_pages(ma, nheld);
2614 #else
2615                         INP_WUNLOCK(inp);
2616                         error = EOPNOTSUPP;
2617 #endif /* !STATS */
2618                         break;
2619                         }
2620                 case TCP_CONGESTION:
2621                         len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
2622                         INP_WUNLOCK(inp);
2623                         error = sooptcopyout(sopt, buf, len + 1);
2624                         break;
2625                 case TCP_MAXUNACKTIME:
2626                 case TCP_KEEPIDLE:
2627                 case TCP_KEEPINTVL:
2628                 case TCP_KEEPINIT:
2629                 case TCP_KEEPCNT:
2630                         switch (sopt->sopt_name) {
2631                         case TCP_MAXUNACKTIME:
2632                                 ui = TP_MAXUNACKTIME(tp) / hz;
2633                                 break;
2634                         case TCP_KEEPIDLE:
2635                                 ui = TP_KEEPIDLE(tp) / hz;
2636                                 break;
2637                         case TCP_KEEPINTVL:
2638                                 ui = TP_KEEPINTVL(tp) / hz;
2639                                 break;
2640                         case TCP_KEEPINIT:
2641                                 ui = TP_KEEPINIT(tp) / hz;
2642                                 break;
2643                         case TCP_KEEPCNT:
2644                                 ui = TP_KEEPCNT(tp);
2645                                 break;
2646                         }
2647                         INP_WUNLOCK(inp);
2648                         error = sooptcopyout(sopt, &ui, sizeof(ui));
2649                         break;
2650 #ifdef TCPPCAP
2651                 case TCP_PCAP_OUT:
2652                 case TCP_PCAP_IN:
2653                         optval = tcp_pcap_get_sock_max(TCP_PCAP_OUT ?
2654                                         &(tp->t_outpkts) : &(tp->t_inpkts));
2655                         INP_WUNLOCK(inp);
2656                         error = sooptcopyout(sopt, &optval, sizeof optval);
2657                         break;
2658 #endif
2659                 case TCP_FASTOPEN:
2660                         optval = tp->t_flags & TF_FASTOPEN;
2661                         INP_WUNLOCK(inp);
2662                         error = sooptcopyout(sopt, &optval, sizeof optval);
2663                         break;
2664 #ifdef TCP_BLACKBOX
2665                 case TCP_LOG:
2666                         optval = tp->t_logstate;
2667                         INP_WUNLOCK(inp);
2668                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2669                         break;
2670                 case TCP_LOGBUF:
2671                         /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
2672                         error = tcp_log_getlogbuf(sopt, tp);
2673                         break;
2674                 case TCP_LOGID:
2675                         len = tcp_log_get_id(tp, buf);
2676                         INP_WUNLOCK(inp);
2677                         error = sooptcopyout(sopt, buf, len + 1);
2678                         break;
2679                 case TCP_LOGDUMP:
2680                 case TCP_LOGDUMPID:
2681                         INP_WUNLOCK(inp);
2682                         error = EINVAL;
2683                         break;
2684 #endif
2685 #ifdef KERN_TLS
2686                 case TCP_TXTLS_MODE:
2687                         error = ktls_get_tx_mode(so, &optval);
2688                         INP_WUNLOCK(inp);
2689                         if (error == 0)
2690                                 error = sooptcopyout(sopt, &optval,
2691                                     sizeof(optval));
2692                         break;
2693                 case TCP_RXTLS_MODE:
2694                         error = ktls_get_rx_mode(so, &optval);
2695                         INP_WUNLOCK(inp);
2696                         if (error == 0)
2697                                 error = sooptcopyout(sopt, &optval,
2698                                     sizeof(optval));
2699                         break;
2700 #endif
2701                 case TCP_LRD:
2702                         optval = tp->t_flags & TF_LRD;
2703                         INP_WUNLOCK(inp);
2704                         error = sooptcopyout(sopt, &optval, sizeof optval);
2705                         break;
2706                 default:
2707                         INP_WUNLOCK(inp);
2708                         error = ENOPROTOOPT;
2709                         break;
2710                 }
2711                 break;
2712         }
2713         return (error);
2714 }
2715 #undef INP_WLOCK_RECHECK
2716 #undef INP_WLOCK_RECHECK_CLEANUP
2717
2718 /*
2719  * Initiate (or continue) disconnect.
2720  * If embryonic state, just send reset (once).
2721  * If in ``let data drain'' option and linger null, just drop.
2722  * Otherwise (hard), mark socket disconnecting and drop
2723  * current input data; switch states based on user close, and
2724  * send segment to peer (with FIN).
2725  */
2726 static void
2727 tcp_disconnect(struct tcpcb *tp)
2728 {
2729         struct inpcb *inp = tptoinpcb(tp);
2730         struct socket *so = tptosocket(tp);
2731
2732         NET_EPOCH_ASSERT();
2733         INP_WLOCK_ASSERT(inp);
2734
2735         /*
2736          * Neither tcp_close() nor tcp_drop() should return NULL, as the
2737          * socket is still open.
2738          */
2739         if (tp->t_state < TCPS_ESTABLISHED &&
2740             !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
2741                 tp = tcp_close(tp);
2742                 KASSERT(tp != NULL,
2743                     ("tcp_disconnect: tcp_close() returned NULL"));
2744         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
2745                 tp = tcp_drop(tp, 0);
2746                 KASSERT(tp != NULL,
2747                     ("tcp_disconnect: tcp_drop() returned NULL"));
2748         } else {
2749                 soisdisconnecting(so);
2750                 sbflush(&so->so_rcv);
2751                 tcp_usrclosed(tp);
2752                 if (!(inp->inp_flags & INP_DROPPED))
2753                         /* Ignore stack's drop request, we already at it. */
2754                         (void)tcp_output_nodrop(tp);
2755         }
2756 }
2757
2758 /*
2759  * User issued close, and wish to trail through shutdown states:
2760  * if never received SYN, just forget it.  If got a SYN from peer,
2761  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2762  * If already got a FIN from peer, then almost done; go to LAST_ACK
2763  * state.  In all other cases, have already sent FIN to peer (e.g.
2764  * after PRU_SHUTDOWN), and just have to play tedious game waiting
2765  * for peer to send FIN or not respond to keep-alives, etc.
2766  * We can let the user exit from the close as soon as the FIN is acked.
2767  */
2768 static void
2769 tcp_usrclosed(struct tcpcb *tp)
2770 {
2771
2772         NET_EPOCH_ASSERT();
2773         INP_WLOCK_ASSERT(tptoinpcb(tp));
2774
2775         switch (tp->t_state) {
2776         case TCPS_LISTEN:
2777 #ifdef TCP_OFFLOAD
2778                 tcp_offload_listen_stop(tp);
2779 #endif
2780                 tcp_state_change(tp, TCPS_CLOSED);
2781                 /* FALLTHROUGH */
2782         case TCPS_CLOSED:
2783                 tp = tcp_close(tp);
2784                 /*
2785                  * tcp_close() should never return NULL here as the socket is
2786                  * still open.
2787                  */
2788                 KASSERT(tp != NULL,
2789                     ("tcp_usrclosed: tcp_close() returned NULL"));
2790                 break;
2791
2792         case TCPS_SYN_SENT:
2793         case TCPS_SYN_RECEIVED:
2794                 tp->t_flags |= TF_NEEDFIN;
2795                 break;
2796
2797         case TCPS_ESTABLISHED:
2798                 tcp_state_change(tp, TCPS_FIN_WAIT_1);
2799                 break;
2800
2801         case TCPS_CLOSE_WAIT:
2802                 tcp_state_change(tp, TCPS_LAST_ACK);
2803                 break;
2804         }
2805         if (tp->t_acktime == 0)
2806                 tp->t_acktime = ticks;
2807         if (tp->t_state >= TCPS_FIN_WAIT_2) {
2808                 soisdisconnected(tptosocket(tp));
2809                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
2810                 if (tp->t_state == TCPS_FIN_WAIT_2) {
2811                         int timeout;
2812
2813                         timeout = (tcp_fast_finwait2_recycle) ?
2814                             tcp_finwait2_timeout : TP_MAXIDLE(tp);
2815                         tcp_timer_activate(tp, TT_2MSL, timeout);
2816                 }
2817         }
2818 }
2819
2820 #ifdef DDB
/*
 * Emit `indent' space characters on the debugger console.  A zero or
 * negative count prints nothing.
 */
static void
db_print_indent(int indent)
{

	for (; indent > 0; indent--)
		db_printf(" ");
}
2829
2830 static void
2831 db_print_tstate(int t_state)
2832 {
2833
2834         switch (t_state) {
2835         case TCPS_CLOSED:
2836                 db_printf("TCPS_CLOSED");
2837                 return;
2838
2839         case TCPS_LISTEN:
2840                 db_printf("TCPS_LISTEN");
2841                 return;
2842
2843         case TCPS_SYN_SENT:
2844                 db_printf("TCPS_SYN_SENT");
2845                 return;
2846
2847         case TCPS_SYN_RECEIVED:
2848                 db_printf("TCPS_SYN_RECEIVED");
2849                 return;
2850
2851         case TCPS_ESTABLISHED:
2852                 db_printf("TCPS_ESTABLISHED");
2853                 return;
2854
2855         case TCPS_CLOSE_WAIT:
2856                 db_printf("TCPS_CLOSE_WAIT");
2857                 return;
2858
2859         case TCPS_FIN_WAIT_1:
2860                 db_printf("TCPS_FIN_WAIT_1");
2861                 return;
2862
2863         case TCPS_CLOSING:
2864                 db_printf("TCPS_CLOSING");
2865                 return;
2866
2867         case TCPS_LAST_ACK:
2868                 db_printf("TCPS_LAST_ACK");
2869                 return;
2870
2871         case TCPS_FIN_WAIT_2:
2872                 db_printf("TCPS_FIN_WAIT_2");
2873                 return;
2874
2875         case TCPS_TIME_WAIT:
2876                 db_printf("TCPS_TIME_WAIT");
2877                 return;
2878
2879         default:
2880                 db_printf("unknown");
2881                 return;
2882         }
2883 }
2884
2885 static void
2886 db_print_tflags(u_int t_flags)
2887 {
2888         int comma;
2889
2890         comma = 0;
2891         if (t_flags & TF_ACKNOW) {
2892                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
2893                 comma = 1;
2894         }
2895         if (t_flags & TF_DELACK) {
2896                 db_printf("%sTF_DELACK", comma ? ", " : "");
2897                 comma = 1;
2898         }
2899         if (t_flags & TF_NODELAY) {
2900                 db_printf("%sTF_NODELAY", comma ? ", " : "");
2901                 comma = 1;
2902         }
2903         if (t_flags & TF_NOOPT) {
2904                 db_printf("%sTF_NOOPT", comma ? ", " : "");
2905                 comma = 1;
2906         }
2907         if (t_flags & TF_SENTFIN) {
2908                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
2909                 comma = 1;
2910         }
2911         if (t_flags & TF_REQ_SCALE) {
2912                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
2913                 comma = 1;
2914         }
2915         if (t_flags & TF_RCVD_SCALE) {
2916                 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
2917                 comma = 1;
2918         }
2919         if (t_flags & TF_REQ_TSTMP) {
2920                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
2921                 comma = 1;
2922         }
2923         if (t_flags & TF_RCVD_TSTMP) {
2924                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
2925                 comma = 1;
2926         }
2927         if (t_flags & TF_SACK_PERMIT) {
2928                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
2929                 comma = 1;
2930         }
2931         if (t_flags & TF_NEEDSYN) {
2932                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
2933                 comma = 1;
2934         }
2935         if (t_flags & TF_NEEDFIN) {
2936                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
2937                 comma = 1;
2938         }
2939         if (t_flags & TF_NOPUSH) {
2940                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
2941                 comma = 1;
2942         }
2943         if (t_flags & TF_PREVVALID) {
2944                 db_printf("%sTF_PREVVALID", comma ? ", " : "");
2945                 comma = 1;
2946         }
2947         if (t_flags & TF_MORETOCOME) {
2948                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
2949                 comma = 1;
2950         }
2951         if (t_flags & TF_SONOTCONN) {
2952                 db_printf("%sTF_SONOTCONN", comma ? ", " : "");
2953                 comma = 1;
2954         }
2955         if (t_flags & TF_LASTIDLE) {
2956                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
2957                 comma = 1;
2958         }
2959         if (t_flags & TF_RXWIN0SENT) {
2960                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
2961                 comma = 1;
2962         }
2963         if (t_flags & TF_FASTRECOVERY) {
2964                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
2965                 comma = 1;
2966         }
2967         if (t_flags & TF_CONGRECOVERY) {
2968                 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
2969                 comma = 1;
2970         }
2971         if (t_flags & TF_WASFRECOVERY) {
2972                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
2973                 comma = 1;
2974         }
2975         if (t_flags & TF_WASCRECOVERY) {
2976                 db_printf("%sTF_WASCRECOVERY", comma ? ", " : "");
2977                 comma = 1;
2978         }
2979         if (t_flags & TF_SIGNATURE) {
2980                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
2981                 comma = 1;
2982         }
2983         if (t_flags & TF_FORCEDATA) {
2984                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
2985                 comma = 1;
2986         }
2987         if (t_flags & TF_TSO) {
2988                 db_printf("%sTF_TSO", comma ? ", " : "");
2989                 comma = 1;
2990         }
2991         if (t_flags & TF_FASTOPEN) {
2992                 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
2993                 comma = 1;
2994         }
2995 }
2996
2997 static void
2998 db_print_tflags2(u_int t_flags2)
2999 {
3000         int comma;
3001
3002         comma = 0;
3003         if (t_flags2 & TF2_PLPMTU_BLACKHOLE) {
3004                 db_printf("%sTF2_PLPMTU_BLACKHOLE", comma ? ", " : "");
3005                 comma = 1;
3006         }
3007         if (t_flags2 & TF2_PLPMTU_PMTUD) {
3008                 db_printf("%sTF2_PLPMTU_PMTUD", comma ? ", " : "");
3009                 comma = 1;
3010         }
3011         if (t_flags2 & TF2_PLPMTU_MAXSEGSNT) {
3012                 db_printf("%sTF2_PLPMTU_MAXSEGSNT", comma ? ", " : "");
3013                 comma = 1;
3014         }
3015         if (t_flags2 & TF2_LOG_AUTO) {
3016                 db_printf("%sTF2_LOG_AUTO", comma ? ", " : "");
3017                 comma = 1;
3018         }
3019         if (t_flags2 & TF2_DROP_AF_DATA) {
3020                 db_printf("%sTF2_DROP_AF_DATA", comma ? ", " : "");
3021                 comma = 1;
3022         }
3023         if (t_flags2 & TF2_ECN_PERMIT) {
3024                 db_printf("%sTF2_ECN_PERMIT", comma ? ", " : "");
3025                 comma = 1;
3026         }
3027         if (t_flags2 & TF2_ECN_SND_CWR) {
3028                 db_printf("%sTF2_ECN_SND_CWR", comma ? ", " : "");
3029                 comma = 1;
3030         }
3031         if (t_flags2 & TF2_ECN_SND_ECE) {
3032                 db_printf("%sTF2_ECN_SND_ECE", comma ? ", " : "");
3033                 comma = 1;
3034         }
3035         if (t_flags2 & TF2_ACE_PERMIT) {
3036                 db_printf("%sTF2_ACE_PERMIT", comma ? ", " : "");
3037                 comma = 1;
3038         }
3039         if (t_flags2 & TF2_FBYTES_COMPLETE) {
3040                 db_printf("%sTF2_FBYTES_COMPLETE", comma ? ", " : "");
3041                 comma = 1;
3042         }
3043 }
3044
3045 static void
3046 db_print_toobflags(char t_oobflags)
3047 {
3048         int comma;
3049
3050         comma = 0;
3051         if (t_oobflags & TCPOOB_HAVEDATA) {
3052                 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
3053                 comma = 1;
3054         }
3055         if (t_oobflags & TCPOOB_HADDATA) {
3056                 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
3057                 comma = 1;
3058         }
3059 }
3060
3061 static void
3062 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
3063 {
3064
3065         db_print_indent(indent);
3066         db_printf("%s at %p\n", name, tp);
3067
3068         indent += 2;
3069
3070         db_print_indent(indent);
3071         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
3072            TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
3073
3074         db_print_indent(indent);
3075         db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
3076             &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
3077
3078         db_print_indent(indent);
3079         db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
3080             &tp->t_timers->tt_delack, tp->t_inpcb);
3081
3082         db_print_indent(indent);
3083         db_printf("t_state: %d (", tp->t_state);
3084         db_print_tstate(tp->t_state);
3085         db_printf(")\n");
3086
3087         db_print_indent(indent);
3088         db_printf("t_flags: 0x%x (", tp->t_flags);
3089         db_print_tflags(tp->t_flags);
3090         db_printf(")\n");
3091
3092         db_print_indent(indent);
3093         db_printf("t_flags2: 0x%x (", tp->t_flags2);
3094         db_print_tflags2(tp->t_flags2);
3095         db_printf(")\n");
3096
3097         db_print_indent(indent);
3098         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
3099             tp->snd_una, tp->snd_max, tp->snd_nxt);
3100
3101         db_print_indent(indent);
3102         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
3103            tp->snd_up, tp->snd_wl1, tp->snd_wl2);
3104
3105         db_print_indent(indent);
3106         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
3107             tp->iss, tp->irs, tp->rcv_nxt);
3108
3109         db_print_indent(indent);
3110         db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
3111             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
3112
3113         db_print_indent(indent);
3114         db_printf("snd_wnd: %u   snd_cwnd: %u\n",
3115            tp->snd_wnd, tp->snd_cwnd);
3116
3117         db_print_indent(indent);
3118         db_printf("snd_ssthresh: %u   snd_recover: "
3119             "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
3120
3121         db_print_indent(indent);
3122         db_printf("t_rcvtime: %u   t_startime: %u\n",
3123             tp->t_rcvtime, tp->t_starttime);
3124
3125         db_print_indent(indent);
3126         db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
3127             tp->t_rtttime, tp->t_rtseq);
3128
3129         db_print_indent(indent);
3130         db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
3131             tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
3132
3133         db_print_indent(indent);
3134         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u\n",
3135             tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin);
3136
3137         db_print_indent(indent);
3138         db_printf("t_rttupdated: %lu   max_sndwnd: %u   t_softerror: %d\n",
3139             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
3140
3141         db_print_indent(indent);
3142         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
3143         db_print_toobflags(tp->t_oobflags);
3144         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
3145
3146         db_print_indent(indent);
3147         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
3148             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
3149
3150         db_print_indent(indent);
3151         db_printf("ts_recent: %u   ts_recent_age: %u\n",
3152             tp->ts_recent, tp->ts_recent_age);
3153
3154         db_print_indent(indent);
3155         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
3156             "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
3157
3158         db_print_indent(indent);
3159         db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
3160             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
3161             tp->snd_recover_prev, tp->t_badrxtwin);
3162
3163         db_print_indent(indent);
3164         db_printf("snd_numholes: %d  snd_holes first: %p\n",
3165             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
3166
3167         db_print_indent(indent);
3168         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d\n",
3169             tp->snd_fack, tp->rcv_numsacks);
3170
3171         /* Skip sackblks, sackhint. */
3172
3173         db_print_indent(indent);
3174         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
3175             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
3176 }
3177
3178 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
3179 {
3180         struct tcpcb *tp;
3181
3182         if (!have_addr) {
3183                 db_printf("usage: show tcpcb <addr>\n");
3184                 return;
3185         }
3186         tp = (struct tcpcb *)addr;
3187
3188         db_print_tcpcb(tp, "tcpcb", 0);
3189 }
3190 #endif