]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/tcp_usrreq.c
Import riscv DTS files
[FreeBSD/FreeBSD.git] / sys / netinet / tcp_usrreq.c
1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 1982, 1986, 1988, 1993
5  *      The Regents of the University of California.
6  * Copyright (c) 2006-2007 Robert N. M. Watson
7  * Copyright (c) 2010-2011 Juniper Networks, Inc.
8  * All rights reserved.
9  *
10  * Portions of this software were developed by Robert N. M. Watson under
11  * contract to Juniper Networks, Inc.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. Neither the name of the University nor the names of its contributors
22  *    may be used to endorse or promote products derived from this software
23  *    without specific prior written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35  * SUCH DAMAGE.
36  *
37  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
38  */
39
40 #include <sys/cdefs.h>
41 __FBSDID("$FreeBSD$");
42
43 #include "opt_ddb.h"
44 #include "opt_inet.h"
45 #include "opt_inet6.h"
46 #include "opt_ipsec.h"
47 #include "opt_kern_tls.h"
48 #include "opt_tcpdebug.h"
49
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/limits.h>
53 #include <sys/malloc.h>
54 #include <sys/refcount.h>
55 #include <sys/kernel.h>
56 #include <sys/ktls.h>
57 #include <sys/sysctl.h>
58 #include <sys/mbuf.h>
59 #ifdef INET6
60 #include <sys/domain.h>
61 #endif /* INET6 */
62 #include <sys/socket.h>
63 #include <sys/socketvar.h>
64 #include <sys/protosw.h>
65 #include <sys/proc.h>
66 #include <sys/jail.h>
67 #include <sys/syslog.h>
68
69 #ifdef DDB
70 #include <ddb/ddb.h>
71 #endif
72
73 #include <net/if.h>
74 #include <net/if_var.h>
75 #include <net/route.h>
76 #include <net/vnet.h>
77
78 #include <netinet/in.h>
79 #include <netinet/in_kdtrace.h>
80 #include <netinet/in_pcb.h>
81 #include <netinet/in_systm.h>
82 #include <netinet/in_var.h>
83 #include <netinet/ip_var.h>
84 #ifdef INET6
85 #include <netinet/ip6.h>
86 #include <netinet6/in6_pcb.h>
87 #include <netinet6/ip6_var.h>
88 #include <netinet6/scope6_var.h>
89 #endif
90 #include <netinet/tcp.h>
91 #include <netinet/tcp_fsm.h>
92 #include <netinet/tcp_seq.h>
93 #include <netinet/tcp_timer.h>
94 #include <netinet/tcp_var.h>
95 #include <netinet/tcp_log_buf.h>
96 #include <netinet/tcpip.h>
97 #include <netinet/cc/cc.h>
98 #include <netinet/tcp_fastopen.h>
99 #include <netinet/tcp_hpts.h>
100 #ifdef TCPPCAP
101 #include <netinet/tcp_pcap.h>
102 #endif
103 #ifdef TCPDEBUG
104 #include <netinet/tcp_debug.h>
105 #endif
106 #ifdef TCP_OFFLOAD
107 #include <netinet/tcp_offload.h>
108 #endif
109 #include <netipsec/ipsec_support.h>
110
111 /*
112  * TCP protocol interface to socket abstraction.
113  */
/* Forward declarations for the file-local helpers used by the handlers. */
static int	tcp_attach(struct socket *so);
#ifdef INET
static int	tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET */
#ifdef INET6
static int	tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
		    struct thread *td);
#endif /* INET6 */
static void	tcp_disconnect(struct tcpcb *tp);
static void	tcp_usrclosed(struct tcpcb *tp);
static void	tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
126
/*
 * User-request tracing helpers.
 *
 * With TCPDEBUG defined:
 *   TCPDEBUG0      declares the local 'ostate' used by the other two macros.
 *   TCPDEBUG1()    records tp->t_state in 'ostate' (0 when tp is NULL).
 *   TCPDEBUG2(req) calls tcp_trace() when tp is non-NULL and the socket
 *                  has SO_DEBUG set.
 * All three rely on locals named 'tp' and 'so' (and 'ostate') being in
 * scope at the point of use.  Without TCPDEBUG they expand to nothing.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture a following 'else' when used inside an
 * unbraced if/else.
 */
#ifdef TCPDEBUG
#define	TCPDEBUG0	int ostate = 0
#define	TCPDEBUG1()	ostate = tp ? tp->t_state : 0
#define	TCPDEBUG2(req)	do {						\
	if (tp && (so->so_options & SO_DEBUG))				\
		tcp_trace(TA_USER, ostate, tp, 0, 0, req);		\
} while (0)
#else
#define	TCPDEBUG0
#define	TCPDEBUG1()
#define	TCPDEBUG2(req)
#endif
137
138 /*
139  * TCP attaches to socket via pru_attach(), reserving space,
140  * and an internet control block.
141  */
142 static int
143 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
144 {
145         struct inpcb *inp;
146         struct tcpcb *tp = NULL;
147         int error;
148         TCPDEBUG0;
149
150         inp = sotoinpcb(so);
151         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
152         TCPDEBUG1();
153
154         error = tcp_attach(so);
155         if (error)
156                 goto out;
157
158         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
159                 so->so_linger = TCP_LINGERTIME;
160
161         inp = sotoinpcb(so);
162         tp = intotcpcb(inp);
163 out:
164         TCPDEBUG2(PRU_ATTACH);
165         TCP_PROBE2(debug__user, tp, PRU_ATTACH);
166         return error;
167 }
168
169 /*
170  * tcp_detach is called when the socket layer loses its final reference
171  * to the socket, be it a file descriptor reference, a reference from TCP,
172  * etc.  At this point, there is only one case in which we will keep around
173  * inpcb state: time wait.
174  *
175  * This function can probably be re-absorbed back into tcp_usr_detach() now
176  * that there is a single detach path.
177  */
178 static void
179 tcp_detach(struct socket *so, struct inpcb *inp)
180 {
181         struct tcpcb *tp;
182
183         INP_INFO_LOCK_ASSERT(&V_tcbinfo);
184         INP_WLOCK_ASSERT(inp);
185
186         KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
187         KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
188
189         tp = intotcpcb(inp);
190
191         if (inp->inp_flags & INP_TIMEWAIT) {
192                 /*
193                  * There are two cases to handle: one in which the time wait
194                  * state is being discarded (INP_DROPPED), and one in which
195                  * this connection will remain in timewait.  In the former,
196                  * it is time to discard all state (except tcptw, which has
197                  * already been discarded by the timewait close code, which
198                  * should be further up the call stack somewhere).  In the
199                  * latter case, we detach from the socket, but leave the pcb
200                  * present until timewait ends.
201                  *
202                  * XXXRW: Would it be cleaner to free the tcptw here?
203                  *
204                  * Astute question indeed, from twtcp perspective there are
205                  * four cases to consider:
206                  *
207                  * #1 tcp_detach is called at tcptw creation time by
208                  *  tcp_twstart, then do not discard the newly created tcptw
209                  *  and leave inpcb present until timewait ends
210                  * #2 tcp_detach is called at tcptw creation time by
211                  *  tcp_twstart, but connection is local and tw will be
212                  *  discarded immediately
213                  * #3 tcp_detach is called at timewait end (or reuse) by
214                  *  tcp_twclose, then the tcptw has already been discarded
215                  *  (or reused) and inpcb is freed here
216                  * #4 tcp_detach is called() after timewait ends (or reuse)
217                  *  (e.g. by soclose), then tcptw has already been discarded
218                  *  (or reused) and inpcb is freed here
219                  *
220                  *  In all three cases the tcptw should not be freed here.
221                  */
222                 if (inp->inp_flags & INP_DROPPED) {
223                         in_pcbdetach(inp);
224                         if (__predict_true(tp == NULL)) {
225                                 in_pcbfree(inp);
226                         } else {
227                                 /*
228                                  * This case should not happen as in TIMEWAIT
229                                  * state the inp should not be destroyed before
230                                  * its tcptw.  If INVARIANTS is defined, panic.
231                                  */
232 #ifdef INVARIANTS
233                                 panic("%s: Panic before an inp double-free: "
234                                     "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
235                                     , __func__);
236 #else
237                                 log(LOG_ERR, "%s: Avoid an inp double-free: "
238                                     "INP_TIMEWAIT && INP_DROPPED && tp != NULL"
239                                     , __func__);
240 #endif
241                                 INP_WUNLOCK(inp);
242                         }
243                 } else {
244                         in_pcbdetach(inp);
245                         INP_WUNLOCK(inp);
246                 }
247         } else {
248                 /*
249                  * If the connection is not in timewait, we consider two
250                  * two conditions: one in which no further processing is
251                  * necessary (dropped || embryonic), and one in which TCP is
252                  * not yet done, but no longer requires the socket, so the
253                  * pcb will persist for the time being.
254                  *
255                  * XXXRW: Does the second case still occur?
256                  */
257                 if (inp->inp_flags & INP_DROPPED ||
258                     tp->t_state < TCPS_SYN_SENT) {
259                         tcp_discardcb(tp);
260                         in_pcbdetach(inp);
261                         in_pcbfree(inp);
262                 } else {
263                         in_pcbdetach(inp);
264                         INP_WUNLOCK(inp);
265                 }
266         }
267 }
268
269 /*
270  * pru_detach() detaches the TCP protocol from the socket.
271  * If the protocol state is non-embryonic, then can't
272  * do this directly: have to initiate a pru_disconnect(),
273  * which may finish later; embryonic TCB's can just
274  * be discarded here.
275  */
276 static void
277 tcp_usr_detach(struct socket *so)
278 {
279         struct inpcb *inp;
280         int rlock = 0;
281         struct epoch_tracker et;
282
283         inp = sotoinpcb(so);
284         KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
285         if (!INP_INFO_WLOCKED(&V_tcbinfo)) {
286                 NET_EPOCH_ENTER(et);
287                 rlock = 1;
288         }
289         INP_WLOCK(inp);
290         KASSERT(inp->inp_socket != NULL,
291             ("tcp_usr_detach: inp_socket == NULL"));
292         tcp_detach(so, inp);
293         if (rlock)
294                 NET_EPOCH_EXIT(et);
295 }
296
297 #ifdef INET
298 /*
299  * Give the socket an address.
300  */
301 static int
302 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
303 {
304         int error = 0;
305         struct inpcb *inp;
306         struct tcpcb *tp = NULL;
307         struct sockaddr_in *sinp;
308
309         sinp = (struct sockaddr_in *)nam;
310         if (nam->sa_len != sizeof (*sinp))
311                 return (EINVAL);
312         /*
313          * Must check for multicast addresses and disallow binding
314          * to them.
315          */
316         if (sinp->sin_family == AF_INET &&
317             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
318                 return (EAFNOSUPPORT);
319
320         TCPDEBUG0;
321         inp = sotoinpcb(so);
322         KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
323         INP_WLOCK(inp);
324         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
325                 error = EINVAL;
326                 goto out;
327         }
328         tp = intotcpcb(inp);
329         TCPDEBUG1();
330         INP_HASH_WLOCK(&V_tcbinfo);
331         error = in_pcbbind(inp, nam, td->td_ucred);
332         INP_HASH_WUNLOCK(&V_tcbinfo);
333 out:
334         TCPDEBUG2(PRU_BIND);
335         TCP_PROBE2(debug__user, tp, PRU_BIND);
336         INP_WUNLOCK(inp);
337
338         return (error);
339 }
340 #endif /* INET */
341
342 #ifdef INET6
343 static int
344 tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
345 {
346         int error = 0;
347         struct inpcb *inp;
348         struct tcpcb *tp = NULL;
349         struct sockaddr_in6 *sin6;
350         u_char vflagsav;
351
352         sin6 = (struct sockaddr_in6 *)nam;
353         if (nam->sa_len != sizeof (*sin6))
354                 return (EINVAL);
355         /*
356          * Must check for multicast addresses and disallow binding
357          * to them.
358          */
359         if (sin6->sin6_family == AF_INET6 &&
360             IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
361                 return (EAFNOSUPPORT);
362
363         TCPDEBUG0;
364         inp = sotoinpcb(so);
365         KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
366         INP_WLOCK(inp);
367         vflagsav = inp->inp_vflag;
368         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
369                 error = EINVAL;
370                 goto out;
371         }
372         tp = intotcpcb(inp);
373         TCPDEBUG1();
374         INP_HASH_WLOCK(&V_tcbinfo);
375         inp->inp_vflag &= ~INP_IPV4;
376         inp->inp_vflag |= INP_IPV6;
377 #ifdef INET
378         if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
379                 if (IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr))
380                         inp->inp_vflag |= INP_IPV4;
381                 else if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
382                         struct sockaddr_in sin;
383
384                         in6_sin6_2_sin(&sin, sin6);
385                         if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
386                                 error = EAFNOSUPPORT;
387                                 INP_HASH_WUNLOCK(&V_tcbinfo);
388                                 goto out;
389                         }
390                         inp->inp_vflag |= INP_IPV4;
391                         inp->inp_vflag &= ~INP_IPV6;
392                         error = in_pcbbind(inp, (struct sockaddr *)&sin,
393                             td->td_ucred);
394                         INP_HASH_WUNLOCK(&V_tcbinfo);
395                         goto out;
396                 }
397         }
398 #endif
399         error = in6_pcbbind(inp, nam, td->td_ucred);
400         INP_HASH_WUNLOCK(&V_tcbinfo);
401 out:
402         if (error != 0)
403                 inp->inp_vflag = vflagsav;
404         TCPDEBUG2(PRU_BIND);
405         TCP_PROBE2(debug__user, tp, PRU_BIND);
406         INP_WUNLOCK(inp);
407         return (error);
408 }
409 #endif /* INET6 */
410
411 #ifdef INET
412 /*
413  * Prepare to accept connections.
414  */
415 static int
416 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
417 {
418         int error = 0;
419         struct inpcb *inp;
420         struct tcpcb *tp = NULL;
421
422         TCPDEBUG0;
423         inp = sotoinpcb(so);
424         KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
425         INP_WLOCK(inp);
426         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
427                 error = EINVAL;
428                 goto out;
429         }
430         tp = intotcpcb(inp);
431         TCPDEBUG1();
432         SOCK_LOCK(so);
433         error = solisten_proto_check(so);
434         INP_HASH_WLOCK(&V_tcbinfo);
435         if (error == 0 && inp->inp_lport == 0)
436                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
437         INP_HASH_WUNLOCK(&V_tcbinfo);
438         if (error == 0) {
439                 tcp_state_change(tp, TCPS_LISTEN);
440                 solisten_proto(so, backlog);
441 #ifdef TCP_OFFLOAD
442                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
443                         tcp_offload_listen_start(tp);
444 #endif
445         }
446         SOCK_UNLOCK(so);
447
448         if (IS_FASTOPEN(tp->t_flags))
449                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
450
451 out:
452         TCPDEBUG2(PRU_LISTEN);
453         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
454         INP_WUNLOCK(inp);
455         return (error);
456 }
457 #endif /* INET */
458
459 #ifdef INET6
460 static int
461 tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
462 {
463         int error = 0;
464         struct inpcb *inp;
465         struct tcpcb *tp = NULL;
466         u_char vflagsav;
467
468         TCPDEBUG0;
469         inp = sotoinpcb(so);
470         KASSERT(inp != NULL, ("tcp6_usr_listen: inp == NULL"));
471         INP_WLOCK(inp);
472         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
473                 error = EINVAL;
474                 goto out;
475         }
476         vflagsav = inp->inp_vflag;
477         tp = intotcpcb(inp);
478         TCPDEBUG1();
479         SOCK_LOCK(so);
480         error = solisten_proto_check(so);
481         INP_HASH_WLOCK(&V_tcbinfo);
482         if (error == 0 && inp->inp_lport == 0) {
483                 inp->inp_vflag &= ~INP_IPV4;
484                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
485                         inp->inp_vflag |= INP_IPV4;
486                 error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
487         }
488         INP_HASH_WUNLOCK(&V_tcbinfo);
489         if (error == 0) {
490                 tcp_state_change(tp, TCPS_LISTEN);
491                 solisten_proto(so, backlog);
492 #ifdef TCP_OFFLOAD
493                 if ((so->so_options & SO_NO_OFFLOAD) == 0)
494                         tcp_offload_listen_start(tp);
495 #endif
496         }
497         SOCK_UNLOCK(so);
498
499         if (IS_FASTOPEN(tp->t_flags))
500                 tp->t_tfo_pending = tcp_fastopen_alloc_counter();
501
502         if (error != 0)
503                 inp->inp_vflag = vflagsav;
504
505 out:
506         TCPDEBUG2(PRU_LISTEN);
507         TCP_PROBE2(debug__user, tp, PRU_LISTEN);
508         INP_WUNLOCK(inp);
509         return (error);
510 }
511 #endif /* INET6 */
512
513 #ifdef INET
/*
 * pru_connect() handler for IPv4: initiate a connection to a peer.
 *
 * Rejects wrongly sized addresses (EINVAL), AF_INET multicast
 * destinations (EAFNOSUPPORT) and destinations refused by
 * prison_remote_ip4().  A pcb in timewait yields EADDRINUSE and a
 * dropped one ECONNREFUSED.  Otherwise tcp_connect() sets the connection
 * up, the keep timer is armed with TP_KEEPINIT, and the stack's output
 * routine is called to send the initial segment.  With TCP_OFFLOAD, a
 * successful tcp_offload_connect() short-circuits the timer and output
 * steps.
 */
static int
tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	struct sockaddr_in *sinp = (struct sockaddr_in *)nam;
	struct tcpcb *tp = NULL;
	struct inpcb *inp;
	int error = 0;
	TCPDEBUG0;

	if (nam->sa_len != sizeof(*sinp))
		return (EINVAL);

	/* Must disallow TCP ``connections'' to multicast addresses. */
	if (sinp->sin_family == AF_INET &&
	    IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
		return (EAFNOSUPPORT);

	error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr);
	if (error != 0)
		return (error);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
		error = EADDRINUSE;
		goto out;
	}
	if ((inp->inp_flags & INP_DROPPED) != 0) {
		error = ECONNREFUSED;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();

	error = tcp_connect(tp, nam, td);
	if (error != 0)
		goto out;
#ifdef TCP_OFFLOAD
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0) {
		/* Offload took the connection: nothing more to do here. */
		error = tcp_offload_connect(so, nam);
		if (error == 0)
			goto out;
	}
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tp->t_fb->tfb_tcp_output(tp);
out:
	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
571 #endif /* INET */
572
573 #ifdef INET6
/*
 * pru_connect() handler for IPv6: initiate a connection to a peer.
 *
 * Rejects wrongly sized addresses (EINVAL) and AF_INET6 multicast
 * destinations (EAFNOSUPPORT).  A pcb in timewait yields EADDRINUSE and
 * a dropped one ECONNREFUSED.
 *
 * When INET is compiled in, a v4-mapped destination is connected as
 * IPv4 via tcp_connect(); this requires that IN6P_IPV6_V6ONLY is clear
 * (else EINVAL) and that the pcb has INP_IPV4 set (else EAFNOSUPPORT).
 * Any other destination requires INP_IPV6 and is connected via
 * tcp6_connect().  In both cases the keep timer is armed with
 * TP_KEEPINIT before the stack's output routine sends the first segment,
 * unless a successful tcp_offload_connect() took over.
 *
 * inp_vflag and inc_flags are restored on failure if no local port ended
 * up bound.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
	int error = 0;
	struct inpcb *inp;
	struct tcpcb *tp = NULL;
	struct sockaddr_in6 *sin6;
	u_int8_t incflagsav;
	u_char vflagsav;

	TCPDEBUG0;

	sin6 = (struct sockaddr_in6 *)nam;
	if (nam->sa_len != sizeof (*sin6))
		return (EINVAL);
	/*
	 * Must disallow TCP ``connections'' to multicast addresses.
	 */
	if (sin6->sin6_family == AF_INET6
	    && IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
		return (EAFNOSUPPORT);

	inp = sotoinpcb(so);
	KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
	INP_WLOCK(inp);
	/* Snapshot the flags modified below so failure can roll them back. */
	vflagsav = inp->inp_vflag;
	incflagsav = inp->inp_inc.inc_flags;
	if (inp->inp_flags & INP_TIMEWAIT) {
		error = EADDRINUSE;
		goto out;
	}
	if (inp->inp_flags & INP_DROPPED) {
		error = ECONNREFUSED;
		goto out;
	}
	tp = intotcpcb(inp);
	TCPDEBUG1();
#ifdef INET
	/*
	 * XXXRW: Some confusion: V4/V6 flags relate to binding, and
	 * therefore probably require the hash lock, which isn't held here.
	 * Is this a significant problem?
	 */
	if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
		struct sockaddr_in sin;

		if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
			error = EINVAL;
			goto out;
		}
		if ((inp->inp_vflag & INP_IPV4) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}

		in6_sin6_2_sin(&sin, sin6);
		if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
			error = EAFNOSUPPORT;
			goto out;
		}
		if ((error = prison_remote_ip4(td->td_ucred,
		    &sin.sin_addr)) != 0)
			goto out;
		inp->inp_vflag |= INP_IPV4;
		inp->inp_vflag &= ~INP_IPV6;
		if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
			goto out;
#ifdef TCP_OFFLOAD
		if (registered_toedevs > 0 &&
		    (so->so_options & SO_NO_OFFLOAD) == 0 &&
		    (error = tcp_offload_connect(so, nam)) == 0)
			goto out;
#endif
		/*
		 * Arm the connection-establishment timer exactly as
		 * tcp_usr_connect() and the native IPv6 path below do;
		 * this path previously went straight to output without it.
		 */
		tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
		error = tp->t_fb->tfb_tcp_output(tp);
		goto out;
	} else {
		if ((inp->inp_vflag & INP_IPV6) == 0) {
			error = EAFNOSUPPORT;
			goto out;
		}
	}
#endif
	if ((error = prison_remote_ip6(td->td_ucred, &sin6->sin6_addr)) != 0)
		goto out;
	inp->inp_vflag &= ~INP_IPV4;
	inp->inp_vflag |= INP_IPV6;
	inp->inp_inc.inc_flags |= INC_ISIPV6;
	if ((error = tcp6_connect(tp, nam, td)) != 0)
		goto out;
#ifdef TCP_OFFLOAD
	if (registered_toedevs > 0 &&
	    (so->so_options & SO_NO_OFFLOAD) == 0 &&
	    (error = tcp_offload_connect(so, nam)) == 0)
		goto out;
#endif
	tcp_timer_activate(tp, TT_KEEP, TP_KEEPINIT(tp));
	error = tp->t_fb->tfb_tcp_output(tp);

out:
	/*
	 * If the implicit bind in the connect call fails, restore
	 * the flags we modified.
	 */
	if (error != 0 && inp->inp_lport == 0) {
		inp->inp_vflag = vflagsav;
		inp->inp_inc.inc_flags = incflagsav;
	}

	TCPDEBUG2(PRU_CONNECT);
	TCP_PROBE2(debug__user, tp, PRU_CONNECT);
	INP_WUNLOCK(inp);
	return (error);
}
687 #endif /* INET6 */
688
689 /*
690  * Initiate disconnect from peer.
691  * If connection never passed embryonic stage, just drop;
692  * else if don't need to let data drain, then can just drop anyways,
693  * else have to begin TCP shutdown process: mark socket disconnecting,
694  * drain unread data, state switch to reflect user close, and
695  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
696  * when peer sends FIN and acks ours.
697  *
698  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
699  */
700 static int
701 tcp_usr_disconnect(struct socket *so)
702 {
703         struct inpcb *inp;
704         struct tcpcb *tp = NULL;
705         struct epoch_tracker et;
706         int error = 0;
707
708         TCPDEBUG0;
709         NET_EPOCH_ENTER(et);
710         inp = sotoinpcb(so);
711         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
712         INP_WLOCK(inp);
713         if (inp->inp_flags & INP_TIMEWAIT)
714                 goto out;
715         if (inp->inp_flags & INP_DROPPED) {
716                 error = ECONNRESET;
717                 goto out;
718         }
719         tp = intotcpcb(inp);
720         TCPDEBUG1();
721         tcp_disconnect(tp);
722 out:
723         TCPDEBUG2(PRU_DISCONNECT);
724         TCP_PROBE2(debug__user, tp, PRU_DISCONNECT);
725         INP_WUNLOCK(inp);
726         NET_EPOCH_EXIT(et);
727         return (error);
728 }
729
730 #ifdef INET
731 /*
732  * Accept a connection.  Essentially all the work is done at higher levels;
733  * just return the address of the peer, storing through addr.
734  */
735 static int
736 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
737 {
738         int error = 0;
739         struct inpcb *inp = NULL;
740         struct tcpcb *tp = NULL;
741         struct in_addr addr;
742         in_port_t port = 0;
743         TCPDEBUG0;
744
745         if (so->so_state & SS_ISDISCONNECTED)
746                 return (ECONNABORTED);
747
748         inp = sotoinpcb(so);
749         KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
750         INP_WLOCK(inp);
751         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
752                 error = ECONNABORTED;
753                 goto out;
754         }
755         tp = intotcpcb(inp);
756         TCPDEBUG1();
757
758         /*
759          * We inline in_getpeeraddr and COMMON_END here, so that we can
760          * copy the data of interest and defer the malloc until after we
761          * release the lock.
762          */
763         port = inp->inp_fport;
764         addr = inp->inp_faddr;
765
766 out:
767         TCPDEBUG2(PRU_ACCEPT);
768         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
769         INP_WUNLOCK(inp);
770         if (error == 0)
771                 *nam = in_sockaddr(port, &addr);
772         return error;
773 }
774 #endif /* INET */
775
776 #ifdef INET6
777 static int
778 tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
779 {
780         struct inpcb *inp = NULL;
781         int error = 0;
782         struct tcpcb *tp = NULL;
783         struct in_addr addr;
784         struct in6_addr addr6;
785         struct epoch_tracker et;
786         in_port_t port = 0;
787         int v4 = 0;
788         TCPDEBUG0;
789
790         if (so->so_state & SS_ISDISCONNECTED)
791                 return (ECONNABORTED);
792
793         inp = sotoinpcb(so);
794         KASSERT(inp != NULL, ("tcp6_usr_accept: inp == NULL"));
795         NET_EPOCH_ENTER(et);
796         INP_WLOCK(inp);
797         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
798                 error = ECONNABORTED;
799                 goto out;
800         }
801         tp = intotcpcb(inp);
802         TCPDEBUG1();
803
804         /*
805          * We inline in6_mapped_peeraddr and COMMON_END here, so that we can
806          * copy the data of interest and defer the malloc until after we
807          * release the lock.
808          */
809         if (inp->inp_vflag & INP_IPV4) {
810                 v4 = 1;
811                 port = inp->inp_fport;
812                 addr = inp->inp_faddr;
813         } else {
814                 port = inp->inp_fport;
815                 addr6 = inp->in6p_faddr;
816         }
817
818 out:
819         TCPDEBUG2(PRU_ACCEPT);
820         TCP_PROBE2(debug__user, tp, PRU_ACCEPT);
821         INP_WUNLOCK(inp);
822         NET_EPOCH_EXIT(et);
823         if (error == 0) {
824                 if (v4)
825                         *nam = in6_v4mapsin6_sockaddr(port, &addr);
826                 else
827                         *nam = in6_sockaddr(port, &addr6);
828         }
829         return error;
830 }
831 #endif /* INET6 */
832
833 /*
834  * Mark the connection as being incapable of further output.
835  */
836 static int
837 tcp_usr_shutdown(struct socket *so)
838 {
839         int error = 0;
840         struct inpcb *inp;
841         struct tcpcb *tp = NULL;
842         struct epoch_tracker et;
843
844         TCPDEBUG0;
845         NET_EPOCH_ENTER(et);
846         inp = sotoinpcb(so);
847         KASSERT(inp != NULL, ("inp == NULL"));
848         INP_WLOCK(inp);
849         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
850                 error = ECONNRESET;
851                 goto out;
852         }
853         tp = intotcpcb(inp);
854         TCPDEBUG1();
855         socantsendmore(so);
856         tcp_usrclosed(tp);
857         if (!(inp->inp_flags & INP_DROPPED))
858                 error = tp->t_fb->tfb_tcp_output(tp);
859
860 out:
861         TCPDEBUG2(PRU_SHUTDOWN);
862         TCP_PROBE2(debug__user, tp, PRU_SHUTDOWN);
863         INP_WUNLOCK(inp);
864         NET_EPOCH_EXIT(et);
865
866         return (error);
867 }
868
869 /*
870  * After a receive, possibly send window update to peer.
871  */
872 static int
873 tcp_usr_rcvd(struct socket *so, int flags)
874 {
875         struct inpcb *inp;
876         struct tcpcb *tp = NULL;
877         int error = 0;
878
879         TCPDEBUG0;
880         inp = sotoinpcb(so);
881         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
882         INP_WLOCK(inp);
883         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
884                 error = ECONNRESET;
885                 goto out;
886         }
887         tp = intotcpcb(inp);
888         TCPDEBUG1();
889         /*
890          * For passively-created TFO connections, don't attempt a window
891          * update while still in SYN_RECEIVED as this may trigger an early
892          * SYN|ACK.  It is preferable to have the SYN|ACK be sent along with
893          * application response data, or failing that, when the DELACK timer
894          * expires.
895          */
896         if (IS_FASTOPEN(tp->t_flags) &&
897             (tp->t_state == TCPS_SYN_RECEIVED))
898                 goto out;
899 #ifdef TCP_OFFLOAD
900         if (tp->t_flags & TF_TOE)
901                 tcp_offload_rcvd(tp);
902         else
903 #endif
904         tp->t_fb->tfb_tcp_output(tp);
905
906 out:
907         TCPDEBUG2(PRU_RCVD);
908         TCP_PROBE2(debug__user, tp, PRU_RCVD);
909         INP_WUNLOCK(inp);
910         return (error);
911 }
912
913 /*
914  * Do a send by putting data in output queue and updating urgent
915  * marker if URG set.  Possibly send more data.  Unlike the other
916  * pru_*() routines, the mbuf chains are our responsibility.  We
917  * must either enqueue them or free them.  The other pru_* routines
918  * generally are caller-frees.
919  */
920 static int
921 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
922     struct sockaddr *nam, struct mbuf *control, struct thread *td)
923 {
924         struct epoch_tracker et;
925         int error = 0;
926         struct inpcb *inp;
927         struct tcpcb *tp = NULL;
928 #ifdef INET
929 #ifdef INET6
930         struct sockaddr_in sin;
931 #endif
932         struct sockaddr_in *sinp;
933 #endif
934 #ifdef INET6
935         int isipv6;
936 #endif
937         u_int8_t incflagsav;
938         u_char vflagsav;
939         bool restoreflags;
940         TCPDEBUG0;
941
942         /*
943          * We require the pcbinfo "read lock" if we will close the socket
944          * as part of this call.
945          */
946         if (flags & PRUS_EOF)
947                 NET_EPOCH_ENTER(et);
948         inp = sotoinpcb(so);
949         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
950         INP_WLOCK(inp);
951         vflagsav = inp->inp_vflag;
952         incflagsav = inp->inp_inc.inc_flags;
953         restoreflags = false;
954         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
955                 if (control)
956                         m_freem(control);
957                 /*
958                  * In case of PRUS_NOTREADY, tcp_usr_ready() is responsible
959                  * for freeing memory.
960                  */
961                 if (m && (flags & PRUS_NOTREADY) == 0)
962                         m_freem(m);
963                 error = ECONNRESET;
964                 goto out;
965         }
966         tp = intotcpcb(inp);
967         TCPDEBUG1();
968         if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
969                 switch (nam->sa_family) {
970 #ifdef INET
971                 case AF_INET:
972                         sinp = (struct sockaddr_in *)nam;
973                         if (sinp->sin_len != sizeof(struct sockaddr_in)) {
974                                 if (m)
975                                         m_freem(m);
976                                 error = EINVAL;
977                                 goto out;
978                         }
979                         if ((inp->inp_vflag & INP_IPV6) != 0) {
980                                 if (m)
981                                         m_freem(m);
982                                 error = EAFNOSUPPORT;
983                                 goto out;
984                         }
985                         if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
986                                 if (m)
987                                         m_freem(m);
988                                 error = EAFNOSUPPORT;
989                                 goto out;
990                         }
991                         if ((error = prison_remote_ip4(td->td_ucred,
992                             &sinp->sin_addr))) {
993                                 if (m)
994                                         m_freem(m);
995                                 goto out;
996                         }
997 #ifdef INET6
998                         isipv6 = 0;
999 #endif
1000                         break;
1001 #endif /* INET */
1002 #ifdef INET6
1003                 case AF_INET6:
1004                 {
1005                         struct sockaddr_in6 *sin6;
1006
1007                         sin6 = (struct sockaddr_in6 *)nam;
1008                         if (sin6->sin6_len != sizeof(*sin6)) {
1009                                 if (m)
1010                                         m_freem(m);
1011                                 error = EINVAL;
1012                                 goto out;
1013                         }
1014                         if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
1015                                 if (m)
1016                                         m_freem(m);
1017                                 error = EAFNOSUPPORT;
1018                                 goto out;
1019                         }
1020                         if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr)) {
1021 #ifdef INET
1022                                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
1023                                         error = EINVAL;
1024                                         if (m)
1025                                                 m_freem(m);
1026                                         goto out;
1027                                 }
1028                                 if ((inp->inp_vflag & INP_IPV4) == 0) {
1029                                         error = EAFNOSUPPORT;
1030                                         if (m)
1031                                                 m_freem(m);
1032                                         goto out;
1033                                 }
1034                                 restoreflags = true;
1035                                 inp->inp_vflag &= ~INP_IPV6;
1036                                 sinp = &sin;
1037                                 in6_sin6_2_sin(sinp, sin6);
1038                                 if (IN_MULTICAST(
1039                                     ntohl(sinp->sin_addr.s_addr))) {
1040                                         error = EAFNOSUPPORT;
1041                                         if (m)
1042                                                 m_freem(m);
1043                                         goto out;
1044                                 }
1045                                 if ((error = prison_remote_ip4(td->td_ucred,
1046                                     &sinp->sin_addr))) {
1047                                         if (m)
1048                                                 m_freem(m);
1049                                         goto out;
1050                                 }
1051                                 isipv6 = 0;
1052 #else /* !INET */
1053                                 error = EAFNOSUPPORT;
1054                                 if (m)
1055                                         m_freem(m);
1056                                 goto out;
1057 #endif /* INET */
1058                         } else {
1059                                 if ((inp->inp_vflag & INP_IPV6) == 0) {
1060                                         if (m)
1061                                                 m_freem(m);
1062                                         error = EAFNOSUPPORT;
1063                                         goto out;
1064                                 }
1065                                 restoreflags = true;
1066                                 inp->inp_vflag &= ~INP_IPV4;
1067                                 inp->inp_inc.inc_flags |= INC_ISIPV6;
1068                                 if ((error = prison_remote_ip6(td->td_ucred,
1069                                     &sin6->sin6_addr))) {
1070                                         if (m)
1071                                                 m_freem(m);
1072                                         goto out;
1073                                 }
1074                                 isipv6 = 1;
1075                         }
1076                         break;
1077                 }
1078 #endif /* INET6 */
1079                 default:
1080                         if (m)
1081                                 m_freem(m);
1082                         error = EAFNOSUPPORT;
1083                         goto out;
1084                 }
1085         }
1086         if (control) {
1087                 /* TCP doesn't do control messages (rights, creds, etc) */
1088                 if (control->m_len) {
1089                         m_freem(control);
1090                         if (m)
1091                                 m_freem(m);
1092                         error = EINVAL;
1093                         goto out;
1094                 }
1095                 m_freem(control);       /* empty control, just free it */
1096         }
1097         if (!(flags & PRUS_OOB)) {
1098                 sbappendstream(&so->so_snd, m, flags);
1099                 if (nam && tp->t_state < TCPS_SYN_SENT) {
1100                         /*
1101                          * Do implied connect if not yet connected,
1102                          * initialize window to default value, and
1103                          * initialize maxseg using peer's cached MSS.
1104                          */
1105 #ifdef INET6
1106                         if (isipv6)
1107                                 error = tcp6_connect(tp, nam, td);
1108 #endif /* INET6 */
1109 #if defined(INET6) && defined(INET)
1110                         else
1111 #endif
1112 #ifdef INET
1113                                 error = tcp_connect(tp,
1114                                     (struct sockaddr *)sinp, td);
1115 #endif
1116                         /*
1117                          * The bind operation in tcp_connect succeeded. We
1118                          * no longer want to restore the flags if later
1119                          * operations fail.
1120                          */
1121                         if (error == 0 || inp->inp_lport != 0)
1122                                 restoreflags = false;
1123
1124                         if (error)
1125                                 goto out;
1126                         if (IS_FASTOPEN(tp->t_flags))
1127                                 tcp_fastopen_connect(tp);
1128                         else {
1129                                 tp->snd_wnd = TTCP_CLIENT_SND_WND;
1130                                 tcp_mss(tp, -1);
1131                         }
1132                 }
1133                 if (flags & PRUS_EOF) {
1134                         /*
1135                          * Close the send side of the connection after
1136                          * the data is sent.
1137                          */
1138                         NET_EPOCH_ASSERT();
1139                         socantsendmore(so);
1140                         tcp_usrclosed(tp);
1141                 }
1142                 if (!(inp->inp_flags & INP_DROPPED) &&
1143                     !(flags & PRUS_NOTREADY)) {
1144                         if (flags & PRUS_MORETOCOME)
1145                                 tp->t_flags |= TF_MORETOCOME;
1146                         error = tp->t_fb->tfb_tcp_output(tp);
1147                         if (flags & PRUS_MORETOCOME)
1148                                 tp->t_flags &= ~TF_MORETOCOME;
1149                 }
1150         } else {
1151                 /*
1152                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
1153                  */
1154                 SOCKBUF_LOCK(&so->so_snd);
1155                 if (sbspace(&so->so_snd) < -512) {
1156                         SOCKBUF_UNLOCK(&so->so_snd);
1157                         m_freem(m);
1158                         error = ENOBUFS;
1159                         goto out;
1160                 }
1161                 /*
1162                  * According to RFC961 (Assigned Protocols),
1163                  * the urgent pointer points to the last octet
1164                  * of urgent data.  We continue, however,
1165                  * to consider it to indicate the first octet
1166                  * of data past the urgent section.
1167                  * Otherwise, snd_up should be one lower.
1168                  */
1169                 sbappendstream_locked(&so->so_snd, m, flags);
1170                 SOCKBUF_UNLOCK(&so->so_snd);
1171                 if (nam && tp->t_state < TCPS_SYN_SENT) {
1172                         /*
1173                          * Do implied connect if not yet connected,
1174                          * initialize window to default value, and
1175                          * initialize maxseg using peer's cached MSS.
1176                          */
1177
1178                         /*
1179                          * Not going to contemplate SYN|URG
1180                          */
1181                         if (IS_FASTOPEN(tp->t_flags))
1182                                 tp->t_flags &= ~TF_FASTOPEN;
1183 #ifdef INET6
1184                         if (isipv6)
1185                                 error = tcp6_connect(tp, nam, td);
1186 #endif /* INET6 */
1187 #if defined(INET6) && defined(INET)
1188                         else
1189 #endif
1190 #ifdef INET
1191                                 error = tcp_connect(tp,
1192                                     (struct sockaddr *)sinp, td);
1193 #endif
1194                         /*
1195                          * The bind operation in tcp_connect succeeded. We
1196                          * no longer want to restore the flags if later
1197                          * operations fail.
1198                          */
1199                         if (error == 0 || inp->inp_lport != 0)
1200                                 restoreflags = false;
1201
1202                         if (error)
1203                                 goto out;
1204                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
1205                         tcp_mss(tp, -1);
1206                 }
1207                 tp->snd_up = tp->snd_una + sbavail(&so->so_snd);
1208                 if (!(flags & PRUS_NOTREADY)) {
1209                         tp->t_flags |= TF_FORCEDATA;
1210                         error = tp->t_fb->tfb_tcp_output(tp);
1211                         tp->t_flags &= ~TF_FORCEDATA;
1212                 }
1213         }
1214         TCP_LOG_EVENT(tp, NULL,
1215             &inp->inp_socket->so_rcv,
1216             &inp->inp_socket->so_snd,
1217             TCP_LOG_USERSEND, error,
1218             0, NULL, false);
1219 out:
1220         /*
1221          * If the request was unsuccessful and we changed flags,
1222          * restore the original flags.
1223          */
1224         if (error != 0 && restoreflags) {
1225                 inp->inp_vflag = vflagsav;
1226                 inp->inp_inc.inc_flags = incflagsav;
1227         }
1228         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
1229                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1230         TCP_PROBE2(debug__user, tp, (flags & PRUS_OOB) ? PRU_SENDOOB :
1231                    ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
1232         INP_WUNLOCK(inp);
1233         if (flags & PRUS_EOF)
1234                 NET_EPOCH_EXIT(et);
1235         return (error);
1236 }
1237
1238 static int
1239 tcp_usr_ready(struct socket *so, struct mbuf *m, int count)
1240 {
1241         struct inpcb *inp;
1242         struct tcpcb *tp;
1243         int error;
1244
1245         inp = sotoinpcb(so);
1246         INP_WLOCK(inp);
1247         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1248                 INP_WUNLOCK(inp);
1249                 mb_free_notready(m, count);
1250                 return (ECONNRESET);
1251         }
1252         tp = intotcpcb(inp);
1253
1254         SOCKBUF_LOCK(&so->so_snd);
1255         error = sbready(&so->so_snd, m, count);
1256         SOCKBUF_UNLOCK(&so->so_snd);
1257         if (error == 0)
1258                 error = tp->t_fb->tfb_tcp_output(tp);
1259         INP_WUNLOCK(inp);
1260
1261         return (error);
1262 }
1263
1264 /*
1265  * Abort the TCP.  Drop the connection abruptly.
1266  */
1267 static void
1268 tcp_usr_abort(struct socket *so)
1269 {
1270         struct inpcb *inp;
1271         struct tcpcb *tp = NULL;
1272         struct epoch_tracker et;
1273         TCPDEBUG0;
1274
1275         inp = sotoinpcb(so);
1276         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
1277
1278         NET_EPOCH_ENTER(et);
1279         INP_WLOCK(inp);
1280         KASSERT(inp->inp_socket != NULL,
1281             ("tcp_usr_abort: inp_socket == NULL"));
1282
1283         /*
1284          * If we still have full TCP state, and we're not dropped, drop.
1285          */
1286         if (!(inp->inp_flags & INP_TIMEWAIT) &&
1287             !(inp->inp_flags & INP_DROPPED)) {
1288                 tp = intotcpcb(inp);
1289                 TCPDEBUG1();
1290                 tp = tcp_drop(tp, ECONNABORTED);
1291                 if (tp == NULL)
1292                         goto dropped;
1293                 TCPDEBUG2(PRU_ABORT);
1294                 TCP_PROBE2(debug__user, tp, PRU_ABORT);
1295         }
1296         if (!(inp->inp_flags & INP_DROPPED)) {
1297                 SOCK_LOCK(so);
1298                 so->so_state |= SS_PROTOREF;
1299                 SOCK_UNLOCK(so);
1300                 inp->inp_flags |= INP_SOCKREF;
1301         }
1302         INP_WUNLOCK(inp);
1303 dropped:
1304         NET_EPOCH_EXIT(et);
1305 }
1306
1307 /*
1308  * TCP socket is closed.  Start friendly disconnect.
1309  */
1310 static void
1311 tcp_usr_close(struct socket *so)
1312 {
1313         struct inpcb *inp;
1314         struct tcpcb *tp = NULL;
1315         struct epoch_tracker et;
1316         TCPDEBUG0;
1317
1318         inp = sotoinpcb(so);
1319         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
1320
1321         NET_EPOCH_ENTER(et);
1322         INP_WLOCK(inp);
1323         KASSERT(inp->inp_socket != NULL,
1324             ("tcp_usr_close: inp_socket == NULL"));
1325
1326         /*
1327          * If we still have full TCP state, and we're not dropped, initiate
1328          * a disconnect.
1329          */
1330         if (!(inp->inp_flags & INP_TIMEWAIT) &&
1331             !(inp->inp_flags & INP_DROPPED)) {
1332                 tp = intotcpcb(inp);
1333                 TCPDEBUG1();
1334                 tcp_disconnect(tp);
1335                 TCPDEBUG2(PRU_CLOSE);
1336                 TCP_PROBE2(debug__user, tp, PRU_CLOSE);
1337         }
1338         if (!(inp->inp_flags & INP_DROPPED)) {
1339                 SOCK_LOCK(so);
1340                 so->so_state |= SS_PROTOREF;
1341                 SOCK_UNLOCK(so);
1342                 inp->inp_flags |= INP_SOCKREF;
1343         }
1344         INP_WUNLOCK(inp);
1345         NET_EPOCH_EXIT(et);
1346 }
1347
1348 /*
1349  * Receive out-of-band data.
1350  */
1351 static int
1352 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
1353 {
1354         int error = 0;
1355         struct inpcb *inp;
1356         struct tcpcb *tp = NULL;
1357
1358         TCPDEBUG0;
1359         inp = sotoinpcb(so);
1360         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
1361         INP_WLOCK(inp);
1362         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1363                 error = ECONNRESET;
1364                 goto out;
1365         }
1366         tp = intotcpcb(inp);
1367         TCPDEBUG1();
1368         if ((so->so_oobmark == 0 &&
1369              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
1370             so->so_options & SO_OOBINLINE ||
1371             tp->t_oobflags & TCPOOB_HADDATA) {
1372                 error = EINVAL;
1373                 goto out;
1374         }
1375         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1376                 error = EWOULDBLOCK;
1377                 goto out;
1378         }
1379         m->m_len = 1;
1380         *mtod(m, caddr_t) = tp->t_iobc;
1381         if ((flags & MSG_PEEK) == 0)
1382                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1383
1384 out:
1385         TCPDEBUG2(PRU_RCVOOB);
1386         TCP_PROBE2(debug__user, tp, PRU_RCVOOB);
1387         INP_WUNLOCK(inp);
1388         return (error);
1389 }
1390
#ifdef INET
/*
 * User-request dispatch table for TCP over IPv4.  Entries without a
 * tcp_ prefix are generic handlers defined outside this file.
 */
struct pr_usrreqs tcp_usrreqs = {
	/* Socket lifetime. */
	.pru_attach =		tcp_usr_attach,
	.pru_detach =		tcp_usr_detach,
	.pru_close =		tcp_usr_close,
	.pru_abort =		tcp_usr_abort,

	/* Connection setup and teardown. */
	.pru_bind =		tcp_usr_bind,
	.pru_listen =		tcp_usr_listen,
	.pru_accept =		tcp_usr_accept,
	.pru_connect =		tcp_usr_connect,
	.pru_disconnect =	tcp_usr_disconnect,
	.pru_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pru_send =		tcp_usr_send,
	.pru_ready =		tcp_usr_ready,
	.pru_rcvd =		tcp_usr_rcvd,
	.pru_rcvoob =		tcp_usr_rcvoob,

	/* Addresses, ioctls and labels. */
	.pru_sockaddr =		in_getsockaddr,
	.pru_peeraddr =		in_getpeeraddr,
	.pru_control =		in_control,
	.pru_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET */
1413
#ifdef INET6
/*
 * User-request dispatch table for TCP over IPv6.  It shares the
 * family-independent tcp_usr_*() handlers with the IPv4 table and uses
 * tcp6_/in6_ variants for the address-dependent operations.
 */
struct pr_usrreqs tcp6_usrreqs = {
	/* Socket lifetime. */
	.pru_attach =		tcp_usr_attach,
	.pru_detach =		tcp_usr_detach,
	.pru_close =		tcp_usr_close,
	.pru_abort =		tcp_usr_abort,

	/* Connection setup and teardown. */
	.pru_bind =		tcp6_usr_bind,
	.pru_listen =		tcp6_usr_listen,
	.pru_accept =		tcp6_usr_accept,
	.pru_connect =		tcp6_usr_connect,
	.pru_disconnect =	tcp_usr_disconnect,
	.pru_shutdown =		tcp_usr_shutdown,

	/* Data transfer. */
	.pru_send =		tcp_usr_send,
	.pru_ready =		tcp_usr_ready,
	.pru_rcvd =		tcp_usr_rcvd,
	.pru_rcvoob =		tcp_usr_rcvoob,

	/* Addresses, ioctls and labels. */
	.pru_sockaddr =		in6_mapped_sockaddr,
	.pru_peeraddr =		in6_mapped_peeraddr,
	.pru_control =		in6_control,
	.pru_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET6 */
1436
#ifdef INET
/*
 * Common subroutine to open an IPv4 TCP connection to the remote host
 * described by "nam".
 *
 * With the pcb hash write-locked: bind a local port via in_pcbbind() if
 * none is set, then let in_pcbconnect_setup() pick the local address and
 * fill in the foreign address/port.  If that lookup reports an existing
 * inpcb for the same connection, EADDRINUSE is returned.  On success the
 * inpcb is rehashed, the window scale to request is computed, the socket
 * is marked connecting and the tcpcb enters SYN_SENT with a fresh ISS.
 *
 * The caller must hold the inpcb write lock.  Returns 0 or an errno.
 */
static int
tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb;
	struct inpcb *oinp;
	struct in_addr laddr;
	u_short lport;
	int error = 0;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK(&V_tcbinfo);

	if (inp->inp_lport == 0)
		error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);

	if (error == 0) {
		/*
		 * Cannot simply call in_pcbconnect, because there might be
		 * an earlier incarnation of this same connection still in
		 * TIME_WAIT state, creating an ADDRINUSE error.
		 */
		laddr = inp->inp_laddr;
		lport = inp->inp_lport;
		error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
		    &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp,
		    td->td_ucred);
		/* A conflicting inpcb always maps to EADDRINUSE. */
		if (oinp != NULL)
			error = EADDRINUSE;
		if (error == 0) {
			inp->inp_laddr = laddr;
			in_pcbrehash(inp);
		}
	}
	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/*
	 * Compute window scaling to request:
	 * Scale to fit into sweet spot.  See tcp_syncache.c.
	 * XXX: This should move to tcp_output().
	 */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(inp->inp_socket);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if (tp->t_flags & TF_REQ_TSTMP)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET */
1509
#ifdef INET6
/*
 * IPv6 counterpart of the common connect subroutine.
 *
 * With the pcb hash write-locked: bind a local port via in6_pcbbind() if
 * none is set, then connect the inpcb to "nam" with in6_pcbconnect().  On
 * success the window scale to request is computed, the socket is marked
 * connecting and the tcpcb enters SYN_SENT with a fresh ISS.
 *
 * The caller must hold the inpcb write lock.  Returns 0 or an errno.
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb;
	int error = 0;

	INP_WLOCK_ASSERT(inp);
	INP_HASH_WLOCK(&V_tcbinfo);

	if (inp->inp_lport == 0)
		error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
	if (error == 0)
		error = in6_pcbconnect(inp, nam, td->td_ucred);

	INP_HASH_WUNLOCK(&V_tcbinfo);
	if (error != 0)
		return (error);

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	soisconnecting(inp->inp_socket);
	TCPSTAT_INC(tcps_connattempt);
	tcp_state_change(tp, TCPS_SYN_SENT);
	tp->iss = tcp_new_isn(&inp->inp_inc);
	if (tp->t_flags & TF_REQ_TSTMP)
		tp->ts_offset = tcp_new_ts_offset(&inp->inp_inc);
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1550
1551 /*
1552  * Export TCP internal state information via a struct tcp_info, based on the
1553  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
1554  * (TCP state machine, etc).  We export all information using FreeBSD-native
1555  * constants -- for example, the numeric values for tcpi_state will differ
1556  * from Linux.
1557  */
1558 static void
1559 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1560 {
1561
1562         INP_WLOCK_ASSERT(tp->t_inpcb);
1563         bzero(ti, sizeof(*ti));
1564
1565         ti->tcpi_state = tp->t_state;
1566         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1567                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1568         if (tp->t_flags & TF_SACK_PERMIT)
1569                 ti->tcpi_options |= TCPI_OPT_SACK;
1570         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1571                 ti->tcpi_options |= TCPI_OPT_WSCALE;
1572                 ti->tcpi_snd_wscale = tp->snd_scale;
1573                 ti->tcpi_rcv_wscale = tp->rcv_scale;
1574         }
1575         if (tp->t_flags & TF_ECN_PERMIT)
1576                 ti->tcpi_options |= TCPI_OPT_ECN;
1577
1578         ti->tcpi_rto = tp->t_rxtcur * tick;
1579         ti->tcpi_last_data_recv = ((uint32_t)ticks - tp->t_rcvtime) * tick;
1580         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1581         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1582
1583         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1584         ti->tcpi_snd_cwnd = tp->snd_cwnd;
1585
1586         /*
1587          * FreeBSD-specific extension fields for tcp_info.
1588          */
1589         ti->tcpi_rcv_space = tp->rcv_wnd;
1590         ti->tcpi_rcv_nxt = tp->rcv_nxt;
1591         ti->tcpi_snd_wnd = tp->snd_wnd;
1592         ti->tcpi_snd_bwnd = 0;          /* Unused, kept for compat. */
1593         ti->tcpi_snd_nxt = tp->snd_nxt;
1594         ti->tcpi_snd_mss = tp->t_maxseg;
1595         ti->tcpi_rcv_mss = tp->t_maxseg;
1596         ti->tcpi_snd_rexmitpack = tp->t_sndrexmitpack;
1597         ti->tcpi_rcv_ooopack = tp->t_rcvoopack;
1598         ti->tcpi_snd_zerowin = tp->t_sndzerowin;
1599 #ifdef TCP_OFFLOAD
1600         if (tp->t_flags & TF_TOE) {
1601                 ti->tcpi_options |= TCPI_OPT_TOE;
1602                 tcp_offload_tcp_info(tp, ti);
1603         }
1604 #endif
1605 }
1606
/*
 * tcp_ctloutput() must drop the inpcb lock before performing copyin on
 * socket option arguments.  When it re-acquires the lock after the copy, it
 * has to revalidate that the connection is still valid for the socket
 * option.
 *
 * INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) write-locks "inp".  If the inpcb
 * is now in TIME_WAIT or dropped, it unlocks again, runs the "cleanup"
 * statement and RETURNS ECONNRESET from the enclosing function.  Otherwise
 * it leaves the lock held and reloads the enclosing function's local
 * variable "tp" from the inpcb.  It can therefore only be used in a
 * function that returns int and declares "struct tcpcb *tp".
 *
 * INP_WLOCK_RECHECK(inp) is the same with no cleanup action.
 */
#define INP_WLOCK_RECHECK_CLEANUP(inp, cleanup) do {			\
	INP_WLOCK(inp);							\
	if ((inp)->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {		\
		INP_WUNLOCK(inp);					\
		cleanup;						\
		return (ECONNRESET);					\
	}								\
	tp = intotcpcb(inp);						\
} while(0)
#define INP_WLOCK_RECHECK(inp) INP_WLOCK_RECHECK_CLEANUP((inp), /* noop */)
1623
/*
 * tcp_ctloutput() - get or set a socket option on a TCP socket.
 *
 * so:   socket being operated on; its inpcb must be non-NULL (KASSERT).
 * sopt: option request (level, name, direction and user buffer).
 *
 * Requests whose level is not IPPROTO_TCP are forwarded to
 * ip6_ctloutput() or ip_ctloutput().  TCP_FUNCTION_BLK is handled here
 * for both directions; every other IPPROTO_TCP option is handed, with
 * the inpcb write-locked, to the tfb_tcp_ctloutput method of the
 * connection's current function block.
 *
 * Returns 0 or an errno value.  ECONNRESET is returned when the inpcb
 * is flagged INP_TIMEWAIT or INP_DROPPED.
 */
1624 int
1625 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1626 {
1627         int     error;
1628         struct  inpcb *inp;
1629         struct  tcpcb *tp;
1630         struct tcp_function_block *blk;
1631         struct tcp_function_set fsn;
1632
1633         error = 0;
1634         inp = sotoinpcb(so);
1635         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1636         if (sopt->sopt_level != IPPROTO_TCP) {
1637 #ifdef INET6
1638                 if (inp->inp_vflag & INP_IPV6PROTO) {
1639                         error = ip6_ctloutput(so, sopt);
1640                         /*
1641                          * In case of the IPV6_USE_MIN_MTU socket option,
1642                          * set the INC_IPV6MINMTU flag to announce a
1643                          * corresponding MSS during the initial handshake.
1644                          * If the TCP connection is not in the front states,
1645                          * just reduce the MSS being used.
1646                          * This avoids the sending of TCP segments which will
1647                          * be fragmented at the IPv6 layer.
1648                          */
1649                         if ((error == 0) &&
1650                             (sopt->sopt_dir == SOPT_SET) &&
1651                             (sopt->sopt_level == IPPROTO_IPV6) &&
1652                             (sopt->sopt_name == IPV6_USE_MIN_MTU)) {
1653                                 INP_WLOCK(inp);
1654                                 if ((inp->inp_flags &
1655                                     (INP_TIMEWAIT | INP_DROPPED))) {
1656                                         INP_WUNLOCK(inp);
1657                                         return (ECONNRESET);
1658                                 }
1659                                 inp->inp_inc.inc_flags |= INC_IPV6MINMTU;
1660                                 tp = intotcpcb(inp);
1661                                 if ((tp->t_state >= TCPS_SYN_SENT) &&
1662                                     (inp->inp_inc.inc_flags & INC_ISIPV6)) {
1663                                         struct ip6_pktopts *opt;
1664
1665                                         opt = inp->in6p_outputopts;
1666                                         if ((opt != NULL) &&
1667                                             (opt->ip6po_minmtu ==
1668                                             IP6PO_MINMTU_ALL)) {
1669                                                 if (tp->t_maxseg > TCP6_MSS) {
1670                                                         tp->t_maxseg = TCP6_MSS;
1671                                                 }
1672                                         }
1673                                 }
1674                                 INP_WUNLOCK(inp);
1675                         }
1676                 }
1677 #endif /* INET6 */
1678 #if defined(INET6) && defined(INET)
1679                 else
1680 #endif
1681 #ifdef INET
1682                 {
1683                         error = ip_ctloutput(so, sopt);
1684                 }
1685 #endif
1686                 return (error);
1687         }
1688         INP_WLOCK(inp);
1689         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1690                 INP_WUNLOCK(inp);
1691                 return (ECONNRESET);
1692         }
1693         tp = intotcpcb(inp);
1694         /*
1695          * Protect the TCP option TCP_FUNCTION_BLK so
1696          * that a sub-function can *never* overwrite this.
1697          */
1698         if ((sopt->sopt_dir == SOPT_SET) && 
1699             (sopt->sopt_name == TCP_FUNCTION_BLK)) {
                  /* The copyin is done with the inpcb unlocked. */
1700                 INP_WUNLOCK(inp);
1701                 error = sooptcopyin(sopt, &fsn, sizeof fsn,
1702                     sizeof fsn);
1703                 if (error)
1704                         return (error);
1705                 INP_WLOCK_RECHECK(inp);
1706                 blk = find_and_ref_tcp_functions(&fsn);
1707                 if (blk == NULL) {
1708                         INP_WUNLOCK(inp);
1709                         return (ENOENT);
1710                 }
1711                 if (tp->t_fb == blk) {
1712                         /* You already have this */
1713                         refcount_release(&blk->tfb_refcnt);
1714                         INP_WUNLOCK(inp);
1715                         return (0);
1716                 }
1717                 if (tp->t_state != TCPS_CLOSED) {
1718                         /* 
1719                          * The user has advanced the state
1720                          * past the initial point, we may not
1721                          * be able to switch. 
1722                          */
1723                         if (blk->tfb_tcp_handoff_ok != NULL) {
1724                                 /* 
1725                                  * Does the stack provide a
1726                                  * query mechanism, if so it may
1727                                  * still be possible?
1728                                  */
1729                                 error = (*blk->tfb_tcp_handoff_ok)(tp);
1730                         } else
1731                                 error = EINVAL;
1732                         if (error) {
1733                                 refcount_release(&blk->tfb_refcnt);
1734                                 INP_WUNLOCK(inp);
1735                                 return(error);
1736                         }
1737                 }
1738                 if (blk->tfb_flags & TCP_FUNC_BEING_REMOVED) {
1739                         refcount_release(&blk->tfb_refcnt);
1740                         INP_WUNLOCK(inp);
1741                         return (ENOENT);
1742                 }
1743                 /* 
1744                  * Release the old refcnt, the
1745                  * lookup acquired a ref on the
1746                  * new one already.
1747                  */
1748                 if (tp->t_fb->tfb_tcp_fb_fini) {
1749                         /* 
1750                          * Tell the stack to cleanup with 0 i.e.
1751                          * the tcb is not going away.
1752                          */
1753                         (*tp->t_fb->tfb_tcp_fb_fini)(tp, 0);
1754                 }
1755 #ifdef TCPHPTS 
1756                 /* Assure that we are not on any hpts */
1757                 tcp_hpts_remove(tp->t_inpcb, HPTS_REMOVE_ALL);
1758 #endif
1759                 if (blk->tfb_tcp_fb_init) {
1760                         error = (*blk->tfb_tcp_fb_init)(tp);
1761                         if (error) {
1762                                 refcount_release(&blk->tfb_refcnt);
                                  /*
                                   * tp->t_fb still points at the old block
                                   * here, so this re-initialises the stack
                                   * that was just told to clean up.
                                   */
1763                                 if (tp->t_fb->tfb_tcp_fb_init) {
1764                                         if((*tp->t_fb->tfb_tcp_fb_init)(tp) != 0)  {
1765                                                 /* Fall back failed, drop the connection */
1766                                                 INP_WUNLOCK(inp);
1767                                                 soabort(so);
1768                                                 return(error);
1769                                         }
1770                                 }
1771                                 goto err_out;
1772                         }
1773                 }
                  /* Switch over: drop the old block's reference, adopt blk. */
1774                 refcount_release(&tp->t_fb->tfb_refcnt);
1775                 tp->t_fb = blk;
1776 #ifdef TCP_OFFLOAD
1777                 if (tp->t_flags & TF_TOE) {
1778                         tcp_offload_ctloutput(tp, sopt->sopt_dir,
1779                              sopt->sopt_name);
1780                 }
1781 #endif
1782 err_out:
1783                 INP_WUNLOCK(inp);
1784                 return (error);
1785         } else if ((sopt->sopt_dir == SOPT_GET) && 
1786             (sopt->sopt_name == TCP_FUNCTION_BLK)) {
                  /*
                   * Snapshot the block name and reference count under the
                   * lock, then copy out after unlocking.
                   */
1787                 strncpy(fsn.function_set_name, tp->t_fb->tfb_tcp_block_name,
1788                     TCP_FUNCTION_NAME_LEN_MAX);
1789                 fsn.function_set_name[TCP_FUNCTION_NAME_LEN_MAX - 1] = '\0';
1790                 fsn.pcbcnt = tp->t_fb->tfb_refcnt;
1791                 INP_WUNLOCK(inp);
1792                 error = sooptcopyout(sopt, &fsn, sizeof fsn);
1793                 return (error);
1794         }
1795         /* Pass in the INP locked, callee must unlock it */
1796         return (tp->t_fb->tfb_tcp_ctloutput(so, sopt, inp, tp));
1797 }
1798
1799 /*
      * The buf variable in tcp_default_ctloutput() is declared with
      * TCP_LOG_ID_LEN bytes but is also used to receive congestion control
      * names (up to TCP_CA_NAME_MAX) and log dump reasons (up to
      * TCP_LOG_REASON_LEN).
1800  * If this assert becomes untrue, we need to change the size of the buf
1801  * variable in tcp_default_ctloutput().
1802  */
1803 #ifdef CTASSERT
1804 CTASSERT(TCP_CA_NAME_MAX <= TCP_LOG_ID_LEN);
1805 CTASSERT(TCP_LOG_REASON_LEN <= TCP_LOG_ID_LEN);
1806 #endif
1807
/*
 * tcp_default_ctloutput() - process an IPPROTO_TCP-level socket option.
 *
 * so:   socket the option applies to.
 * sopt: option request; sopt_dir selects SOPT_SET or SOPT_GET and
 *       sopt_name selects the option.
 * inp:  inpcb of the socket.  Every case below begins or ends with
 *       INP_WUNLOCK(inp), so the caller is expected to pass it
 *       write-locked; cases that leave the unlock to a helper are
 *       marked inline.
 * tp:   tcpcb of the connection.
 *
 * SET cases drop the lock around sooptcopyin() and re-take it with
 * INP_WLOCK_RECHECK() before modifying tp; GET cases copy the value
 * into a local under the lock and call sooptcopyout() after unlocking.
 * TCP_PCAP_OUT and TCP_PCAP_IN act on tp->t_outpkts and tp->t_inpkts
 * respectively, chosen from the option name in the request.
 *
 * Returns 0 or an errno value (ENOPROTOOPT for unknown options).
 */
1808 int
1809 tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp, struct tcpcb *tp)
1810 {
1811         int     error, opt, optval;
1812         u_int   ui;
1813         struct  tcp_info ti;
1814 #ifdef KERN_TLS
1815         struct tls_enable tls;
1816 #endif
1817         struct cc_algo *algo;
1818         char    *pbuf, buf[TCP_LOG_ID_LEN];
1819         size_t  len;
1820
1821         /*
1822          * For TCP_CCALGOOPT forward the control to CC module, for both
1823          * SOPT_SET and SOPT_GET.
1824          */
1825         switch (sopt->sopt_name) {
1826         case TCP_CCALGOOPT:
1827                 INP_WUNLOCK(inp);
1828                 if (sopt->sopt_valsize > CC_ALGOOPT_LIMIT)
1829                         return (EINVAL);
1830                 pbuf = malloc(sopt->sopt_valsize, M_TEMP, M_WAITOK | M_ZERO);
1831                 error = sooptcopyin(sopt, pbuf, sopt->sopt_valsize,
1832                     sopt->sopt_valsize);
1833                 if (error) {
1834                         free(pbuf, M_TEMP);
1835                         return (error);
1836                 }
1837                 INP_WLOCK_RECHECK_CLEANUP(inp, free(pbuf, M_TEMP));
1838                 if (CC_ALGO(tp)->ctl_output != NULL)
1839                         error = CC_ALGO(tp)->ctl_output(tp->ccv, sopt, pbuf);
1840                 else
1841                         error = ENOENT;
1842                 INP_WUNLOCK(inp);
1843                 if (error == 0 && sopt->sopt_dir == SOPT_GET)
1844                         error = sooptcopyout(sopt, pbuf, sopt->sopt_valsize);
1845                 free(pbuf, M_TEMP);
1846                 return (error);
1847         }
1848
1849         switch (sopt->sopt_dir) {
1850         case SOPT_SET:
1851                 switch (sopt->sopt_name) {
1852 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
1853                 case TCP_MD5SIG:
1854                         if (!TCPMD5_ENABLED()) {
1855                                 INP_WUNLOCK(inp);
1856                                 return (ENOPROTOOPT);
1857                         }
1858                         error = TCPMD5_PCBCTL(inp, sopt);
                          /*
                           * NOTE(review): the error return below does not
                           * unlock inp; presumably TCPMD5_PCBCTL() has
                           * already dropped the lock when it fails --
                           * confirm against its implementation.
                           */
1859                         if (error)
1860                                 return (error);
1861                         goto unlock_and_done;
1862 #endif /* IPSEC */
1863
1864                 case TCP_NODELAY:
1865                 case TCP_NOOPT:
1866                         INP_WUNLOCK(inp);
1867                         error = sooptcopyin(sopt, &optval, sizeof optval,
1868                             sizeof optval);
1869                         if (error)
1870                                 return (error);
1871
1872                         INP_WLOCK_RECHECK(inp);
1873                         switch (sopt->sopt_name) {
1874                         case TCP_NODELAY:
1875                                 opt = TF_NODELAY;
1876                                 break;
1877                         case TCP_NOOPT:
1878                                 opt = TF_NOOPT;
1879                                 break;
1880                         default:
1881                                 opt = 0; /* dead code to fool gcc */
1882                                 break;
1883                         }
1884
1885                         if (optval)
1886                                 tp->t_flags |= opt;
1887                         else
1888                                 tp->t_flags &= ~opt;
                          /*
                           * Common exit for SET cases that finish with inp
                           * still locked.
                           */
1889 unlock_and_done:
1890 #ifdef TCP_OFFLOAD
1891                         if (tp->t_flags & TF_TOE) {
1892                                 tcp_offload_ctloutput(tp, sopt->sopt_dir,
1893                                     sopt->sopt_name);
1894                         }
1895 #endif
1896                         INP_WUNLOCK(inp);
1897                         break;
1898
1899                 case TCP_NOPUSH:
1900                         INP_WUNLOCK(inp);
1901                         error = sooptcopyin(sopt, &optval, sizeof optval,
1902                             sizeof optval);
1903                         if (error)
1904                                 return (error);
1905
1906                         INP_WLOCK_RECHECK(inp);
1907                         if (optval)
1908                                 tp->t_flags |= TF_NOPUSH;
1909                         else if (tp->t_flags & TF_NOPUSH) {
                                  /*
                                   * Clearing the flag on an established
                                   * connection also calls the output routine.
                                   */
1910                                 tp->t_flags &= ~TF_NOPUSH;
1911                                 if (TCPS_HAVEESTABLISHED(tp->t_state))
1912                                         error = tp->t_fb->tfb_tcp_output(tp);
1913                         }
1914                         goto unlock_and_done;
1915
1916                 case TCP_MAXSEG:
1917                         INP_WUNLOCK(inp);
1918                         error = sooptcopyin(sopt, &optval, sizeof optval,
1919                             sizeof optval);
1920                         if (error)
1921                                 return (error);
1922
1923                         INP_WLOCK_RECHECK(inp);
                          /* The segment size may only be lowered, never raised. */
1924                         if (optval > 0 && optval <= tp->t_maxseg &&
1925                             optval + 40 >= V_tcp_minmss)
1926                                 tp->t_maxseg = optval;
1927                         else
1928                                 error = EINVAL;
1929                         goto unlock_and_done;
1930
1931                 case TCP_INFO:
1932                         INP_WUNLOCK(inp);
1933                         error = EINVAL;
1934                         break;
1935
1936                 case TCP_CONGESTION:
1937                         INP_WUNLOCK(inp);
1938                         error = sooptcopyin(sopt, buf, TCP_CA_NAME_MAX - 1, 1);
1939                         if (error)
1940                                 break;
1941                         buf[sopt->sopt_valsize] = '\0';
1942                         INP_WLOCK_RECHECK(inp);
1943                         CC_LIST_RLOCK();
1944                         STAILQ_FOREACH(algo, &cc_list, entries)
1945                                 if (strncmp(buf, algo->name,
1946                                     TCP_CA_NAME_MAX) == 0)
1947                                         break;
1948                         CC_LIST_RUNLOCK();
1949                         if (algo == NULL) {
1950                                 INP_WUNLOCK(inp);
1951                                 error = EINVAL;
1952                                 break;
1953                         }
1954                         /*
1955                          * We hold a write lock over the tcb so it's safe to
1956                          * do these things without ordering concerns.
1957                          */
1958                         if (CC_ALGO(tp)->cb_destroy != NULL)
1959                                 CC_ALGO(tp)->cb_destroy(tp->ccv);
1960                         CC_DATA(tp) = NULL;
1961                         CC_ALGO(tp) = algo;
1962                         /*
1963                          * If something goes pear shaped initialising the new
1964                          * algo, fall back to newreno (which does not
1965                          * require initialisation).
1966                          */
1967                         if (algo->cb_init != NULL &&
1968                             algo->cb_init(tp->ccv) != 0) {
1969                                 CC_ALGO(tp) = &newreno_cc_algo;
1970                                 /*
1971                                  * The only reason init should fail is
1972                                  * because of malloc.
1973                                  */
1974                                 error = ENOMEM;
1975                         }
1976                         INP_WUNLOCK(inp);
1977                         break;
1978
1979 #ifdef KERN_TLS
1980                 case TCP_TXTLS_ENABLE:
1981                         INP_WUNLOCK(inp);
1982                         error = sooptcopyin(sopt, &tls, sizeof(tls),
1983                             sizeof(tls));
1984                         if (error)
1985                                 break;
1986                         error = ktls_enable_tx(so, &tls);
1987                         break;
1988                 case TCP_TXTLS_MODE:
1989                         INP_WUNLOCK(inp);
1990                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
1991                         if (error)
1992                                 return (error);
1993
1994                         INP_WLOCK_RECHECK(inp);
1995                         error = ktls_set_tx_mode(so, ui);
1996                         INP_WUNLOCK(inp);
1997                         break;
1998 #endif
1999
2000                 case TCP_KEEPIDLE:
2001                 case TCP_KEEPINTVL:
2002                 case TCP_KEEPINIT:
2003                         INP_WUNLOCK(inp);
2004                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2005                         if (error)
2006                                 return (error);
2007
                          /* Reject values whose product with hz would overflow. */
2008                         if (ui > (UINT_MAX / hz)) {
2009                                 error = EINVAL;
2010                                 break;
2011                         }
2012                         ui *= hz;
2013
2014                         INP_WLOCK_RECHECK(inp);
2015                         switch (sopt->sopt_name) {
2016                         case TCP_KEEPIDLE:
2017                                 tp->t_keepidle = ui;
2018                                 /*
2019                                  * XXX: better check current remaining
2020                                  * timeout and "merge" it with new value.
2021                                  */
2022                                 if ((tp->t_state > TCPS_LISTEN) &&
2023                                     (tp->t_state <= TCPS_CLOSING))
2024                                         tcp_timer_activate(tp, TT_KEEP,
2025                                             TP_KEEPIDLE(tp));
2026                                 break;
2027                         case TCP_KEEPINTVL:
2028                                 tp->t_keepintvl = ui;
2029                                 if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2030                                     (TP_MAXIDLE(tp) > 0))
2031                                         tcp_timer_activate(tp, TT_2MSL,
2032                                             TP_MAXIDLE(tp));
2033                                 break;
2034                         case TCP_KEEPINIT:
2035                                 tp->t_keepinit = ui;
2036                                 if (tp->t_state == TCPS_SYN_RECEIVED ||
2037                                     tp->t_state == TCPS_SYN_SENT)
2038                                         tcp_timer_activate(tp, TT_KEEP,
2039                                             TP_KEEPINIT(tp));
2040                                 break;
2041                         }
2042                         goto unlock_and_done;
2043
2044                 case TCP_KEEPCNT:
2045                         INP_WUNLOCK(inp);
2046                         error = sooptcopyin(sopt, &ui, sizeof(ui), sizeof(ui));
2047                         if (error)
2048                                 return (error);
2049
2050                         INP_WLOCK_RECHECK(inp);
2051                         tp->t_keepcnt = ui;
2052                         if ((tp->t_state == TCPS_FIN_WAIT_2) &&
2053                             (TP_MAXIDLE(tp) > 0))
2054                                 tcp_timer_activate(tp, TT_2MSL,
2055                                     TP_MAXIDLE(tp));
2056                         goto unlock_and_done;
2057
2058 #ifdef TCPPCAP
2059                 case TCP_PCAP_OUT:
2060                 case TCP_PCAP_IN:
2061                         INP_WUNLOCK(inp);
2062                         error = sooptcopyin(sopt, &optval, sizeof optval,
2063                             sizeof optval);
2064                         if (error)
2065                                 return (error);
2066
2067                         INP_WLOCK_RECHECK(inp);
                          /*
                           * Select the queue from the requested option name;
                           * testing the TCP_PCAP_OUT constant itself would
                           * pick the same queue for both options.
                           */
2068                         if (optval >= 0)
2069                                 tcp_pcap_set_sock_max(
2070                                     (sopt->sopt_name == TCP_PCAP_OUT) ?
2071                                     &(tp->t_outpkts) : &(tp->t_inpkts), optval);
2072                         else
2073                                 error = EINVAL;
2074                         goto unlock_and_done;
2075 #endif
2076
2077                 case TCP_FASTOPEN: {
2078                         struct tcp_fastopen tfo_optval;
2079
2080                         INP_WUNLOCK(inp);
2081                         if (!V_tcp_fastopen_client_enable &&
2082                             !V_tcp_fastopen_server_enable)
2083                                 return (EPERM);
2084
                          /* At least an int (the enable field) must be supplied. */
2085                         error = sooptcopyin(sopt, &tfo_optval,
2086                                     sizeof(tfo_optval), sizeof(int));
2087                         if (error)
2088                                 return (error);
2089
2090                         INP_WLOCK_RECHECK(inp);
2091                         if (tfo_optval.enable) {
2092                                 if (tp->t_state == TCPS_LISTEN) {
2093                                         if (!V_tcp_fastopen_server_enable) {
2094                                                 error = EPERM;
2095                                                 goto unlock_and_done;
2096                                         }
2097
2098                                         tp->t_flags |= TF_FASTOPEN;
2099                                         if (tp->t_tfo_pending == NULL)
2100                                                 tp->t_tfo_pending =
2101                                                     tcp_fastopen_alloc_counter();
2102                                 } else {
2103                                         /*
2104                                          * If a pre-shared key was provided,
2105                                          * stash it in the client cookie
2106                                          * field of the tcpcb for use during
2107                                          * connect.
2108                                          */
2109                                         if (sopt->sopt_valsize ==
2110                                             sizeof(tfo_optval)) {
2111                                                 memcpy(tp->t_tfo_cookie.client,
2112                                                        tfo_optval.psk,
2113                                                        TCP_FASTOPEN_PSK_LEN);
2114                                                 tp->t_tfo_client_cookie_len =
2115                                                     TCP_FASTOPEN_PSK_LEN;
2116                                         }
2117                                         tp->t_flags |= TF_FASTOPEN;
2118                                 }
2119                         } else
2120                                 tp->t_flags &= ~TF_FASTOPEN;
2121                         goto unlock_and_done;
2122                 }
2123
2124 #ifdef TCP_BLACKBOX
2125                 case TCP_LOG:
2126                         INP_WUNLOCK(inp);
2127                         error = sooptcopyin(sopt, &optval, sizeof optval,
2128                             sizeof optval);
2129                         if (error)
2130                                 return (error);
2131
2132                         INP_WLOCK_RECHECK(inp);
2133                         error = tcp_log_state_change(tp, optval);
2134                         goto unlock_and_done;
2135
2136                 case TCP_LOGBUF:
2137                         INP_WUNLOCK(inp);
2138                         error = EINVAL;
2139                         break;
2140
2141                 case TCP_LOGID:
2142                         INP_WUNLOCK(inp);
2143                         error = sooptcopyin(sopt, buf, TCP_LOG_ID_LEN - 1, 0);
2144                         if (error)
2145                                 break;
2146                         buf[sopt->sopt_valsize] = '\0';
2147                         INP_WLOCK_RECHECK(inp);
2148                         error = tcp_log_set_id(tp, buf);
2149                         /* tcp_log_set_id() unlocks the INP. */
2150                         break;
2151
2152                 case TCP_LOGDUMP:
2153                 case TCP_LOGDUMPID:
2154                         INP_WUNLOCK(inp);
2155                         error =
2156                             sooptcopyin(sopt, buf, TCP_LOG_REASON_LEN - 1, 0);
2157                         if (error)
2158                                 break;
2159                         buf[sopt->sopt_valsize] = '\0';
2160                         INP_WLOCK_RECHECK(inp);
2161                         if (sopt->sopt_name == TCP_LOGDUMP) {
2162                                 error = tcp_log_dump_tp_logbuf(tp, buf,
2163                                     M_WAITOK, true);
2164                                 INP_WUNLOCK(inp);
2165                         } else {
2166                                 tcp_log_dump_tp_bucket_logbufs(tp, buf);
2167                                 /*
2168                                  * tcp_log_dump_tp_bucket_logbufs() drops the
2169                                  * INP lock.
2170                                  */
2171                         }
2172                         break;
2173 #endif
2174
2175                 default:
2176                         INP_WUNLOCK(inp);
2177                         error = ENOPROTOOPT;
2178                         break;
2179                 }
2180                 break;
2181
2182         case SOPT_GET:
2183                 tp = intotcpcb(inp);
2184                 switch (sopt->sopt_name) {
2185 #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE)
2186                 case TCP_MD5SIG:
2187                         if (!TCPMD5_ENABLED()) {
2188                                 INP_WUNLOCK(inp);
2189                                 return (ENOPROTOOPT);
2190                         }
                          /*
                           * NOTE(review): no unlock follows on this path;
                           * presumably TCPMD5_PCBCTL() releases inp itself
                           * for SOPT_GET -- confirm.
                           */
2191                         error = TCPMD5_PCBCTL(inp, sopt);
2192                         break;
2193 #endif
2194
2195                 case TCP_NODELAY:
2196                         optval = tp->t_flags & TF_NODELAY;
2197                         INP_WUNLOCK(inp);
2198                         error = sooptcopyout(sopt, &optval, sizeof optval);
2199                         break;
2200                 case TCP_MAXSEG:
2201                         optval = tp->t_maxseg;
2202                         INP_WUNLOCK(inp);
2203                         error = sooptcopyout(sopt, &optval, sizeof optval);
2204                         break;
2205                 case TCP_NOOPT:
2206                         optval = tp->t_flags & TF_NOOPT;
2207                         INP_WUNLOCK(inp);
2208                         error = sooptcopyout(sopt, &optval, sizeof optval);
2209                         break;
2210                 case TCP_NOPUSH:
2211                         optval = tp->t_flags & TF_NOPUSH;
2212                         INP_WUNLOCK(inp);
2213                         error = sooptcopyout(sopt, &optval, sizeof optval);
2214                         break;
2215                 case TCP_INFO:
2216                         tcp_fill_info(tp, &ti);
2217                         INP_WUNLOCK(inp);
2218                         error = sooptcopyout(sopt, &ti, sizeof ti);
2219                         break;
2220                 case TCP_CONGESTION:
2221                         len = strlcpy(buf, CC_ALGO(tp)->name, TCP_CA_NAME_MAX);
2222                         INP_WUNLOCK(inp);
2223                         error = sooptcopyout(sopt, buf, len + 1);
2224                         break;
2225                 case TCP_KEEPIDLE:
2226                 case TCP_KEEPINTVL:
2227                 case TCP_KEEPINIT:
2228                 case TCP_KEEPCNT:
                          /* The three timer values are divided by hz; the count is not. */
2229                         switch (sopt->sopt_name) {
2230                         case TCP_KEEPIDLE:
2231                                 ui = TP_KEEPIDLE(tp) / hz;
2232                                 break;
2233                         case TCP_KEEPINTVL:
2234                                 ui = TP_KEEPINTVL(tp) / hz;
2235                                 break;
2236                         case TCP_KEEPINIT:
2237                                 ui = TP_KEEPINIT(tp) / hz;
2238                                 break;
2239                         case TCP_KEEPCNT:
2240                                 ui = TP_KEEPCNT(tp);
2241                                 break;
2242                         }
2243                         INP_WUNLOCK(inp);
2244                         error = sooptcopyout(sopt, &ui, sizeof(ui));
2245                         break;
2246 #ifdef TCPPCAP
2247                 case TCP_PCAP_OUT:
2248                 case TCP_PCAP_IN:
                          /* Report the queue that matches the requested option. */
2249                         optval = tcp_pcap_get_sock_max((sopt->sopt_name ==
2250                             TCP_PCAP_OUT) ? &(tp->t_outpkts) : &(tp->t_inpkts));
2251                         INP_WUNLOCK(inp);
2252                         error = sooptcopyout(sopt, &optval, sizeof optval);
2253                         break;
2254 #endif
2255                 case TCP_FASTOPEN:
2256                         optval = tp->t_flags & TF_FASTOPEN;
2257                         INP_WUNLOCK(inp);
2258                         error = sooptcopyout(sopt, &optval, sizeof optval);
2259                         break;
2260 #ifdef TCP_BLACKBOX
2261                 case TCP_LOG:
2262                         optval = tp->t_logstate;
2263                         INP_WUNLOCK(inp);
2264                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2265                         break;
2266                 case TCP_LOGBUF:
2267                         /* tcp_log_getlogbuf() does INP_WUNLOCK(inp) */
2268                         error = tcp_log_getlogbuf(sopt, tp);
2269                         break;
2270                 case TCP_LOGID:
2271                         len = tcp_log_get_id(tp, buf);
2272                         INP_WUNLOCK(inp);
2273                         error = sooptcopyout(sopt, buf, len + 1);
2274                         break;
2275                 case TCP_LOGDUMP:
2276                 case TCP_LOGDUMPID:
2277                         INP_WUNLOCK(inp);
2278                         error = EINVAL;
2279                         break;
2280 #endif
2281 #ifdef KERN_TLS
2282                 case TCP_TXTLS_MODE:
2283                         optval = ktls_get_tx_mode(so);
2284                         INP_WUNLOCK(inp);
2285                         error = sooptcopyout(sopt, &optval, sizeof(optval));
2286                         break;
2287 #endif
2288                 default:
2289                         INP_WUNLOCK(inp);
2290                         error = ENOPROTOOPT;
2291                         break;
2292                 }
2293                 break;
2294         }
2295         return (error);
2296 }
2297 #undef INP_WLOCK_RECHECK
2298 #undef INP_WLOCK_RECHECK_CLEANUP
2299
2300 /*
2301  * Attach TCP protocol to socket, allocating
2302  * internet protocol control block, tcp control block,
2303  * buffer space, and entering LISTEN state if to accept connections.
      *
      * As written here the new tcpcb is always left in TCPS_CLOSED.
      * Returns 0 on success, the error from soreserve() or in_pcballoc(),
      * or ENOBUFS when tcp_newtcpcb() fails (the inpcb is then detached and
      * freed again).
2304  */
2305 static int
2306 tcp_attach(struct socket *so)
2307 {
2308         struct tcpcb *tp;
2309         struct inpcb *inp;
2310         struct epoch_tracker et;
2311         int error;
2312
          /* Reserve buffer space only if either high-water mark is still zero. */
2313         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
2314                 error = soreserve(so, V_tcp_sendspace, V_tcp_recvspace);
2315                 if (error)
2316                         return (error);
2317         }
2318         so->so_rcv.sb_flags |= SB_AUTOSIZE;
2319         so->so_snd.sb_flags |= SB_AUTOSIZE;
2320         NET_EPOCH_ENTER(et);
          /*
           * NOTE(review): inp is unlocked further down without a visible lock
           * call; presumably in_pcballoc() returns it write-locked -- confirm.
           */
2321         error = in_pcballoc(so, &V_tcbinfo);
2322         if (error) {
2323                 NET_EPOCH_EXIT(et);
2324                 return (error);
2325         }
2326         inp = sotoinpcb(so);
2327 #ifdef INET6
2328         if (inp->inp_vflag & INP_IPV6PROTO) {
2329                 inp->inp_vflag |= INP_IPV6;
                  /* Unless the pcb is v6-only, also flag it for IPv4. */
2330                 if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
2331                         inp->inp_vflag |= INP_IPV4;
2332                 inp->in6p_hops = -1;    /* use kernel default */
2333         }
2334         else
2335 #endif
2336         inp->inp_vflag |= INP_IPV4;
2337         tp = tcp_newtcpcb(inp);
2338         if (tp == NULL) {
2339                 in_pcbdetach(inp);
2340                 in_pcbfree(inp);
2341                 NET_EPOCH_EXIT(et);
2342                 return (ENOBUFS);
2343         }
2344         tp->t_state = TCPS_CLOSED;
2345         INP_WUNLOCK(inp);
2346         NET_EPOCH_EXIT(et);
2347         TCPSTATES_INC(TCPS_CLOSED);
2348         return (0);
2349 }
2350
2351 /*
2352  * Initiate (or continue) disconnect.
2353  * If embryonic state, just send reset (once).
2354  * If in ``let data drain'' option and linger null, just drop.
2355  * Otherwise (hard), mark socket disconnecting and drop
2356  * current input data; switch states based on user close, and
2357  * send segment to peer (with FIN).
      *
      * The caller must be inside the network epoch and hold the write lock
      * on the connection's inpcb; both are asserted on entry.
2358  */
2359 static void
2360 tcp_disconnect(struct tcpcb *tp)
2361 {
2362         struct inpcb *inp = tp->t_inpcb;
2363         struct socket *so = inp->inp_socket;
2364
2365         NET_EPOCH_ASSERT();
2366         INP_WLOCK_ASSERT(inp);
2367
2368         /*
2369          * Neither tcp_close() nor tcp_drop() should return NULL, as the
2370          * socket is still open.
2371          */
          /*
           * Close outright when the connection is not yet established, unless
           * it is a fast-open connection that has moved past LISTEN.
           */
2372         if (tp->t_state < TCPS_ESTABLISHED &&
2373             !(tp->t_state > TCPS_LISTEN && IS_FASTOPEN(tp->t_flags))) {
2374                 tp = tcp_close(tp);
2375                 KASSERT(tp != NULL,
2376                     ("tcp_disconnect: tcp_close() returned NULL"));
2377         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
                  /* SO_LINGER with a zero linger time: drop the connection. */
2378                 tp = tcp_drop(tp, 0);
2379                 KASSERT(tp != NULL,
2380                     ("tcp_disconnect: tcp_drop() returned NULL"));
2381         } else {
2382                 soisdisconnecting(so);
2383                 sbflush(&so->so_rcv);
2384                 tcp_usrclosed(tp);
                  /* Call the output routine only if the inpcb was not dropped. */
2385                 if (!(inp->inp_flags & INP_DROPPED))
2386                         tp->t_fb->tfb_tcp_output(tp);
2387         }
2388 }
2389
2390 /*
2391  * User issued close, and wish to trail through shutdown states:
2392  * if never received SYN, just forget it.  If got a SYN from peer,
2393  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
2394  * If already got a FIN from peer, then almost done; go to LAST_ACK
2395  * state.  In all other cases, have already sent FIN to peer (e.g.
2396  * after PRU_SHUTDOWN), and just have to play tedious game waiting
2397  * for peer to send FIN or not respond to keep-alives, etc.
2398  * We can let the user exit from the close as soon as the FIN is acked.
2399  */
2400 static void
2401 tcp_usrclosed(struct tcpcb *tp)
2402 {
2403
2404         NET_EPOCH_ASSERT();
2405         INP_WLOCK_ASSERT(tp->t_inpcb);
2406
2407         switch (tp->t_state) {
2408         case TCPS_LISTEN:
2409 #ifdef TCP_OFFLOAD
2410                 tcp_offload_listen_stop(tp);
2411 #endif
2412                 tcp_state_change(tp, TCPS_CLOSED);
2413                 /* FALLTHROUGH */
2414         case TCPS_CLOSED:
2415                 tp = tcp_close(tp);
2416                 /*
2417                  * tcp_close() should never return NULL here as the socket is
2418                  * still open.
2419                  */
2420                 KASSERT(tp != NULL,
2421                     ("tcp_usrclosed: tcp_close() returned NULL"));
2422                 break;
2423
2424         case TCPS_SYN_SENT:
2425         case TCPS_SYN_RECEIVED:
2426                 tp->t_flags |= TF_NEEDFIN;
2427                 break;
2428
2429         case TCPS_ESTABLISHED:
2430                 tcp_state_change(tp, TCPS_FIN_WAIT_1);
2431                 break;
2432
2433         case TCPS_CLOSE_WAIT:
2434                 tcp_state_change(tp, TCPS_LAST_ACK);
2435                 break;
2436         }
2437         if (tp->t_state >= TCPS_FIN_WAIT_2) {
2438                 soisdisconnected(tp->t_inpcb->inp_socket);
2439                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
2440                 if (tp->t_state == TCPS_FIN_WAIT_2) {
2441                         int timeout;
2442
2443                         timeout = (tcp_fast_finwait2_recycle) ? 
2444                             tcp_finwait2_timeout : TP_MAXIDLE(tp);
2445                         tcp_timer_activate(tp, TT_2MSL, timeout);
2446                 }
2447         }
2448 }
2449
2450 #ifdef DDB
/*
 * Emit `indent' space characters on the debugger console; a zero or
 * negative count prints nothing.
 */
static void
db_print_indent(int indent)
{

        for (; indent > 0; indent--)
                db_printf(" ");
}
2459
2460 static void
2461 db_print_tstate(int t_state)
2462 {
2463
2464         switch (t_state) {
2465         case TCPS_CLOSED:
2466                 db_printf("TCPS_CLOSED");
2467                 return;
2468
2469         case TCPS_LISTEN:
2470                 db_printf("TCPS_LISTEN");
2471                 return;
2472
2473         case TCPS_SYN_SENT:
2474                 db_printf("TCPS_SYN_SENT");
2475                 return;
2476
2477         case TCPS_SYN_RECEIVED:
2478                 db_printf("TCPS_SYN_RECEIVED");
2479                 return;
2480
2481         case TCPS_ESTABLISHED:
2482                 db_printf("TCPS_ESTABLISHED");
2483                 return;
2484
2485         case TCPS_CLOSE_WAIT:
2486                 db_printf("TCPS_CLOSE_WAIT");
2487                 return;
2488
2489         case TCPS_FIN_WAIT_1:
2490                 db_printf("TCPS_FIN_WAIT_1");
2491                 return;
2492
2493         case TCPS_CLOSING:
2494                 db_printf("TCPS_CLOSING");
2495                 return;
2496
2497         case TCPS_LAST_ACK:
2498                 db_printf("TCPS_LAST_ACK");
2499                 return;
2500
2501         case TCPS_FIN_WAIT_2:
2502                 db_printf("TCPS_FIN_WAIT_2");
2503                 return;
2504
2505         case TCPS_TIME_WAIT:
2506                 db_printf("TCPS_TIME_WAIT");
2507                 return;
2508
2509         default:
2510                 db_printf("unknown");
2511                 return;
2512         }
2513 }
2514
2515 static void
2516 db_print_tflags(u_int t_flags)
2517 {
2518         int comma;
2519
2520         comma = 0;
2521         if (t_flags & TF_ACKNOW) {
2522                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
2523                 comma = 1;
2524         }
2525         if (t_flags & TF_DELACK) {
2526                 db_printf("%sTF_DELACK", comma ? ", " : "");
2527                 comma = 1;
2528         }
2529         if (t_flags & TF_NODELAY) {
2530                 db_printf("%sTF_NODELAY", comma ? ", " : "");
2531                 comma = 1;
2532         }
2533         if (t_flags & TF_NOOPT) {
2534                 db_printf("%sTF_NOOPT", comma ? ", " : "");
2535                 comma = 1;
2536         }
2537         if (t_flags & TF_SENTFIN) {
2538                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
2539                 comma = 1;
2540         }
2541         if (t_flags & TF_REQ_SCALE) {
2542                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
2543                 comma = 1;
2544         }
2545         if (t_flags & TF_RCVD_SCALE) {
2546                 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
2547                 comma = 1;
2548         }
2549         if (t_flags & TF_REQ_TSTMP) {
2550                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
2551                 comma = 1;
2552         }
2553         if (t_flags & TF_RCVD_TSTMP) {
2554                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
2555                 comma = 1;
2556         }
2557         if (t_flags & TF_SACK_PERMIT) {
2558                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
2559                 comma = 1;
2560         }
2561         if (t_flags & TF_NEEDSYN) {
2562                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
2563                 comma = 1;
2564         }
2565         if (t_flags & TF_NEEDFIN) {
2566                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
2567                 comma = 1;
2568         }
2569         if (t_flags & TF_NOPUSH) {
2570                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
2571                 comma = 1;
2572         }
2573         if (t_flags & TF_MORETOCOME) {
2574                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
2575                 comma = 1;
2576         }
2577         if (t_flags & TF_LQ_OVERFLOW) {
2578                 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
2579                 comma = 1;
2580         }
2581         if (t_flags & TF_LASTIDLE) {
2582                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
2583                 comma = 1;
2584         }
2585         if (t_flags & TF_RXWIN0SENT) {
2586                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
2587                 comma = 1;
2588         }
2589         if (t_flags & TF_FASTRECOVERY) {
2590                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
2591                 comma = 1;
2592         }
2593         if (t_flags & TF_CONGRECOVERY) {
2594                 db_printf("%sTF_CONGRECOVERY", comma ? ", " : "");
2595                 comma = 1;
2596         }
2597         if (t_flags & TF_WASFRECOVERY) {
2598                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
2599                 comma = 1;
2600         }
2601         if (t_flags & TF_SIGNATURE) {
2602                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
2603                 comma = 1;
2604         }
2605         if (t_flags & TF_FORCEDATA) {
2606                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
2607                 comma = 1;
2608         }
2609         if (t_flags & TF_TSO) {
2610                 db_printf("%sTF_TSO", comma ? ", " : "");
2611                 comma = 1;
2612         }
2613         if (t_flags & TF_ECN_PERMIT) {
2614                 db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
2615                 comma = 1;
2616         }
2617         if (t_flags & TF_FASTOPEN) {
2618                 db_printf("%sTF_FASTOPEN", comma ? ", " : "");
2619                 comma = 1;
2620         }
2621 }
2622
2623 static void
2624 db_print_toobflags(char t_oobflags)
2625 {
2626         int comma;
2627
2628         comma = 0;
2629         if (t_oobflags & TCPOOB_HAVEDATA) {
2630                 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
2631                 comma = 1;
2632         }
2633         if (t_oobflags & TCPOOB_HADDATA) {
2634                 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
2635                 comma = 1;
2636         }
2637 }
2638
2639 static void
2640 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
2641 {
2642
2643         db_print_indent(indent);
2644         db_printf("%s at %p\n", name, tp);
2645
2646         indent += 2;
2647
2648         db_print_indent(indent);
2649         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
2650            TAILQ_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
2651
2652         db_print_indent(indent);
2653         db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
2654             &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
2655
2656         db_print_indent(indent);
2657         db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
2658             &tp->t_timers->tt_delack, tp->t_inpcb);
2659
2660         db_print_indent(indent);
2661         db_printf("t_state: %d (", tp->t_state);
2662         db_print_tstate(tp->t_state);
2663         db_printf(")\n");
2664
2665         db_print_indent(indent);
2666         db_printf("t_flags: 0x%x (", tp->t_flags);
2667         db_print_tflags(tp->t_flags);
2668         db_printf(")\n");
2669
2670         db_print_indent(indent);
2671         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
2672             tp->snd_una, tp->snd_max, tp->snd_nxt);
2673
2674         db_print_indent(indent);
2675         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
2676            tp->snd_up, tp->snd_wl1, tp->snd_wl2);
2677
2678         db_print_indent(indent);
2679         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
2680             tp->iss, tp->irs, tp->rcv_nxt);
2681
2682         db_print_indent(indent);
2683         db_printf("rcv_adv: 0x%08x   rcv_wnd: %u   rcv_up: 0x%08x\n",
2684             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
2685
2686         db_print_indent(indent);
2687         db_printf("snd_wnd: %u   snd_cwnd: %u\n",
2688            tp->snd_wnd, tp->snd_cwnd);
2689
2690         db_print_indent(indent);
2691         db_printf("snd_ssthresh: %u   snd_recover: "
2692             "0x%08x\n", tp->snd_ssthresh, tp->snd_recover);
2693
2694         db_print_indent(indent);
2695         db_printf("t_rcvtime: %u   t_startime: %u\n",
2696             tp->t_rcvtime, tp->t_starttime);
2697
2698         db_print_indent(indent);
2699         db_printf("t_rttime: %u   t_rtsq: 0x%08x\n",
2700             tp->t_rtttime, tp->t_rtseq);
2701
2702         db_print_indent(indent);
2703         db_printf("t_rxtcur: %d   t_maxseg: %u   t_srtt: %d\n",
2704             tp->t_rxtcur, tp->t_maxseg, tp->t_srtt);
2705
2706         db_print_indent(indent);
2707         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
2708             "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
2709             tp->t_rttbest);
2710
2711         db_print_indent(indent);
2712         db_printf("t_rttupdated: %lu   max_sndwnd: %u   t_softerror: %d\n",
2713             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
2714
2715         db_print_indent(indent);
2716         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
2717         db_print_toobflags(tp->t_oobflags);
2718         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
2719
2720         db_print_indent(indent);
2721         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
2722             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
2723
2724         db_print_indent(indent);
2725         db_printf("ts_recent: %u   ts_recent_age: %u\n",
2726             tp->ts_recent, tp->ts_recent_age);
2727
2728         db_print_indent(indent);
2729         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
2730             "%u\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
2731
2732         db_print_indent(indent);
2733         db_printf("snd_ssthresh_prev: %u   snd_recover_prev: 0x%08x   "
2734             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
2735             tp->snd_recover_prev, tp->t_badrxtwin);
2736
2737         db_print_indent(indent);
2738         db_printf("snd_numholes: %d  snd_holes first: %p\n",
2739             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
2740
2741         db_print_indent(indent);
2742         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
2743             "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
2744
2745         /* Skip sackblks, sackhint. */
2746
2747         db_print_indent(indent);
2748         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
2749             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
2750 }
2751
2752 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
2753 {
2754         struct tcpcb *tp;
2755
2756         if (!have_addr) {
2757                 db_printf("usage: show tcpcb <addr>\n");
2758                 return;
2759         }
2760         tp = (struct tcpcb *)addr;
2761
2762         db_print_tcpcb(tp, "tcpcb", 0);
2763 }
2764 #endif