]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/netinet/tcp_usrreq.c
Make callers to in6_selectsrc() and in6_pcbladdr() pass in memory
[FreeBSD/FreeBSD.git] / sys / netinet / tcp_usrreq.c
1 /*-
2  * Copyright (c) 1982, 1986, 1988, 1993
3  *      The Regents of the University of California.
4  * Copyright (c) 2006-2007 Robert N. M. Watson
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  * 4. Neither the name of the University nor the names of its contributors
16  *    may be used to endorse or promote products derived from this software
17  *    without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  *
31  *      From: @(#)tcp_usrreq.c  8.2 (Berkeley) 1/3/94
32  */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_ddb.h"
38 #include "opt_inet.h"
39 #include "opt_inet6.h"
40 #include "opt_tcpdebug.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/kernel.h>
46 #include <sys/sysctl.h>
47 #include <sys/mbuf.h>
48 #ifdef INET6
49 #include <sys/domain.h>
50 #endif /* INET6 */
51 #include <sys/socket.h>
52 #include <sys/socketvar.h>
53 #include <sys/protosw.h>
54 #include <sys/proc.h>
55 #include <sys/jail.h>
56 #include <sys/vimage.h>
57
58 #ifdef DDB
59 #include <ddb/ddb.h>
60 #endif
61
62 #include <net/if.h>
63 #include <net/route.h>
64
65 #include <netinet/in.h>
66 #include <netinet/in_systm.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #endif
70 #include <netinet/in_pcb.h>
71 #ifdef INET6
72 #include <netinet6/in6_pcb.h>
73 #endif
74 #include <netinet/in_var.h>
75 #include <netinet/ip_var.h>
76 #ifdef INET6
77 #include <netinet6/ip6_var.h>
78 #include <netinet6/scope6_var.h>
79 #endif
80 #include <netinet/tcp.h>
81 #include <netinet/tcp_fsm.h>
82 #include <netinet/tcp_seq.h>
83 #include <netinet/tcp_timer.h>
84 #include <netinet/tcp_var.h>
85 #include <netinet/tcpip.h>
86 #ifdef TCPDEBUG
87 #include <netinet/tcp_debug.h>
88 #endif
89 #include <netinet/tcp_offload.h>
90 #include <netinet/vinet.h>
91
/*
 * TCP protocol interface to the socket abstraction.
 *
 * Forward declarations for the file-local helpers that the pru_*()
 * handlers below call before their definitions appear.
 */
static int      tcp_attach(struct socket *so);
static int      tcp_connect(struct tcpcb *tp, struct sockaddr *nam,
                    struct thread *td);
#ifdef INET6
static int      tcp6_connect(struct tcpcb *tp, struct sockaddr *nam,
                    struct thread *td);
#endif /* INET6 */
static void     tcp_disconnect(struct tcpcb *tp);
static void     tcp_usrclosed(struct tcpcb *tp);
static void     tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti);
105
/*
 * Debug-trace hooks used by the pru_*() handlers in this file.
 *
 *   TCPDEBUG0       declares the local `ostate' (use among declarations).
 *   TCPDEBUG1()     records the current TCP state of the local `tp'.
 *   TCPDEBUG2(req)  emits a user-request trace record when the local `so'
 *                   has SO_DEBUG set and `tp' is non-NULL.
 *
 * The macros rely on locals named `tp', `so' and `ostate' being in scope
 * at the point of use.  Without TCPDEBUG they expand to nothing.
 *
 * TCPDEBUG2 is wrapped in do { } while (0) so that it behaves as a single
 * statement: a bare `if' expansion would silently capture the `else' of an
 * enclosing unbraced if/else.
 */
#ifdef TCPDEBUG
#define TCPDEBUG0       int ostate = 0
#define TCPDEBUG1()     ostate = tp ? tp->t_state : 0
#define TCPDEBUG2(req)  do {                                            \
        if (tp && (so->so_options & SO_DEBUG))                          \
                tcp_trace(TA_USER, ostate, tp, 0, 0, (req));            \
} while (0)
#else
#define TCPDEBUG0
#define TCPDEBUG1()
#define TCPDEBUG2(req)
#endif
116
117 /*
118  * TCP attaches to socket via pru_attach(), reserving space,
119  * and an internet control block.
120  */
121 static int
122 tcp_usr_attach(struct socket *so, int proto, struct thread *td)
123 {
124         struct inpcb *inp;
125         struct tcpcb *tp = NULL;
126         int error;
127         TCPDEBUG0;
128
129         inp = sotoinpcb(so);
130         KASSERT(inp == NULL, ("tcp_usr_attach: inp != NULL"));
131         TCPDEBUG1();
132
133         error = tcp_attach(so);
134         if (error)
135                 goto out;
136
137         if ((so->so_options & SO_LINGER) && so->so_linger == 0)
138                 so->so_linger = TCP_LINGERTIME;
139
140         inp = sotoinpcb(so);
141         tp = intotcpcb(inp);
142 out:
143         TCPDEBUG2(PRU_ATTACH);
144         return error;
145 }
146
147 /*
148  * tcp_detach is called when the socket layer loses its final reference
149  * to the socket, be it a file descriptor reference, a reference from TCP,
150  * etc.  At this point, there is only one case in which we will keep around
151  * inpcb state: time wait.
152  *
153  * This function can probably be re-absorbed back into tcp_usr_detach() now
154  * that there is a single detach path.
155  */
156 static void
157 tcp_detach(struct socket *so, struct inpcb *inp)
158 {
159         struct tcpcb *tp;
160 #ifdef INVARIANTS
161         INIT_VNET_INET(so->so_vnet);
162 #endif
163
164         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
165         INP_WLOCK_ASSERT(inp);
166
167         KASSERT(so->so_pcb == inp, ("tcp_detach: so_pcb != inp"));
168         KASSERT(inp->inp_socket == so, ("tcp_detach: inp_socket != so"));
169
170         tp = intotcpcb(inp);
171
172         if (inp->inp_flags & INP_TIMEWAIT) {
173                 /*
174                  * There are two cases to handle: one in which the time wait
175                  * state is being discarded (INP_DROPPED), and one in which
176                  * this connection will remain in timewait.  In the former,
177                  * it is time to discard all state (except tcptw, which has
178                  * already been discarded by the timewait close code, which
179                  * should be further up the call stack somewhere).  In the
180                  * latter case, we detach from the socket, but leave the pcb
181                  * present until timewait ends.
182                  *
183                  * XXXRW: Would it be cleaner to free the tcptw here?
184                  */
185                 if (inp->inp_flags & INP_DROPPED) {
186                         KASSERT(tp == NULL, ("tcp_detach: INP_TIMEWAIT && "
187                             "INP_DROPPED && tp != NULL"));
188                         in_pcbdetach(inp);
189                         in_pcbfree(inp);
190                 } else {
191                         in_pcbdetach(inp);
192                         INP_WUNLOCK(inp);
193                 }
194         } else {
195                 /*
196                  * If the connection is not in timewait, we consider two
197                  * two conditions: one in which no further processing is
198                  * necessary (dropped || embryonic), and one in which TCP is
199                  * not yet done, but no longer requires the socket, so the
200                  * pcb will persist for the time being.
201                  *
202                  * XXXRW: Does the second case still occur?
203                  */
204                 if (inp->inp_flags & INP_DROPPED ||
205                     tp->t_state < TCPS_SYN_SENT) {
206                         tcp_discardcb(tp);
207                         in_pcbdetach(inp);
208                         in_pcbfree(inp);
209                 } else
210                         in_pcbdetach(inp);
211         }
212 }
213
214 /*
215  * pru_detach() detaches the TCP protocol from the socket.
216  * If the protocol state is non-embryonic, then can't
217  * do this directly: have to initiate a pru_disconnect(),
218  * which may finish later; embryonic TCB's can just
219  * be discarded here.
220  */
221 static void
222 tcp_usr_detach(struct socket *so)
223 {
224         INIT_VNET_INET(so->so_vnet);
225         struct inpcb *inp;
226
227         inp = sotoinpcb(so);
228         KASSERT(inp != NULL, ("tcp_usr_detach: inp == NULL"));
229         INP_INFO_WLOCK(&V_tcbinfo);
230         INP_WLOCK(inp);
231         KASSERT(inp->inp_socket != NULL,
232             ("tcp_usr_detach: inp_socket == NULL"));
233         tcp_detach(so, inp);
234         INP_INFO_WUNLOCK(&V_tcbinfo);
235 }
236
237 /*
238  * Give the socket an address.
239  */
240 static int
241 tcp_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
242 {
243         INIT_VNET_INET(so->so_vnet);
244         int error = 0;
245         struct inpcb *inp;
246         struct tcpcb *tp = NULL;
247         struct sockaddr_in *sinp;
248
249         sinp = (struct sockaddr_in *)nam;
250         if (nam->sa_len != sizeof (*sinp))
251                 return (EINVAL);
252         /*
253          * Must check for multicast addresses and disallow binding
254          * to them.
255          */
256         if (sinp->sin_family == AF_INET &&
257             IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
258                 return (EAFNOSUPPORT);
259
260         TCPDEBUG0;
261         INP_INFO_WLOCK(&V_tcbinfo);
262         inp = sotoinpcb(so);
263         KASSERT(inp != NULL, ("tcp_usr_bind: inp == NULL"));
264         INP_WLOCK(inp);
265         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
266                 error = EINVAL;
267                 goto out;
268         }
269         tp = intotcpcb(inp);
270         TCPDEBUG1();
271         error = in_pcbbind(inp, nam, td->td_ucred);
272 out:
273         TCPDEBUG2(PRU_BIND);
274         INP_WUNLOCK(inp);
275         INP_INFO_WUNLOCK(&V_tcbinfo);
276
277         return (error);
278 }
279
#ifdef INET6
/*
 * pru_bind() handler for IPv6: give the socket a local address.
 *
 * Unless the socket is IPV6_V6ONLY, the unspecified address binds both
 * address families and an IPv4-mapped address is converted and bound as
 * plain IPv4 through in_pcbbind().
 *
 * Returns EINVAL if nam is not exactly a struct sockaddr_in6 or if the
 * connection is already in timewait or dropped, EAFNOSUPPORT for a
 * multicast address (native IPv6 or IPv4-mapped), and otherwise the result
 * of in6_pcbbind()/in_pcbbind().  When the bind fails, inp_vflag is put
 * back to the value it had on entry.
 */
static int
tcp6_usr_bind(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        INIT_VNET_INET(so->so_vnet);
        int error = 0;
        struct inpcb *inp;
        struct tcpcb *tp = NULL;
        struct sockaddr_in6 *sin6p;
        u_char vflagsav;

        sin6p = (struct sockaddr_in6 *)nam;
        if (nam->sa_len != sizeof (*sin6p))
                return (EINVAL);
        /*
         * Must check for multicast addresses and disallow binding
         * to them.
         */
        if (sin6p->sin6_family == AF_INET6 &&
            IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
                return (EAFNOSUPPORT);

        TCPDEBUG0;
        INP_INFO_WLOCK(&V_tcbinfo);
        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("tcp6_usr_bind: inp == NULL"));
        INP_WLOCK(inp);
        /* Remember the address-family flags so a failed bind can undo them. */
        vflagsav = inp->inp_vflag;
        if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
                error = EINVAL;
                goto out;
        }
        tp = intotcpcb(inp);
        TCPDEBUG1();
        inp->inp_vflag &= ~INP_IPV4;
        inp->inp_vflag |= INP_IPV6;
        if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0) {
                if (IN6_IS_ADDR_UNSPECIFIED(&sin6p->sin6_addr))
                        inp->inp_vflag |= INP_IPV4;
                else if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
                        struct sockaddr_in sin;

                        in6_sin6_2_sin(&sin, sin6p);
                        /*
                         * The IPv6 multicast test above does not match
                         * ::ffff:224.0.0.0/100, so apply the same rule
                         * tcp_usr_bind() enforces to the embedded IPv4
                         * address.
                         */
                        if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
                                error = EAFNOSUPPORT;
                                goto out;
                        }
                        inp->inp_vflag |= INP_IPV4;
                        inp->inp_vflag &= ~INP_IPV6;
                        error = in_pcbbind(inp, (struct sockaddr *)&sin,
                            td->td_ucred);
                        goto out;
                }
        }
        error = in6_pcbbind(inp, nam, td->td_ucred);
out:
        /* Do not leave the pcb with flags describing a bind that failed. */
        if (error != 0)
                inp->inp_vflag = vflagsav;
        TCPDEBUG2(PRU_BIND);
        INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        return (error);
}
#endif /* INET6 */
336
337 /*
338  * Prepare to accept connections.
339  */
340 static int
341 tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
342 {
343         INIT_VNET_INET(so->so_vnet);
344         int error = 0;
345         struct inpcb *inp;
346         struct tcpcb *tp = NULL;
347
348         TCPDEBUG0;
349         INP_INFO_WLOCK(&V_tcbinfo);
350         inp = sotoinpcb(so);
351         KASSERT(inp != NULL, ("tcp_usr_listen: inp == NULL"));
352         INP_WLOCK(inp);
353         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
354                 error = EINVAL;
355                 goto out;
356         }
357         tp = intotcpcb(inp);
358         TCPDEBUG1();
359         SOCK_LOCK(so);
360         error = solisten_proto_check(so);
361         if (error == 0 && inp->inp_lport == 0)
362                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
363         if (error == 0) {
364                 tp->t_state = TCPS_LISTEN;
365                 solisten_proto(so, backlog);
366                 tcp_offload_listen_open(tp);
367         }
368         SOCK_UNLOCK(so);
369
370 out:
371         TCPDEBUG2(PRU_LISTEN);
372         INP_WUNLOCK(inp);
373         INP_INFO_WUNLOCK(&V_tcbinfo);
374         return (error);
375 }
376
#ifdef INET6
/*
 * pru_listen() handler for IPv6: prepare to accept connections.
 *
 * If the socket has no local port yet, it is bound through in6_pcbbind()
 * with no address; INP_IPV4 is first cleared and then set again unless the
 * socket is IPV6_V6ONLY.  On success the connection moves to TCPS_LISTEN
 * and the socket is marked as listening.
 *
 * NOTE(review): unlike tcp_usr_listen(), this path does not call
 * tcp_offload_listen_open() -- confirm whether that is intentional.
 *
 * Returns EINVAL if the connection is in timewait or dropped, otherwise
 * the first error from solisten_proto_check()/in6_pcbbind(), or 0.
 */
static int
tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
{
        INIT_VNET_INET(so->so_vnet);
        struct inpcb *pcb;
        struct tcpcb *tp = NULL;
        int rc;
        TCPDEBUG0;

        INP_INFO_WLOCK(&V_tcbinfo);
        pcb = sotoinpcb(so);
        KASSERT(pcb != NULL, ("tcp6_usr_listen: inp == NULL"));
        INP_WLOCK(pcb);
        if ((pcb->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
                rc = EINVAL;
        } else {
                tp = intotcpcb(pcb);
                TCPDEBUG1();

                SOCK_LOCK(so);
                rc = solisten_proto_check(so);
                if (rc == 0 && pcb->inp_lport == 0) {
                        pcb->inp_vflag &= ~INP_IPV4;
                        if ((pcb->inp_flags & IN6P_IPV6_V6ONLY) == 0)
                                pcb->inp_vflag |= INP_IPV4;
                        rc = in6_pcbbind(pcb, NULL, td->td_ucred);
                }
                if (rc == 0) {
                        tp->t_state = TCPS_LISTEN;
                        solisten_proto(so, backlog);
                }
                SOCK_UNLOCK(so);
        }
        TCPDEBUG2(PRU_LISTEN);
        INP_WUNLOCK(pcb);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        return (rc);
}
#endif /* INET6 */
418
419 /*
420  * Initiate connection to peer.
421  * Create a template for use in transmissions on this connection.
422  * Enter SYN_SENT state, and mark socket as connecting.
423  * Start keep-alive timer, and seed output sequence space.
424  * Send initial segment on connection.
425  */
426 static int
427 tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
428 {
429         INIT_VNET_INET(so->so_vnet);
430         int error = 0;
431         struct inpcb *inp;
432         struct tcpcb *tp = NULL;
433         struct sockaddr_in *sinp;
434
435         sinp = (struct sockaddr_in *)nam;
436         if (nam->sa_len != sizeof (*sinp))
437                 return (EINVAL);
438         /*
439          * Must disallow TCP ``connections'' to multicast addresses.
440          */
441         if (sinp->sin_family == AF_INET
442             && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
443                 return (EAFNOSUPPORT);
444         if ((error = prison_remote_ip4(td->td_ucred, &sinp->sin_addr)) != 0)
445                 return (error);
446
447         TCPDEBUG0;
448         INP_INFO_WLOCK(&V_tcbinfo);
449         inp = sotoinpcb(so);
450         KASSERT(inp != NULL, ("tcp_usr_connect: inp == NULL"));
451         INP_WLOCK(inp);
452         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
453                 error = EINVAL;
454                 goto out;
455         }
456         tp = intotcpcb(inp);
457         TCPDEBUG1();
458         if ((error = tcp_connect(tp, nam, td)) != 0)
459                 goto out;
460         error = tcp_output_connect(so, nam);
461 out:
462         TCPDEBUG2(PRU_CONNECT);
463         INP_WUNLOCK(inp);
464         INP_INFO_WUNLOCK(&V_tcbinfo);
465         return (error);
466 }
467
#ifdef INET6
/*
 * pru_connect() handler for IPv6: initiate a connection to a peer.
 *
 * An IPv4-mapped destination is converted to a struct sockaddr_in and
 * connected through tcp_connect(), which is refused with EINVAL on an
 * IPV6_V6ONLY socket; any other destination goes through tcp6_connect().
 * In both cases the destination must pass the matching prison_remote_ip*()
 * check and the first segment is sent through tcp_output_connect().
 *
 * Returns EINVAL for a malformed address or a connection that is in
 * timewait or dropped, EAFNOSUPPORT for a multicast destination (native
 * IPv6 or IPv4-mapped), or the first error from the helpers named above.
 */
static int
tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
{
        INIT_VNET_INET(so->so_vnet);
        int error = 0;
        struct inpcb *inp;
        struct tcpcb *tp = NULL;
        struct sockaddr_in6 *sin6p;

        TCPDEBUG0;

        sin6p = (struct sockaddr_in6 *)nam;
        if (nam->sa_len != sizeof (*sin6p))
                return (EINVAL);
        /*
         * Must disallow TCP ``connections'' to multicast addresses.
         */
        if (sin6p->sin6_family == AF_INET6
            && IN6_IS_ADDR_MULTICAST(&sin6p->sin6_addr))
                return (EAFNOSUPPORT);

        INP_INFO_WLOCK(&V_tcbinfo);
        inp = sotoinpcb(so);
        KASSERT(inp != NULL, ("tcp6_usr_connect: inp == NULL"));
        INP_WLOCK(inp);
        if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
                error = EINVAL;
                goto out;
        }
        tp = intotcpcb(inp);
        TCPDEBUG1();
        if (IN6_IS_ADDR_V4MAPPED(&sin6p->sin6_addr)) {
                struct sockaddr_in sin;

                if ((inp->inp_flags & IN6P_IPV6_V6ONLY) != 0) {
                        error = EINVAL;
                        goto out;
                }

                in6_sin6_2_sin(&sin, sin6p);
                /*
                 * The IPv6 multicast test above does not match an
                 * IPv4-mapped multicast address, so apply the rule that
                 * tcp_usr_connect() enforces to the embedded IPv4 address
                 * before touching any pcb state.
                 */
                if (IN_MULTICAST(ntohl(sin.sin_addr.s_addr))) {
                        error = EAFNOSUPPORT;
                        goto out;
                }
                inp->inp_vflag |= INP_IPV4;
                inp->inp_vflag &= ~INP_IPV6;
                if ((error = prison_remote_ip4(td->td_ucred,
                    &sin.sin_addr)) != 0)
                        goto out;
                if ((error = tcp_connect(tp, (struct sockaddr *)&sin, td)) != 0)
                        goto out;
                error = tcp_output_connect(so, nam);
                goto out;
        }
        inp->inp_vflag &= ~INP_IPV4;
        inp->inp_vflag |= INP_IPV6;
        inp->inp_inc.inc_flags |= INC_ISIPV6;
        if ((error = prison_remote_ip6(td->td_ucred, &sin6p->sin6_addr)) != 0)
                goto out;
        if ((error = tcp6_connect(tp, nam, td)) != 0)
                goto out;
        error = tcp_output_connect(so, nam);

out:
        TCPDEBUG2(PRU_CONNECT);
        INP_WUNLOCK(inp);
        INP_INFO_WUNLOCK(&V_tcbinfo);
        return (error);
}
#endif /* INET6 */
535
536 /*
537  * Initiate disconnect from peer.
538  * If connection never passed embryonic stage, just drop;
539  * else if don't need to let data drain, then can just drop anyways,
540  * else have to begin TCP shutdown process: mark socket disconnecting,
541  * drain unread data, state switch to reflect user close, and
542  * send segment (e.g. FIN) to peer.  Socket will be really disconnected
543  * when peer sends FIN and acks ours.
544  *
545  * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
546  */
547 static int
548 tcp_usr_disconnect(struct socket *so)
549 {
550         INIT_VNET_INET(so->so_vnet);
551         struct inpcb *inp;
552         struct tcpcb *tp = NULL;
553         int error = 0;
554
555         TCPDEBUG0;
556         INP_INFO_WLOCK(&V_tcbinfo);
557         inp = sotoinpcb(so);
558         KASSERT(inp != NULL, ("tcp_usr_disconnect: inp == NULL"));
559         INP_WLOCK(inp);
560         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
561                 error = ECONNRESET;
562                 goto out;
563         }
564         tp = intotcpcb(inp);
565         TCPDEBUG1();
566         tcp_disconnect(tp);
567 out:
568         TCPDEBUG2(PRU_DISCONNECT);
569         INP_WUNLOCK(inp);
570         INP_INFO_WUNLOCK(&V_tcbinfo);
571         return (error);
572 }
573
574 /*
575  * Accept a connection.  Essentially all the work is
576  * done at higher levels; just return the address
577  * of the peer, storing through addr.
578  */
579 static int
580 tcp_usr_accept(struct socket *so, struct sockaddr **nam)
581 {
582         INIT_VNET_INET(so->so_vnet);
583         int error = 0;
584         struct inpcb *inp = NULL;
585         struct tcpcb *tp = NULL;
586         struct in_addr addr;
587         in_port_t port = 0;
588         TCPDEBUG0;
589
590         if (so->so_state & SS_ISDISCONNECTED)
591                 return (ECONNABORTED);
592
593         inp = sotoinpcb(so);
594         KASSERT(inp != NULL, ("tcp_usr_accept: inp == NULL"));
595         INP_INFO_RLOCK(&V_tcbinfo);
596         INP_WLOCK(inp);
597         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
598                 error = ECONNABORTED;
599                 goto out;
600         }
601         tp = intotcpcb(inp);
602         TCPDEBUG1();
603
604         /*
605          * We inline in_getpeeraddr and COMMON_END here, so that we can
606          * copy the data of interest and defer the malloc until after we
607          * release the lock.
608          */
609         port = inp->inp_fport;
610         addr = inp->inp_faddr;
611
612 out:
613         TCPDEBUG2(PRU_ACCEPT);
614         INP_WUNLOCK(inp);
615         INP_INFO_RUNLOCK(&V_tcbinfo);
616         if (error == 0)
617                 *nam = in_sockaddr(port, &addr);
618         return error;
619 }
620
#ifdef INET6
/*
 * pru_accept() handler for IPv6: return the peer's address through *nam.
 *
 * The foreign port and address are copied out while the pcb is locked and
 * the sockaddr is built only after the lock has been released.  A pcb
 * flagged INP_IPV4 is reported as an IPv4-mapped IPv6 address.
 *
 * Returns ECONNABORTED if the socket is already disconnected or the
 * connection is in timewait or dropped, otherwise 0 with *nam set.
 */
static int
tcp6_usr_accept(struct socket *so, struct sockaddr **nam)
{
        struct inpcb *pcb = NULL;
        struct tcpcb *tp = NULL;
        struct in_addr peer4;
        struct in6_addr peer6;
        in_port_t peer_port = 0;
        int is_v4 = 0;
        int rc = 0;
        TCPDEBUG0;

        if ((so->so_state & SS_ISDISCONNECTED) != 0)
                return (ECONNABORTED);

        pcb = sotoinpcb(so);
        KASSERT(pcb != NULL, ("tcp6_usr_accept: inp == NULL"));
        INP_WLOCK(pcb);
        if ((pcb->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) != 0) {
                rc = ECONNABORTED;
        } else {
                tp = intotcpcb(pcb);
                TCPDEBUG1();

                /* Snapshot the peer now; allocate once the lock is gone. */
                peer_port = pcb->inp_fport;
                if ((pcb->inp_vflag & INP_IPV4) != 0) {
                        is_v4 = 1;
                        peer4 = pcb->inp_faddr;
                } else {
                        peer6 = pcb->in6p_faddr;
                }
        }
        TCPDEBUG2(PRU_ACCEPT);
        INP_WUNLOCK(pcb);

        if (rc == 0) {
                *nam = is_v4 ? in6_v4mapsin6_sockaddr(peer_port, &peer4) :
                    in6_sockaddr(peer_port, &peer6);
        }
        return (rc);
}
#endif /* INET6 */
673
674 /*
675  * Mark the connection as being incapable of further output.
676  */
677 static int
678 tcp_usr_shutdown(struct socket *so)
679 {
680         INIT_VNET_INET(so->so_vnet);
681         int error = 0;
682         struct inpcb *inp;
683         struct tcpcb *tp = NULL;
684
685         TCPDEBUG0;
686         INP_INFO_WLOCK(&V_tcbinfo);
687         inp = sotoinpcb(so);
688         KASSERT(inp != NULL, ("inp == NULL"));
689         INP_WLOCK(inp);
690         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
691                 error = ECONNRESET;
692                 goto out;
693         }
694         tp = intotcpcb(inp);
695         TCPDEBUG1();
696         socantsendmore(so);
697         tcp_usrclosed(tp);
698         if (!(inp->inp_flags & INP_DROPPED))
699                 error = tcp_output_disconnect(tp);
700
701 out:
702         TCPDEBUG2(PRU_SHUTDOWN);
703         INP_WUNLOCK(inp);
704         INP_INFO_WUNLOCK(&V_tcbinfo);
705
706         return (error);
707 }
708
709 /*
710  * After a receive, possibly send window update to peer.
711  */
712 static int
713 tcp_usr_rcvd(struct socket *so, int flags)
714 {
715         struct inpcb *inp;
716         struct tcpcb *tp = NULL;
717         int error = 0;
718
719         TCPDEBUG0;
720         inp = sotoinpcb(so);
721         KASSERT(inp != NULL, ("tcp_usr_rcvd: inp == NULL"));
722         INP_WLOCK(inp);
723         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
724                 error = ECONNRESET;
725                 goto out;
726         }
727         tp = intotcpcb(inp);
728         TCPDEBUG1();
729         tcp_output_rcvd(tp);
730
731 out:
732         TCPDEBUG2(PRU_RCVD);
733         INP_WUNLOCK(inp);
734         return (error);
735 }
736
737 /*
738  * Do a send by putting data in output queue and updating urgent
739  * marker if URG set.  Possibly send more data.  Unlike the other
740  * pru_*() routines, the mbuf chains are our responsibility.  We
741  * must either enqueue them or free them.  The other pru_* routines
742  * generally are caller-frees.
743  */
744 static int
745 tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
746     struct sockaddr *nam, struct mbuf *control, struct thread *td)
747 {
748         INIT_VNET_INET(so->so_vnet);
749         int error = 0;
750         struct inpcb *inp;
751         struct tcpcb *tp = NULL;
752         int headlocked = 0;
753 #ifdef INET6
754         int isipv6;
755 #endif
756         TCPDEBUG0;
757
758         /*
759          * We require the pcbinfo lock in two cases:
760          *
761          * (1) An implied connect is taking place, which can result in
762          *     binding IPs and ports and hence modification of the pcb hash
763          *     chains.
764          *
765          * (2) PRUS_EOF is set, resulting in explicit close on the send.
766          */
767         if ((nam != NULL) || (flags & PRUS_EOF)) {
768                 INP_INFO_WLOCK(&V_tcbinfo);
769                 headlocked = 1;
770         }
771         inp = sotoinpcb(so);
772         KASSERT(inp != NULL, ("tcp_usr_send: inp == NULL"));
773         INP_WLOCK(inp);
774         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
775                 if (control)
776                         m_freem(control);
777                 if (m)
778                         m_freem(m);
779                 error = ECONNRESET;
780                 goto out;
781         }
782 #ifdef INET6
783         isipv6 = nam && nam->sa_family == AF_INET6;
784 #endif /* INET6 */
785         tp = intotcpcb(inp);
786         TCPDEBUG1();
787         if (control) {
788                 /* TCP doesn't do control messages (rights, creds, etc) */
789                 if (control->m_len) {
790                         m_freem(control);
791                         if (m)
792                                 m_freem(m);
793                         error = EINVAL;
794                         goto out;
795                 }
796                 m_freem(control);       /* empty control, just free it */
797         }
798         if (!(flags & PRUS_OOB)) {
799                 sbappendstream(&so->so_snd, m);
800                 if (nam && tp->t_state < TCPS_SYN_SENT) {
801                         /*
802                          * Do implied connect if not yet connected,
803                          * initialize window to default value, and
804                          * initialize maxseg/maxopd using peer's cached
805                          * MSS.
806                          */
807                         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
808 #ifdef INET6
809                         if (isipv6)
810                                 error = tcp6_connect(tp, nam, td);
811                         else
812 #endif /* INET6 */
813                         error = tcp_connect(tp, nam, td);
814                         if (error)
815                                 goto out;
816                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
817                         tcp_mss(tp, -1);
818                 }
819                 if (flags & PRUS_EOF) {
820                         /*
821                          * Close the send side of the connection after
822                          * the data is sent.
823                          */
824                         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
825                         socantsendmore(so);
826                         tcp_usrclosed(tp);
827                 }
828                 if (headlocked) {
829                         INP_INFO_WUNLOCK(&V_tcbinfo);
830                         headlocked = 0;
831                 }
832                 if (!(inp->inp_flags & INP_DROPPED)) {
833                         if (flags & PRUS_MORETOCOME)
834                                 tp->t_flags |= TF_MORETOCOME;
835                         error = tcp_output_send(tp);
836                         if (flags & PRUS_MORETOCOME)
837                                 tp->t_flags &= ~TF_MORETOCOME;
838                 }
839         } else {
840                 /*
841                  * XXXRW: PRUS_EOF not implemented with PRUS_OOB?
842                  */
843                 SOCKBUF_LOCK(&so->so_snd);
844                 if (sbspace(&so->so_snd) < -512) {
845                         SOCKBUF_UNLOCK(&so->so_snd);
846                         m_freem(m);
847                         error = ENOBUFS;
848                         goto out;
849                 }
850                 /*
851                  * According to RFC961 (Assigned Protocols),
852                  * the urgent pointer points to the last octet
853                  * of urgent data.  We continue, however,
854                  * to consider it to indicate the first octet
855                  * of data past the urgent section.
856                  * Otherwise, snd_up should be one lower.
857                  */
858                 sbappendstream_locked(&so->so_snd, m);
859                 SOCKBUF_UNLOCK(&so->so_snd);
860                 if (nam && tp->t_state < TCPS_SYN_SENT) {
861                         /*
862                          * Do implied connect if not yet connected,
863                          * initialize window to default value, and
864                          * initialize maxseg/maxopd using peer's cached
865                          * MSS.
866                          */
867                         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
868 #ifdef INET6
869                         if (isipv6)
870                                 error = tcp6_connect(tp, nam, td);
871                         else
872 #endif /* INET6 */
873                         error = tcp_connect(tp, nam, td);
874                         if (error)
875                                 goto out;
876                         tp->snd_wnd = TTCP_CLIENT_SND_WND;
877                         tcp_mss(tp, -1);
878                         INP_INFO_WUNLOCK(&V_tcbinfo);
879                         headlocked = 0;
880                 } else if (nam) {
881                         INP_INFO_WUNLOCK(&V_tcbinfo);
882                         headlocked = 0;
883                 }
884                 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
885                 tp->t_flags |= TF_FORCEDATA;
886                 error = tcp_output_send(tp);
887                 tp->t_flags &= ~TF_FORCEDATA;
888         }
889 out:
890         TCPDEBUG2((flags & PRUS_OOB) ? PRU_SENDOOB :
891                   ((flags & PRUS_EOF) ? PRU_SEND_EOF : PRU_SEND));
892         INP_WUNLOCK(inp);
893         if (headlocked)
894                 INP_INFO_WUNLOCK(&V_tcbinfo);
895         return (error);
896 }
897
898 /*
899  * Abort the TCP.  Drop the connection abruptly.
900  */
901 static void
902 tcp_usr_abort(struct socket *so)
903 {
904         INIT_VNET_INET(so->so_vnet);
905         struct inpcb *inp;
906         struct tcpcb *tp = NULL;
907         TCPDEBUG0;
908
909         inp = sotoinpcb(so);
910         KASSERT(inp != NULL, ("tcp_usr_abort: inp == NULL"));
911
912         INP_INFO_WLOCK(&V_tcbinfo);
913         INP_WLOCK(inp);
914         KASSERT(inp->inp_socket != NULL,
915             ("tcp_usr_abort: inp_socket == NULL"));
916
917         /*
918          * If we still have full TCP state, and we're not dropped, drop.
919          */
920         if (!(inp->inp_flags & INP_TIMEWAIT) &&
921             !(inp->inp_flags & INP_DROPPED)) {
922                 tp = intotcpcb(inp);
923                 TCPDEBUG1();
924                 tcp_drop(tp, ECONNABORTED);
925                 TCPDEBUG2(PRU_ABORT);
926         }
927         if (!(inp->inp_flags & INP_DROPPED)) {
928                 SOCK_LOCK(so);
929                 so->so_state |= SS_PROTOREF;
930                 SOCK_UNLOCK(so);
931                 inp->inp_flags |= INP_SOCKREF;
932         }
933         INP_WUNLOCK(inp);
934         INP_INFO_WUNLOCK(&V_tcbinfo);
935 }
936
937 /*
938  * TCP socket is closed.  Start friendly disconnect.
939  */
940 static void
941 tcp_usr_close(struct socket *so)
942 {
943         INIT_VNET_INET(so->so_vnet);
944         struct inpcb *inp;
945         struct tcpcb *tp = NULL;
946         TCPDEBUG0;
947
948         inp = sotoinpcb(so);
949         KASSERT(inp != NULL, ("tcp_usr_close: inp == NULL"));
950
951         INP_INFO_WLOCK(&V_tcbinfo);
952         INP_WLOCK(inp);
953         KASSERT(inp->inp_socket != NULL,
954             ("tcp_usr_close: inp_socket == NULL"));
955
956         /*
957          * If we still have full TCP state, and we're not dropped, initiate
958          * a disconnect.
959          */
960         if (!(inp->inp_flags & INP_TIMEWAIT) &&
961             !(inp->inp_flags & INP_DROPPED)) {
962                 tp = intotcpcb(inp);
963                 TCPDEBUG1();
964                 tcp_disconnect(tp);
965                 TCPDEBUG2(PRU_CLOSE);
966         }
967         if (!(inp->inp_flags & INP_DROPPED)) {
968                 SOCK_LOCK(so);
969                 so->so_state |= SS_PROTOREF;
970                 SOCK_UNLOCK(so);
971                 inp->inp_flags |= INP_SOCKREF;
972         }
973         INP_WUNLOCK(inp);
974         INP_INFO_WUNLOCK(&V_tcbinfo);
975 }
976
977 /*
978  * Receive out-of-band data.
979  */
980 static int
981 tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int flags)
982 {
983         int error = 0;
984         struct inpcb *inp;
985         struct tcpcb *tp = NULL;
986
987         TCPDEBUG0;
988         inp = sotoinpcb(so);
989         KASSERT(inp != NULL, ("tcp_usr_rcvoob: inp == NULL"));
990         INP_WLOCK(inp);
991         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
992                 error = ECONNRESET;
993                 goto out;
994         }
995         tp = intotcpcb(inp);
996         TCPDEBUG1();
997         if ((so->so_oobmark == 0 &&
998              (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||
999             so->so_options & SO_OOBINLINE ||
1000             tp->t_oobflags & TCPOOB_HADDATA) {
1001                 error = EINVAL;
1002                 goto out;
1003         }
1004         if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
1005                 error = EWOULDBLOCK;
1006                 goto out;
1007         }
1008         m->m_len = 1;
1009         *mtod(m, caddr_t) = tp->t_iobc;
1010         if ((flags & MSG_PEEK) == 0)
1011                 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
1012
1013 out:
1014         TCPDEBUG2(PRU_RCVOOB);
1015         INP_WUNLOCK(inp);
1016         return (error);
1017 }
1018
1019 struct pr_usrreqs tcp_usrreqs = {
1020         .pru_abort =            tcp_usr_abort,
1021         .pru_accept =           tcp_usr_accept,
1022         .pru_attach =           tcp_usr_attach,
1023         .pru_bind =             tcp_usr_bind,
1024         .pru_connect =          tcp_usr_connect,
1025         .pru_control =          in_control,
1026         .pru_detach =           tcp_usr_detach,
1027         .pru_disconnect =       tcp_usr_disconnect,
1028         .pru_listen =           tcp_usr_listen,
1029         .pru_peeraddr =         in_getpeeraddr,
1030         .pru_rcvd =             tcp_usr_rcvd,
1031         .pru_rcvoob =           tcp_usr_rcvoob,
1032         .pru_send =             tcp_usr_send,
1033         .pru_shutdown =         tcp_usr_shutdown,
1034         .pru_sockaddr =         in_getsockaddr,
1035 #if 0
1036         .pru_soreceive =        soreceive_stream,
1037 #endif
1038         .pru_sosetlabel =       in_pcbsosetlabel,
1039         .pru_close =            tcp_usr_close,
1040 };
1041
#ifdef INET6
/*
 * User-request switch for TCP sockets in the INET6 build.  Entries are
 * listed alphabetically by member name; designated initializers make the
 * order irrelevant to the resulting table.
 */
struct pr_usrreqs tcp6_usrreqs = {
	.pru_abort =		tcp_usr_abort,
	.pru_accept =		tcp6_usr_accept,
	.pru_attach =		tcp_usr_attach,
	.pru_bind =		tcp6_usr_bind,
	.pru_close =		tcp_usr_close,
	.pru_connect =		tcp6_usr_connect,
	.pru_control =		in6_control,
	.pru_detach =		tcp_usr_detach,
	.pru_disconnect =	tcp_usr_disconnect,
	.pru_listen =		tcp6_usr_listen,
	.pru_peeraddr =		in6_mapped_peeraddr,
	.pru_rcvd =		tcp_usr_rcvd,
	.pru_rcvoob =		tcp_usr_rcvoob,
	.pru_send =		tcp_usr_send,
	.pru_shutdown =		tcp_usr_shutdown,
	.pru_sockaddr =		in6_mapped_sockaddr,
#if 0
	/* Compiled out: the stream-specific receive path is not used. */
	.pru_soreceive =	soreceive_stream,
#endif
	.pru_sosetlabel =	in_pcbsosetlabel,
};
#endif /* INET6 */
1066
1067 /*
1068  * Common subroutine to open a TCP connection to remote host specified
1069  * by struct sockaddr_in in mbuf *nam.  Call in_pcbbind to assign a local
1070  * port number if needed.  Call in_pcbconnect_setup to do the routing and
1071  * to choose a local host address (interface).  If there is an existing
1072  * incarnation of the same connection in TIME-WAIT state and if the remote
1073  * host was sending CC options and if the connection duration was < MSL, then
1074  * truncate the previous TIME-WAIT state and proceed.
1075  * Initialize connection parameters and enter SYN-SENT state.
1076  */
1077 static int
1078 tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
1079 {
1080         struct inpcb *inp = tp->t_inpcb, *oinp;
1081         struct socket *so = inp->inp_socket;
1082         INIT_VNET_INET(so->so_vnet);
1083         struct in_addr laddr;
1084         u_short lport;
1085         int error;
1086
1087         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1088         INP_WLOCK_ASSERT(inp);
1089
1090         if (inp->inp_lport == 0) {
1091                 error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
1092                 if (error)
1093                         return error;
1094         }
1095
1096         /*
1097          * Cannot simply call in_pcbconnect, because there might be an
1098          * earlier incarnation of this same connection still in
1099          * TIME_WAIT state, creating an ADDRINUSE error.
1100          */
1101         laddr = inp->inp_laddr;
1102         lport = inp->inp_lport;
1103         error = in_pcbconnect_setup(inp, nam, &laddr.s_addr, &lport,
1104             &inp->inp_faddr.s_addr, &inp->inp_fport, &oinp, td->td_ucred);
1105         if (error && oinp == NULL)
1106                 return error;
1107         if (oinp)
1108                 return EADDRINUSE;
1109         inp->inp_laddr = laddr;
1110         in_pcbrehash(inp);
1111
1112         /*
1113          * Compute window scaling to request:
1114          * Scale to fit into sweet spot.  See tcp_syncache.c.
1115          * XXX: This should move to tcp_output().
1116          */
1117         while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
1118             (TCP_MAXWIN << tp->request_r_scale) < sb_max)
1119                 tp->request_r_scale++;
1120
1121         soisconnecting(so);
1122         TCPSTAT_INC(tcps_connattempt);
1123         tp->t_state = TCPS_SYN_SENT;
1124         tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
1125         tp->iss = tcp_new_isn(tp);
1126         tp->t_bw_rtseq = tp->iss;
1127         tcp_sendseqinit(tp);
1128
1129         return 0;
1130 }
1131
#ifdef INET6
/*
 * IPv6 variant of the connect subroutine: bind a local port if needed,
 * pick the local address with in6_pcbladdr(), refuse the connect with
 * EADDRINUSE if an inpcb for the same address/port 4-tuple already exists,
 * then record the foreign address/port, refresh the flow label and enter
 * SYN-SENT state.
 *
 * The caller must hold the tcbinfo write lock and the inpcb write lock.
 *
 * Returns 0 on success, EADDRINUSE as described above, or the error
 * returned by in6_pcbbind() / in6_pcbladdr().
 */
static int
tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td)
{
	struct inpcb *inp = tp->t_inpcb, *oinp;
	struct socket *so = inp->inp_socket;
	INIT_VNET_INET(so->so_vnet);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
	struct in6_addr addr6, *laddr6;
	int error;

	INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
	INP_WLOCK_ASSERT(inp);

	/* No local port yet: bind to get one. */
	if (inp->inp_lport == 0) {
		error = in6_pcbbind(inp, NULL, td->td_ucred);
		if (error != 0)
			return (error);
	}

	/*
	 * Cannot simply call in_pcbconnect, because there might be an
	 * earlier incarnation of this same connection still in
	 * TIME_WAIT state, creating an ADDRINUSE error.
	 * in6_pcbladdr() also handles scope zone IDs.
	 */
	error = in6_pcbladdr(inp, nam, &addr6);
	if (error != 0)
		return (error);

	/* Look up with the bound local address, else the selected one. */
	laddr6 = IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) ?
	    &addr6 : &inp->in6p_laddr;
	oinp = in6_pcblookup_hash(inp->inp_pcbinfo, &sin6->sin6_addr,
	    sin6->sin6_port, laddr6, inp->inp_lport, 0, NULL);
	if (oinp != NULL)
		return (EADDRINUSE);

	if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
		inp->in6p_laddr = addr6;
	inp->in6p_faddr = sin6->sin6_addr;
	inp->inp_fport = sin6->sin6_port;

	/* update flowinfo - draft-itojun-ipv6-flowlabel-api-00 */
	inp->inp_flow &= ~IPV6_FLOWLABEL_MASK;
	if (inp->inp_flags & IN6P_AUTOFLOWLABEL)
		inp->inp_flow |=
		    (htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
	in_pcbrehash(inp);

	/* Compute window scaling to request.  */
	while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
	    (TCP_MAXWIN << tp->request_r_scale) < sb_max)
		tp->request_r_scale++;

	/* Enter SYN-SENT and seed the send sequence space. */
	soisconnecting(so);
	TCPSTAT_INC(tcps_connattempt);
	tp->t_state = TCPS_SYN_SENT;
	tcp_timer_activate(tp, TT_KEEP, tcp_keepinit);
	tp->iss = tcp_new_isn(tp);
	tp->t_bw_rtseq = tp->iss;
	tcp_sendseqinit(tp);

	return (0);
}
#endif /* INET6 */
1196
1197 /*
1198  * Export TCP internal state information via a struct tcp_info, based on the
1199  * Linux 2.6 API.  Not ABI compatible as our constants are mapped differently
1200  * (TCP state machine, etc).  We export all information using FreeBSD-native
1201  * constants -- for example, the numeric values for tcpi_state will differ
1202  * from Linux.
1203  */
1204 static void
1205 tcp_fill_info(struct tcpcb *tp, struct tcp_info *ti)
1206 {
1207
1208         INP_WLOCK_ASSERT(tp->t_inpcb);
1209         bzero(ti, sizeof(*ti));
1210
1211         ti->tcpi_state = tp->t_state;
1212         if ((tp->t_flags & TF_REQ_TSTMP) && (tp->t_flags & TF_RCVD_TSTMP))
1213                 ti->tcpi_options |= TCPI_OPT_TIMESTAMPS;
1214         if (tp->t_flags & TF_SACK_PERMIT)
1215                 ti->tcpi_options |= TCPI_OPT_SACK;
1216         if ((tp->t_flags & TF_REQ_SCALE) && (tp->t_flags & TF_RCVD_SCALE)) {
1217                 ti->tcpi_options |= TCPI_OPT_WSCALE;
1218                 ti->tcpi_snd_wscale = tp->snd_scale;
1219                 ti->tcpi_rcv_wscale = tp->rcv_scale;
1220         }
1221
1222         ti->tcpi_rtt = ((u_int64_t)tp->t_srtt * tick) >> TCP_RTT_SHIFT;
1223         ti->tcpi_rttvar = ((u_int64_t)tp->t_rttvar * tick) >> TCP_RTTVAR_SHIFT;
1224
1225         ti->tcpi_snd_ssthresh = tp->snd_ssthresh;
1226         ti->tcpi_snd_cwnd = tp->snd_cwnd;
1227
1228         /*
1229          * FreeBSD-specific extension fields for tcp_info.
1230          */
1231         ti->tcpi_rcv_space = tp->rcv_wnd;
1232         ti->tcpi_rcv_nxt = tp->rcv_nxt;
1233         ti->tcpi_snd_wnd = tp->snd_wnd;
1234         ti->tcpi_snd_bwnd = tp->snd_bwnd;
1235         ti->tcpi_snd_nxt = tp->snd_nxt;
1236         ti->__tcpi_snd_mss = tp->t_maxseg;
1237         ti->__tcpi_rcv_mss = tp->t_maxseg;
1238         if (tp->t_flags & TF_TOE)
1239                 ti->tcpi_options |= TCPI_OPT_TOE;
1240 }
1241
1242 /*
1243  * tcp_ctloutput() must drop the inpcb lock before performing copyin on
1244  * socket option arguments.  When it re-acquires the lock after the copy, it
1245  * has to revalidate that the connection is still valid for the socket
1246  * option.
1247  */
1248 #define INP_WLOCK_RECHECK(inp) do {                                     \
1249         INP_WLOCK(inp);                                                 \
1250         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {            \
1251                 INP_WUNLOCK(inp);                                       \
1252                 return (ECONNRESET);                                    \
1253         }                                                               \
1254         tp = intotcpcb(inp);                                            \
1255 } while(0)
1256
1257 int
1258 tcp_ctloutput(struct socket *so, struct sockopt *sopt)
1259 {
1260         INIT_VNET_INET(so->so_vnet);
1261         int     error, opt, optval;
1262         struct  inpcb *inp;
1263         struct  tcpcb *tp;
1264         struct  tcp_info ti;
1265
1266         error = 0;
1267         inp = sotoinpcb(so);
1268         KASSERT(inp != NULL, ("tcp_ctloutput: inp == NULL"));
1269         INP_WLOCK(inp);
1270         if (sopt->sopt_level != IPPROTO_TCP) {
1271 #ifdef INET6
1272                 if (inp->inp_vflag & INP_IPV6PROTO) {
1273                         INP_WUNLOCK(inp);
1274                         error = ip6_ctloutput(so, sopt);
1275                 } else {
1276 #endif /* INET6 */
1277                         INP_WUNLOCK(inp);
1278                         error = ip_ctloutput(so, sopt);
1279 #ifdef INET6
1280                 }
1281 #endif
1282                 return (error);
1283         }
1284         if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
1285                 INP_WUNLOCK(inp);
1286                 return (ECONNRESET);
1287         }
1288
1289         switch (sopt->sopt_dir) {
1290         case SOPT_SET:
1291                 switch (sopt->sopt_name) {
1292 #ifdef TCP_SIGNATURE
1293                 case TCP_MD5SIG:
1294                         INP_WUNLOCK(inp);
1295                         error = sooptcopyin(sopt, &optval, sizeof optval,
1296                             sizeof optval);
1297                         if (error)
1298                                 return (error);
1299
1300                         INP_WLOCK_RECHECK(inp);
1301                         if (optval > 0)
1302                                 tp->t_flags |= TF_SIGNATURE;
1303                         else
1304                                 tp->t_flags &= ~TF_SIGNATURE;
1305                         INP_WUNLOCK(inp);
1306                         break;
1307 #endif /* TCP_SIGNATURE */
1308                 case TCP_NODELAY:
1309                 case TCP_NOOPT:
1310                         INP_WUNLOCK(inp);
1311                         error = sooptcopyin(sopt, &optval, sizeof optval,
1312                             sizeof optval);
1313                         if (error)
1314                                 return (error);
1315
1316                         INP_WLOCK_RECHECK(inp);
1317                         switch (sopt->sopt_name) {
1318                         case TCP_NODELAY:
1319                                 opt = TF_NODELAY;
1320                                 break;
1321                         case TCP_NOOPT:
1322                                 opt = TF_NOOPT;
1323                                 break;
1324                         default:
1325                                 opt = 0; /* dead code to fool gcc */
1326                                 break;
1327                         }
1328
1329                         if (optval)
1330                                 tp->t_flags |= opt;
1331                         else
1332                                 tp->t_flags &= ~opt;
1333                         INP_WUNLOCK(inp);
1334                         break;
1335
1336                 case TCP_NOPUSH:
1337                         INP_WUNLOCK(inp);
1338                         error = sooptcopyin(sopt, &optval, sizeof optval,
1339                             sizeof optval);
1340                         if (error)
1341                                 return (error);
1342
1343                         INP_WLOCK_RECHECK(inp);
1344                         if (optval)
1345                                 tp->t_flags |= TF_NOPUSH;
1346                         else {
1347                                 tp->t_flags &= ~TF_NOPUSH;
1348                                 error = tcp_output(tp);
1349                         }
1350                         INP_WUNLOCK(inp);
1351                         break;
1352
1353                 case TCP_MAXSEG:
1354                         INP_WUNLOCK(inp);
1355                         error = sooptcopyin(sopt, &optval, sizeof optval,
1356                             sizeof optval);
1357                         if (error)
1358                                 return (error);
1359
1360                         INP_WLOCK_RECHECK(inp);
1361                         if (optval > 0 && optval <= tp->t_maxseg &&
1362                             optval + 40 >= V_tcp_minmss)
1363                                 tp->t_maxseg = optval;
1364                         else
1365                                 error = EINVAL;
1366                         INP_WUNLOCK(inp);
1367                         break;
1368
1369                 case TCP_INFO:
1370                         INP_WUNLOCK(inp);
1371                         error = EINVAL;
1372                         break;
1373
1374                 default:
1375                         INP_WUNLOCK(inp);
1376                         error = ENOPROTOOPT;
1377                         break;
1378                 }
1379                 break;
1380
1381         case SOPT_GET:
1382                 tp = intotcpcb(inp);
1383                 switch (sopt->sopt_name) {
1384 #ifdef TCP_SIGNATURE
1385                 case TCP_MD5SIG:
1386                         optval = (tp->t_flags & TF_SIGNATURE) ? 1 : 0;
1387                         INP_WUNLOCK(inp);
1388                         error = sooptcopyout(sopt, &optval, sizeof optval);
1389                         break;
1390 #endif
1391
1392                 case TCP_NODELAY:
1393                         optval = tp->t_flags & TF_NODELAY;
1394                         INP_WUNLOCK(inp);
1395                         error = sooptcopyout(sopt, &optval, sizeof optval);
1396                         break;
1397                 case TCP_MAXSEG:
1398                         optval = tp->t_maxseg;
1399                         INP_WUNLOCK(inp);
1400                         error = sooptcopyout(sopt, &optval, sizeof optval);
1401                         break;
1402                 case TCP_NOOPT:
1403                         optval = tp->t_flags & TF_NOOPT;
1404                         INP_WUNLOCK(inp);
1405                         error = sooptcopyout(sopt, &optval, sizeof optval);
1406                         break;
1407                 case TCP_NOPUSH:
1408                         optval = tp->t_flags & TF_NOPUSH;
1409                         INP_WUNLOCK(inp);
1410                         error = sooptcopyout(sopt, &optval, sizeof optval);
1411                         break;
1412                 case TCP_INFO:
1413                         tcp_fill_info(tp, &ti);
1414                         INP_WUNLOCK(inp);
1415                         error = sooptcopyout(sopt, &ti, sizeof ti);
1416                         break;
1417                 default:
1418                         INP_WUNLOCK(inp);
1419                         error = ENOPROTOOPT;
1420                         break;
1421                 }
1422                 break;
1423         }
1424         return (error);
1425 }
1426 #undef INP_WLOCK_RECHECK
1427
1428 /*
1429  * tcp_sendspace and tcp_recvspace are the default send and receive window
1430  * sizes, respectively.  These are obsolescent (this information should
1431  * be set by the route).
1432  */
1433 u_long  tcp_sendspace = 1024*32;
1434 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_SENDSPACE, sendspace, CTLFLAG_RW,
1435     &tcp_sendspace , 0, "Maximum outgoing TCP datagram size");
1436 u_long  tcp_recvspace = 1024*64;
1437 SYSCTL_ULONG(_net_inet_tcp, TCPCTL_RECVSPACE, recvspace, CTLFLAG_RW,
1438     &tcp_recvspace , 0, "Maximum incoming TCP datagram size");
1439
1440 /*
1441  * Attach TCP protocol to socket, allocating
1442  * internet protocol control block, tcp control block,
1443  * bufer space, and entering LISTEN state if to accept connections.
1444  */
1445 static int
1446 tcp_attach(struct socket *so)
1447 {
1448         INIT_VNET_INET(so->so_vnet);
1449         struct tcpcb *tp;
1450         struct inpcb *inp;
1451         int error;
1452
1453         if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
1454                 error = soreserve(so, tcp_sendspace, tcp_recvspace);
1455                 if (error)
1456                         return (error);
1457         }
1458         so->so_rcv.sb_flags |= SB_AUTOSIZE;
1459         so->so_snd.sb_flags |= SB_AUTOSIZE;
1460         INP_INFO_WLOCK(&V_tcbinfo);
1461         error = in_pcballoc(so, &V_tcbinfo);
1462         if (error) {
1463                 INP_INFO_WUNLOCK(&V_tcbinfo);
1464                 return (error);
1465         }
1466         inp = sotoinpcb(so);
1467 #ifdef INET6
1468         if (inp->inp_vflag & INP_IPV6PROTO) {
1469                 inp->inp_vflag |= INP_IPV6;
1470                 inp->in6p_hops = -1;    /* use kernel default */
1471         }
1472         else
1473 #endif
1474         inp->inp_vflag |= INP_IPV4;
1475         tp = tcp_newtcpcb(inp);
1476         if (tp == NULL) {
1477                 in_pcbdetach(inp);
1478                 in_pcbfree(inp);
1479                 INP_INFO_WUNLOCK(&V_tcbinfo);
1480                 return (ENOBUFS);
1481         }
1482         tp->t_state = TCPS_CLOSED;
1483         INP_WUNLOCK(inp);
1484         INP_INFO_WUNLOCK(&V_tcbinfo);
1485         return (0);
1486 }
1487
1488 /*
1489  * Initiate (or continue) disconnect.
1490  * If embryonic state, just send reset (once).
1491  * If in ``let data drain'' option and linger null, just drop.
1492  * Otherwise (hard), mark socket disconnecting and drop
1493  * current input data; switch states based on user close, and
1494  * send segment to peer (with FIN).
1495  */
1496 static void
1497 tcp_disconnect(struct tcpcb *tp)
1498 {
1499         struct inpcb *inp = tp->t_inpcb;
1500         struct socket *so = inp->inp_socket;
1501 #ifdef INVARIANTS
1502         INIT_VNET_INET(so->so_vnet);
1503 #endif
1504
1505         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1506         INP_WLOCK_ASSERT(inp);
1507
1508         /*
1509          * Neither tcp_close() nor tcp_drop() should return NULL, as the
1510          * socket is still open.
1511          */
1512         if (tp->t_state < TCPS_ESTABLISHED) {
1513                 tp = tcp_close(tp);
1514                 KASSERT(tp != NULL,
1515                     ("tcp_disconnect: tcp_close() returned NULL"));
1516         } else if ((so->so_options & SO_LINGER) && so->so_linger == 0) {
1517                 tp = tcp_drop(tp, 0);
1518                 KASSERT(tp != NULL,
1519                     ("tcp_disconnect: tcp_drop() returned NULL"));
1520         } else {
1521                 soisdisconnecting(so);
1522                 sbflush(&so->so_rcv);
1523                 tcp_usrclosed(tp);
1524                 if (!(inp->inp_flags & INP_DROPPED))
1525                         tcp_output_disconnect(tp);
1526         }
1527 }
1528
1529 /*
1530  * User issued close, and wish to trail through shutdown states:
1531  * if never received SYN, just forget it.  If got a SYN from peer,
1532  * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
1533  * If already got a FIN from peer, then almost done; go to LAST_ACK
1534  * state.  In all other cases, have already sent FIN to peer (e.g.
1535  * after PRU_SHUTDOWN), and just have to play tedious game waiting
1536  * for peer to send FIN or not respond to keep-alives, etc.
1537  * We can let the user exit from the close as soon as the FIN is acked.
1538  */
1539 static void
1540 tcp_usrclosed(struct tcpcb *tp)
1541 {
1542 #ifdef INVARIANTS
1543         INIT_VNET_INET(tp->t_inpcb->inp_vnet);
1544 #endif
1545
1546         INP_INFO_WLOCK_ASSERT(&V_tcbinfo);
1547         INP_WLOCK_ASSERT(tp->t_inpcb);
1548
1549         switch (tp->t_state) {
1550         case TCPS_LISTEN:
1551                 tcp_offload_listen_close(tp);
1552                 /* FALLTHROUGH */
1553         case TCPS_CLOSED:
1554                 tp->t_state = TCPS_CLOSED;
1555                 tp = tcp_close(tp);
1556                 /*
1557                  * tcp_close() should never return NULL here as the socket is
1558                  * still open.
1559                  */
1560                 KASSERT(tp != NULL,
1561                     ("tcp_usrclosed: tcp_close() returned NULL"));
1562                 break;
1563
1564         case TCPS_SYN_SENT:
1565         case TCPS_SYN_RECEIVED:
1566                 tp->t_flags |= TF_NEEDFIN;
1567                 break;
1568
1569         case TCPS_ESTABLISHED:
1570                 tp->t_state = TCPS_FIN_WAIT_1;
1571                 break;
1572
1573         case TCPS_CLOSE_WAIT:
1574                 tp->t_state = TCPS_LAST_ACK;
1575                 break;
1576         }
1577         if (tp->t_state >= TCPS_FIN_WAIT_2) {
1578                 soisdisconnected(tp->t_inpcb->inp_socket);
1579                 /* Prevent the connection hanging in FIN_WAIT_2 forever. */
1580                 if (tp->t_state == TCPS_FIN_WAIT_2) {
1581                         int timeout;
1582
1583                         timeout = (tcp_fast_finwait2_recycle) ? 
1584                             tcp_finwait2_timeout : tcp_maxidle;
1585                         tcp_timer_activate(tp, TT_2MSL, timeout);
1586                 }
1587         }
1588 }
1589
1590 #ifdef DDB
/*
 * Emit 'indent' space characters through db_printf().  A zero or negative
 * 'indent' prints nothing.
 */
static void
db_print_indent(int indent)
{

	while (indent-- > 0)
		db_printf(" ");
}
1599
1600 static void
1601 db_print_tstate(int t_state)
1602 {
1603
1604         switch (t_state) {
1605         case TCPS_CLOSED:
1606                 db_printf("TCPS_CLOSED");
1607                 return;
1608
1609         case TCPS_LISTEN:
1610                 db_printf("TCPS_LISTEN");
1611                 return;
1612
1613         case TCPS_SYN_SENT:
1614                 db_printf("TCPS_SYN_SENT");
1615                 return;
1616
1617         case TCPS_SYN_RECEIVED:
1618                 db_printf("TCPS_SYN_RECEIVED");
1619                 return;
1620
1621         case TCPS_ESTABLISHED:
1622                 db_printf("TCPS_ESTABLISHED");
1623                 return;
1624
1625         case TCPS_CLOSE_WAIT:
1626                 db_printf("TCPS_CLOSE_WAIT");
1627                 return;
1628
1629         case TCPS_FIN_WAIT_1:
1630                 db_printf("TCPS_FIN_WAIT_1");
1631                 return;
1632
1633         case TCPS_CLOSING:
1634                 db_printf("TCPS_CLOSING");
1635                 return;
1636
1637         case TCPS_LAST_ACK:
1638                 db_printf("TCPS_LAST_ACK");
1639                 return;
1640
1641         case TCPS_FIN_WAIT_2:
1642                 db_printf("TCPS_FIN_WAIT_2");
1643                 return;
1644
1645         case TCPS_TIME_WAIT:
1646                 db_printf("TCPS_TIME_WAIT");
1647                 return;
1648
1649         default:
1650                 db_printf("unknown");
1651                 return;
1652         }
1653 }
1654
1655 static void
1656 db_print_tflags(u_int t_flags)
1657 {
1658         int comma;
1659
1660         comma = 0;
1661         if (t_flags & TF_ACKNOW) {
1662                 db_printf("%sTF_ACKNOW", comma ? ", " : "");
1663                 comma = 1;
1664         }
1665         if (t_flags & TF_DELACK) {
1666                 db_printf("%sTF_DELACK", comma ? ", " : "");
1667                 comma = 1;
1668         }
1669         if (t_flags & TF_NODELAY) {
1670                 db_printf("%sTF_NODELAY", comma ? ", " : "");
1671                 comma = 1;
1672         }
1673         if (t_flags & TF_NOOPT) {
1674                 db_printf("%sTF_NOOPT", comma ? ", " : "");
1675                 comma = 1;
1676         }
1677         if (t_flags & TF_SENTFIN) {
1678                 db_printf("%sTF_SENTFIN", comma ? ", " : "");
1679                 comma = 1;
1680         }
1681         if (t_flags & TF_REQ_SCALE) {
1682                 db_printf("%sTF_REQ_SCALE", comma ? ", " : "");
1683                 comma = 1;
1684         }
1685         if (t_flags & TF_RCVD_SCALE) {
1686                 db_printf("%sTF_RECVD_SCALE", comma ? ", " : "");
1687                 comma = 1;
1688         }
1689         if (t_flags & TF_REQ_TSTMP) {
1690                 db_printf("%sTF_REQ_TSTMP", comma ? ", " : "");
1691                 comma = 1;
1692         }
1693         if (t_flags & TF_RCVD_TSTMP) {
1694                 db_printf("%sTF_RCVD_TSTMP", comma ? ", " : "");
1695                 comma = 1;
1696         }
1697         if (t_flags & TF_SACK_PERMIT) {
1698                 db_printf("%sTF_SACK_PERMIT", comma ? ", " : "");
1699                 comma = 1;
1700         }
1701         if (t_flags & TF_NEEDSYN) {
1702                 db_printf("%sTF_NEEDSYN", comma ? ", " : "");
1703                 comma = 1;
1704         }
1705         if (t_flags & TF_NEEDFIN) {
1706                 db_printf("%sTF_NEEDFIN", comma ? ", " : "");
1707                 comma = 1;
1708         }
1709         if (t_flags & TF_NOPUSH) {
1710                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
1711                 comma = 1;
1712         }
1713         if (t_flags & TF_NOPUSH) {
1714                 db_printf("%sTF_NOPUSH", comma ? ", " : "");
1715                 comma = 1;
1716         }
1717         if (t_flags & TF_MORETOCOME) {
1718                 db_printf("%sTF_MORETOCOME", comma ? ", " : "");
1719                 comma = 1;
1720         }
1721         if (t_flags & TF_LQ_OVERFLOW) {
1722                 db_printf("%sTF_LQ_OVERFLOW", comma ? ", " : "");
1723                 comma = 1;
1724         }
1725         if (t_flags & TF_LASTIDLE) {
1726                 db_printf("%sTF_LASTIDLE", comma ? ", " : "");
1727                 comma = 1;
1728         }
1729         if (t_flags & TF_RXWIN0SENT) {
1730                 db_printf("%sTF_RXWIN0SENT", comma ? ", " : "");
1731                 comma = 1;
1732         }
1733         if (t_flags & TF_FASTRECOVERY) {
1734                 db_printf("%sTF_FASTRECOVERY", comma ? ", " : "");
1735                 comma = 1;
1736         }
1737         if (t_flags & TF_WASFRECOVERY) {
1738                 db_printf("%sTF_WASFRECOVERY", comma ? ", " : "");
1739                 comma = 1;
1740         }
1741         if (t_flags & TF_SIGNATURE) {
1742                 db_printf("%sTF_SIGNATURE", comma ? ", " : "");
1743                 comma = 1;
1744         }
1745         if (t_flags & TF_FORCEDATA) {
1746                 db_printf("%sTF_FORCEDATA", comma ? ", " : "");
1747                 comma = 1;
1748         }
1749         if (t_flags & TF_TSO) {
1750                 db_printf("%sTF_TSO", comma ? ", " : "");
1751                 comma = 1;
1752         }
1753         if (t_flags & TF_ECN_PERMIT) {
1754                 db_printf("%sTF_ECN_PERMIT", comma ? ", " : "");
1755                 comma = 1;
1756         }
1757 }
1758
1759 static void
1760 db_print_toobflags(char t_oobflags)
1761 {
1762         int comma;
1763
1764         comma = 0;
1765         if (t_oobflags & TCPOOB_HAVEDATA) {
1766                 db_printf("%sTCPOOB_HAVEDATA", comma ? ", " : "");
1767                 comma = 1;
1768         }
1769         if (t_oobflags & TCPOOB_HADDATA) {
1770                 db_printf("%sTCPOOB_HADDATA", comma ? ", " : "");
1771                 comma = 1;
1772         }
1773 }
1774
1775 static void
1776 db_print_tcpcb(struct tcpcb *tp, const char *name, int indent)
1777 {
1778
1779         db_print_indent(indent);
1780         db_printf("%s at %p\n", name, tp);
1781
1782         indent += 2;
1783
1784         db_print_indent(indent);
1785         db_printf("t_segq first: %p   t_segqlen: %d   t_dupacks: %d\n",
1786            LIST_FIRST(&tp->t_segq), tp->t_segqlen, tp->t_dupacks);
1787
1788         db_print_indent(indent);
1789         db_printf("tt_rexmt: %p   tt_persist: %p   tt_keep: %p\n",
1790             &tp->t_timers->tt_rexmt, &tp->t_timers->tt_persist, &tp->t_timers->tt_keep);
1791
1792         db_print_indent(indent);
1793         db_printf("tt_2msl: %p   tt_delack: %p   t_inpcb: %p\n", &tp->t_timers->tt_2msl,
1794             &tp->t_timers->tt_delack, tp->t_inpcb);
1795
1796         db_print_indent(indent);
1797         db_printf("t_state: %d (", tp->t_state);
1798         db_print_tstate(tp->t_state);
1799         db_printf(")\n");
1800
1801         db_print_indent(indent);
1802         db_printf("t_flags: 0x%x (", tp->t_flags);
1803         db_print_tflags(tp->t_flags);
1804         db_printf(")\n");
1805
1806         db_print_indent(indent);
1807         db_printf("snd_una: 0x%08x   snd_max: 0x%08x   snd_nxt: x0%08x\n",
1808             tp->snd_una, tp->snd_max, tp->snd_nxt);
1809
1810         db_print_indent(indent);
1811         db_printf("snd_up: 0x%08x   snd_wl1: 0x%08x   snd_wl2: 0x%08x\n",
1812            tp->snd_up, tp->snd_wl1, tp->snd_wl2);
1813
1814         db_print_indent(indent);
1815         db_printf("iss: 0x%08x   irs: 0x%08x   rcv_nxt: 0x%08x\n",
1816             tp->iss, tp->irs, tp->rcv_nxt);
1817
1818         db_print_indent(indent);
1819         db_printf("rcv_adv: 0x%08x   rcv_wnd: %lu   rcv_up: 0x%08x\n",
1820             tp->rcv_adv, tp->rcv_wnd, tp->rcv_up);
1821
1822         db_print_indent(indent);
1823         db_printf("snd_wnd: %lu   snd_cwnd: %lu   snd_bwnd: %lu\n",
1824            tp->snd_wnd, tp->snd_cwnd, tp->snd_bwnd);
1825
1826         db_print_indent(indent);
1827         db_printf("snd_ssthresh: %lu   snd_bandwidth: %lu   snd_recover: "
1828             "0x%08x\n", tp->snd_ssthresh, tp->snd_bandwidth,
1829             tp->snd_recover);
1830
1831         db_print_indent(indent);
1832         db_printf("t_maxopd: %u   t_rcvtime: %u   t_startime: %u\n",
1833             tp->t_maxopd, tp->t_rcvtime, tp->t_starttime);
1834
1835         db_print_indent(indent);
1836         db_printf("t_rttime: %u   t_rtsq: 0x%08x   t_bw_rtttime: %u\n",
1837             tp->t_rtttime, tp->t_rtseq, tp->t_bw_rtttime);
1838
1839         db_print_indent(indent);
1840         db_printf("t_bw_rtseq: 0x%08x   t_rxtcur: %d   t_maxseg: %u   "
1841             "t_srtt: %d\n", tp->t_bw_rtseq, tp->t_rxtcur, tp->t_maxseg,
1842             tp->t_srtt);
1843
1844         db_print_indent(indent);
1845         db_printf("t_rttvar: %d   t_rxtshift: %d   t_rttmin: %u   "
1846             "t_rttbest: %u\n", tp->t_rttvar, tp->t_rxtshift, tp->t_rttmin,
1847             tp->t_rttbest);
1848
1849         db_print_indent(indent);
1850         db_printf("t_rttupdated: %lu   max_sndwnd: %lu   t_softerror: %d\n",
1851             tp->t_rttupdated, tp->max_sndwnd, tp->t_softerror);
1852
1853         db_print_indent(indent);
1854         db_printf("t_oobflags: 0x%x (", tp->t_oobflags);
1855         db_print_toobflags(tp->t_oobflags);
1856         db_printf(")   t_iobc: 0x%02x\n", tp->t_iobc);
1857
1858         db_print_indent(indent);
1859         db_printf("snd_scale: %u   rcv_scale: %u   request_r_scale: %u\n",
1860             tp->snd_scale, tp->rcv_scale, tp->request_r_scale);
1861
1862         db_print_indent(indent);
1863         db_printf("ts_recent: %u   ts_recent_age: %u\n",
1864             tp->ts_recent, tp->ts_recent_age);
1865
1866         db_print_indent(indent);
1867         db_printf("ts_offset: %u   last_ack_sent: 0x%08x   snd_cwnd_prev: "
1868             "%lu\n", tp->ts_offset, tp->last_ack_sent, tp->snd_cwnd_prev);
1869
1870         db_print_indent(indent);
1871         db_printf("snd_ssthresh_prev: %lu   snd_recover_prev: 0x%08x   "
1872             "t_badrxtwin: %u\n", tp->snd_ssthresh_prev,
1873             tp->snd_recover_prev, tp->t_badrxtwin);
1874
1875         db_print_indent(indent);
1876         db_printf("snd_numholes: %d  snd_holes first: %p\n",
1877             tp->snd_numholes, TAILQ_FIRST(&tp->snd_holes));
1878
1879         db_print_indent(indent);
1880         db_printf("snd_fack: 0x%08x   rcv_numsacks: %d   sack_newdata: "
1881             "0x%08x\n", tp->snd_fack, tp->rcv_numsacks, tp->sack_newdata);
1882
1883         /* Skip sackblks, sackhint. */
1884
1885         db_print_indent(indent);
1886         db_printf("t_rttlow: %d   rfbuf_ts: %u   rfbuf_cnt: %d\n",
1887             tp->t_rttlow, tp->rfbuf_ts, tp->rfbuf_cnt);
1888 }
1889
1890 DB_SHOW_COMMAND(tcpcb, db_show_tcpcb)
1891 {
1892         struct tcpcb *tp;
1893
1894         if (!have_addr) {
1895                 db_printf("usage: show tcpcb <addr>\n");
1896                 return;
1897         }
1898         tp = (struct tcpcb *)addr;
1899
1900         db_print_tcpcb(tp, "tcpcb", 0);
1901 }
1902 #endif