2 * Copyright (c) 2007, Chelsio Inc.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
11 * 2. Neither the name of the Chelsio Corporation nor the names of its
12 * contributors may be used to endorse or promote products derived from
13 * this software without specific prior written permission.
15 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
19 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
20 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
21 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
24 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
25 * POSSIBILITY OF SUCH DAMAGE.
30 #ifndef _NETINET_TCP_OFFLOAD_H_
31 #define _NETINET_TCP_OFFLOAD_H_
34 #error "no user-serviceable parts inside"
38 * A driver publishes that it provides offload services
39 * by setting IFCAP_TOE in the ifnet. The offload connect
40 * will bypass any further work if the interface that a
41 * connection would use does not support TCP offload.
43 * The TOE API assumes that the tcp offload engine can offload the
44 * entire connection from set up to teardown, with some provision
45 * being made to allow the software stack to handle time wait. If
46 * the device does not meet these criteria, it is the driver's responsibility
47 * to overload the functions that it needs to in tcp_usrreqs and make
48 * its own calls to tcp_output if it needs to do so.
50 * There is currently no provision for the device advertising the congestion
51 * control algorithms it supports as there is currently no API for querying
52 * an operating system for the protocols that it has loaded. This is a desirable
57 * It is assumed that individuals deploying TOE will want connections
58 * to be offloaded without software changes so all connections on an
59 * interface providing TOE are offloaded unless the SO_NO_OFFLOAD
60 * flag is set on the socket.
63 * The toe_usrreqs structure constitutes the TOE driver's
64 * interface to the TCP stack for functionality that doesn't
65 * interact directly with userspace. If one wants to provide
66 * (optional) functionality to do zero-copy to/from
67 * userspace one still needs to override soreceive/sosend
68 * with functions that fault in and pin the user buffers.
71 * - tells the driver that new data may have been added to the
72 * socket's send buffer - the driver should not fail if the
73 * buffer is in fact unchanged
74 * - the driver is responsible for providing credits (bytes in the send window)
75 * back to the socket by calling sbdrop() as segments are acknowledged.
76 * - The driver expects the inpcb lock to be held - the driver is expected
77 * not to drop the lock. Hence the driver is not allowed to acquire the
78 * pcbinfo lock during this call.
81 * - returns credits to the driver and triggers window updates
82 * to the peer (a credit as used here is a byte in the peer's receive window)
83 * - the driver is expected to determine how many bytes have been
84 * consumed and credit that back to the card so that it can grow
85 * the window again by maintaining its own state between invocations.
86 * - In principle this could be used to shrink the window as well as
87 * grow the window, although it is not used for that now.
88 * - this function needs to correctly handle being called any number of
89 * times without any bytes being consumed from the receive buffer.
90 * - The driver expects the inpcb lock to be held - the driver is expected
91 * not to drop the lock. Hence the driver is not allowed to acquire the
92 * pcbinfo lock during this call.
95 * - tells the driver to send FIN to peer
96 * - driver is expected to send the remaining data and then do a clean half close
97 * - disconnect implies at least half-close so only send, reset, and detach
99 * - the driver is expected to handle transition through the shutdown
100 * state machine and allow the stack to support SO_LINGER.
101 * - The driver expects the inpcb lock to be held - the driver is expected
102 * not to drop the lock. Hence the driver is not allowed to acquire the
103 * pcbinfo lock during this call.
106 * - closes the connection and sends a RST to peer
107 * - driver is expected to trigger an RST and detach the toepcb
108 * - no further calls are legal after reset
109 * - The driver expects the inpcb lock to be held - the driver is expected
110 * not to drop the lock. Hence the driver is not allowed to acquire the
111 * pcbinfo lock during this call.
113 * The following fields in the tcpcb are expected to be referenced by the driver:
126 * The following fields in the inpcb are expected to be referenced by the driver:
134 * The following fields in the socket are expected to be referenced by the
145 * These functions all return 0 on success and can return the following errors
148 * + ENOBUFS: memory allocation failed
149 * + EMSGSIZE: MTU changed during the call
153 * + ENETUNREACH: the peer is no longer reachable
156 * - tells driver that the socket is going away so disconnect
157 * the toepcb and free appropriate resources
158 * - allows the driver to cleanly handle the case of connection state
159 * outliving the socket
160 * - no further calls are legal after detach
161 * - the driver is expected to provide its own synchronization between
162 * detach and receiving new data.
164 * + tu_syncache_event
165 * - even if it is not actually needed, the driver is expected to
166 * call syncache_add for the initial SYN and then syncache_expand
168 * - tells driver that a connection either has not been added or has
169 * been dropped from the syncache
170 * - the driver is expected to maintain state that lives outside the
171 * software stack so the syncache needs to be able to notify the
172 * toe driver that the software stack is not going to create a connection
174 * - The driver is responsible for any synchronization required between
175 * the syncache dropping an entry and the driver processing the SYN,ACK.
179 int (*tu_send)(struct tcpcb *tp);
180 int (*tu_rcvd)(struct tcpcb *tp);
181 int (*tu_disconnect)(struct tcpcb *tp);
182 int (*tu_reset)(struct tcpcb *tp);
183 void (*tu_detach)(struct tcpcb *tp);
184 void (*tu_syncache_event)(int event, void *toep);
188 * Proxy for struct tcpopt between TOE drivers and TCP functions.
191 u_int64_t to_flags; /* see tcpopt in tcp_var.h */
192 u_int16_t to_mss; /* maximum segment size */
193 u_int8_t to_wscale; /* window scaling */
195 u_int8_t _pad1; /* explicit pad for 64bit alignment */
196 u_int32_t _pad2; /* explicit pad for 64bit alignment */
197 u_int64_t _pad3[4]; /* TBD */
200 #define TOE_SC_ENTRY_PRESENT 1 /* 4-tuple already present */
201 #define TOE_SC_DROP 2 /* connection was timed out */
204 * Because listen is a one-to-many relationship (a socket can be listening
205 * on all interfaces on a machine some of which may be using different TCP
206 * offload devices), listen uses a publish/subscribe mechanism. The TCP
207 * offload driver registers a listen notification function with the stack.
208 * When a listen socket is created all TCP offload devices are notified
209 * so that they can do the appropriate set up to offload connections on the
210 * port to which the socket is bound. When the listen socket is closed,
211 * the offload devices are notified so that they will stop listening on that
212 * port and free any associated resources as well as sending RSTs on any
213 * connections in the SYN_RCVD state.
217 typedef void (*tcp_offload_listen_start_fn)(void *, struct tcpcb *);
218 typedef void (*tcp_offload_listen_stop_fn)(void *, struct tcpcb *);
220 EVENTHANDLER_DECLARE(tcp_offload_listen_start, tcp_offload_listen_start_fn);
221 EVENTHANDLER_DECLARE(tcp_offload_listen_stop, tcp_offload_listen_stop_fn);
224 * Check if the socket can be offloaded by the following steps:
225 * - determine the egress interface
226 * - check that the interface has TOE capability and that TOE is enabled
227 * - check if the device has resources to offload the connection
229 int tcp_offload_connect(struct socket *so, struct sockaddr *nam);
232 * The tcp_output_* routines are wrappers around the toe_usrreqs calls
233 * which trigger packet transmission. In the non-offloaded case they
234 * translate to tcp_output. The tcp_offload_* routines notify TOE
235 * of specific events. In the non-offloaded case they are no-ops.
237 * Listen is a special case because it is a 1 to many relationship
238 * and there can be more than one offload driver in the system.
242 * Connection is offloaded
244 #define tp_offload(tp) ((tp)->t_flags & TF_TOE)
247 * hackish way of allowing this file to also be included by TOE
248 * which needs to be kept ignorant of socket implementation details
250 #ifdef _SYS_SOCKETVAR_H_
252 * The socket has not been marked as "do not offload"
/*
 * Evaluates to non-zero when SO_NO_OFFLOAD is clear in so->so_options.
 * The argument is parenthesized so that any pointer-valued expression
 * (e.g. &sock or base + i) expands with the intended precedence.
 */
#define SO_OFFLOADABLE(so) (((so)->so_options & SO_NO_OFFLOAD) == 0)
257 tcp_output_connect(struct socket *so, struct sockaddr *nam)
259 struct tcpcb *tp = sototcpcb(so);
263 * If offload has been disabled for this socket or the
264 * connection cannot be offloaded just call tcp_output
265 * to start the TCP state machine.
267 #ifndef TCP_OFFLOAD_DISABLE
268 if (!SO_OFFLOADABLE(so) || (error = tcp_offload_connect(so, nam)) != 0)
270 error = tcp_output(tp);
275 tcp_output_send(struct tcpcb *tp)
278 #ifndef TCP_OFFLOAD_DISABLE
280 return (tp->t_tu->tu_send(tp));
282 return (tcp_output(tp));
286 tcp_output_rcvd(struct tcpcb *tp)
289 #ifndef TCP_OFFLOAD_DISABLE
291 return (tp->t_tu->tu_rcvd(tp));
293 return (tcp_output(tp));
297 tcp_output_disconnect(struct tcpcb *tp)
300 #ifndef TCP_OFFLOAD_DISABLE
302 return (tp->t_tu->tu_disconnect(tp));
304 return (tcp_output(tp));
308 tcp_output_reset(struct tcpcb *tp)
311 #ifndef TCP_OFFLOAD_DISABLE
313 return (tp->t_tu->tu_reset(tp));
315 return (tcp_output(tp));
319 tcp_offload_detach(struct tcpcb *tp)
322 #ifndef TCP_OFFLOAD_DISABLE
324 tp->t_tu->tu_detach(tp);
329 tcp_offload_listen_open(struct tcpcb *tp)
332 #ifndef TCP_OFFLOAD_DISABLE
333 if (SO_OFFLOADABLE(tp->t_inpcb->inp_socket))
334 EVENTHANDLER_INVOKE(tcp_offload_listen_start, tp);
339 tcp_offload_listen_close(struct tcpcb *tp)
342 #ifndef TCP_OFFLOAD_DISABLE
343 EVENTHANDLER_INVOKE(tcp_offload_listen_stop, tp);
346 #undef SO_OFFLOADABLE
347 #endif /* _SYS_SOCKETVAR_H_ */
350 void tcp_offload_twstart(struct tcpcb *tp);
351 struct tcpcb *tcp_offload_close(struct tcpcb *tp);
352 struct tcpcb *tcp_offload_drop(struct tcpcb *tp, int error);
354 #endif /* _NETINET_TCP_OFFLOAD_H_ */