2 .\" Copyright (c) 2006 Robert N. M. Watson
3 .\" Copyright (c) 2014 Benjamin J. Kaduk
4 .\" All rights reserved.
6 .\" Redistribution and use in source and binary forms, with or without
7 .\" modification, are permitted provided that the following conditions
9 .\" 1. Redistributions of source code must retain the above copyright
10 .\" notice, this list of conditions and the following disclaimer.
11 .\" 2. Redistributions in binary form must reproduce the above copyright
12 .\" notice, this list of conditions and the following disclaimer in the
13 .\" documentation and/or other materials provided with the distribution.
15 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 .Nd "kernel socket interface"
39 .Fn soabort "struct socket *so"
41 .Fn soaccept "struct socket *so" "struct sockaddr **nam"
43 .Fn socheckuid "struct socket *so" "uid_t uid"
45 .Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
47 .Fn soclose "struct socket *so"
49 .Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
52 .Fa "int dom" "struct socket **aso" "int type" "int proto"
53 .Fa "struct ucred *cred" "struct thread *td"
56 .Fn sodisconnect "struct socket *so"
58 .Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
60 .Fn sofree "struct socket *so"
62 .Fn sohasoutofband "struct socket *so"
64 .Fn solisten "struct socket *so" "int backlog" "struct thread *td"
66 .Fn solisten_proto "struct socket *so" "int backlog"
68 .Fn solisten_proto_check "struct socket *so"
70 .Fn sonewconn "struct socket *head" "int connstatus"
73 .Fa "struct socket *so" "int events" "struct ucred *active_cred"
74 .Fa "struct thread *td"
78 .Fa "struct socket *so" "int events" "struct ucred *active_cred"
79 .Fa "struct thread *td"
83 .Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
84 .Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
88 .Fa "struct socket *so" "struct sockaddr **paddr"
89 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
94 .Fa "struct socket *so" "struct sockaddr **paddr"
95 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
100 .Fa "struct socket *so" "struct sockaddr **paddr"
101 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
105 .Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
107 .Fn sorflush "struct socket *so"
110 .Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
111 .Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
115 .Fa "struct socket *so" "struct sockaddr *addr"
116 .Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
117 .Fa "int flags" "struct thread *td"
121 .Fa "struct socket *so" "struct sockaddr *addr"
122 .Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
123 .Fa "int flags" "struct thread *td"
126 .Fn soshutdown "struct socket *so" "int how"
128 .Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
130 .Fn soupcall_clear "struct socket *so" "int which"
133 .Fa "struct socket *so" "int which"
134 .Fa "int (*func)(struct socket *, void *, int)" "void *arg"
137 .Fn sowakeup "struct socket *so" "struct sockbuf *sb"
140 .Fn sosetopt "struct socket *so" "struct sockopt *sopt"
142 .Fn sogetopt "struct socket *so" "struct sockopt *sopt"
144 .Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
146 .Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
150 programming interface permits in-kernel consumers to interact with
151 local and network socket objects in a manner similar to that permitted using
155 These interfaces are appropriate for use by distributed file systems and
156 other network-aware kernel services.
157 While the user API operates on file descriptors, the kernel interfaces
161 Some portions of the kernel API exist only to implement the user API,
162 and are not expected to be used by kernel code.
163 The portions of the socket API used by socket consumers and
164 implementations of network protocols will differ; some routines
165 are only useful for protocol implementors.
167 Except where otherwise indicated,
169 functions may sleep, and are not appropriate for use in an
171 context or while holding non-sleepable kernel locks.
172 .Ss Creating and Destroying Sockets
173 A new socket may be created using
177 arguments specify the requested domain, type, and protocol via
181 The socket is returned via
184 In addition, the credential used to authorize operations associated with the
185 socket will be passed via
187 (and will be cached for the lifetime of the socket), and the thread
188 performing the operation via
191 authorization of the socket creation operation will be performed
192 using the thread credential for some protocols (such as raw sockets).
194 Sockets may be closed and freed using
196 which has similar semantics to
199 In certain circumstances, it is appropriate to destroy a socket without
200 waiting for it to disconnect, for which
203 This is only appropriate for incoming connections which are in a
204 partially connected state.
205 It must be called on an unreferenced socket, by the thread which
206 removed the socket from its listen queue, to prevent races.
207 It will call into protocol code, so no socket locks may be held
211 is responsible for setting the VNET context.
212 The normal path to freeing a socket is
214 which handles reference counting on the socket.
215 It should be called whenever a reference is released, and also whenever
216 reference flags are cleared in socket or protocol code.
219 should not be made from outside the socket layer; outside callers
223 .Ss Connections and Addresses
226 function is equivalent to the
228 system call, and binds the socket
232 The operation will be authorized using the credential on thread
237 function is equivalent to the
239 system call, and initiates a connection on the socket
243 The operation will be authorized using the credential on thread
245 Unlike the user system call,
247 returns immediately; the caller may
251 while holding the socket mutex and waiting for the
258 fails, the caller must manually clear the
264 disconnects the socket without closing it.
268 function is equivalent to the
270 system call, and causes part or all of a connection on a socket to be closed
273 Sockets are transitioned from non-listening status to listening with
278 function is equivalent to the
280 system call, and retrieves a socket option on socket
284 function is equivalent to the
286 system call, and sets a socket option on socket
289 The second argument in both
297 describing the socket option operation.
298 The caller-allocated structure must be zeroed, and then have its fields
299 initialized to specify socket option operation arguments:
300 .Bl -tag -width ".Va sopt_valsize"
306 depending on whether this is a get or set operation.
308 Specify the level in the network stack the operation is targeted at; for
312 Specify the name of the socket option to get or set.
314 Kernel space pointer to the argument value for the socket option.
316 Size of the argument value in bytes.
319 In order for the owner of a socket to be notified when the socket
320 is ready to send or receive data, an upcall may be registered on
322 The upcall is a function that will be called by the socket framework
323 when a socket buffer associated with the given socket is ready for
326 is used to register a socket upcall.
329 is registered, and the pointer
331 will be passed as its second argument when it is called by the framework.
332 The possible values for
338 which register upcalls for receive and send events, respectively.
345 depending on whether or not a call to
347 should be made by the socket framework after the upcall returns.
352 itself due to lock ordering with the socket buffer lock.
355 upcalls should return
361 the upcall will be removed from the socket.
363 Upcalls are removed from their socket by
367 argument again specifies whether the sending or receiving upcall is to
375 function is equivalent to the
377 system call, and attempts to receive bytes of data from the socket
379 optionally blocking to wait for data if none is ready to read.
380 Data may be retrieved directly to kernel or user memory via the
382 argument, or as an mbuf chain returned to the caller via
384 avoiding a data copy.
398 The caller may optionally retrieve a socket address on a protocol with the
400 capability by providing storage via
404 The caller may optionally retrieve control data mbufs via a
408 Optional flags may be passed to
413 argument, and use the same flag name space as the
419 function is equivalent to the
421 system call, and attempts to send bytes of data via the socket
423 optionally blocking if data cannot be immediately sent.
424 Data may be sent directly from kernel or user memory via the
426 argument, or as an mbuf chain via
428 avoiding a data copy.
435 An optional destination address may be specified via a
438 argument, which may result in an implicit connect if supported by the
440 The caller may optionally send control data mbufs via a
444 Flags may be passed to
448 argument, and use the same flag name space as the
452 Kernel callers running in
454 context, or with a mutex held, will wish to use non-blocking sockets and pass
457 flag in order to prevent these functions from sleeping.
459 A socket can be queried for readability, writability, out-of-band data,
462 The possible values for
480 pass through to the protocol's accept routine to accept an incoming connection.
481 .Ss Socket Utility Functions
482 The uid of a socket's credential may be compared against a
487 A copy of an existing
492 Protocol implementations notify the socket layer of the arrival of
493 out-of-band data using
495 so that the socket layer can notify socket consumers of the available data.
503 suitable for isolating user code from changes in the kernel structure.
504 .Ss Protocol Implementations
505 Protocols must supply an implementation for
507 such protocol implementations can call back into the socket layer using
508 .Fn solisten_proto_check
511 to check and set the socket-layer listen state.
512 These callbacks are provided so that the protocol implementation
513 can order the socket layer and protocol locks as necessary.
514 Protocols must supply an implementation of
517 .Fn soreceive_stream ,
518 .Fn soreceive_dgram ,
520 .Fn soreceive_generic
521 are supplied for use by such implementations.
523 Protocol implementations can use
525 to create a socket and attach protocol state to that socket.
526 This can be used to create new sockets available for
529 The returned socket has a reference count of zero.
531 Protocols must supply an implementation for
534 is provided for use by protocol implementations.
540 are supplied to assist in protocol implementations of
543 When a protocol creates a new socket structure, it is necessary to
544 reserve socket buffer space for that socket, by calling
546 The rough inverse of this reservation is performed by
548 which is called automatically by the socket framework.
550 When a protocol needs to wake up threads waiting for the socket to
551 become ready to read or write, variants of
556 function should not be called directly by protocol code; instead, use the
559 .Fn sorwakeup_locked ,
563 for readers and writers, with the corresponding socket buffer lock
564 not already locked, or already held, respectively.
570 are useful for transferring
572 data between user and kernel code.
590 system call appeared in
592 This manual page was introduced in
595 This manual page was written by
600 The use of explicitly passed credentials, credentials hung from explicitly
601 passed threads, the credential on
603 and the cached credential from
604 socket creation time is inconsistent, and may lead to unexpected behaviour.
605 It is possible that several of the
609 arguments, or simply not be present at all.
611 The caller may need to manually clear
619 flag is not implemented for
621 and may not always work with
623 when zero copy sockets are enabled.
625 This manual page does not describe how to register socket upcalls or monitor
626 a socket for readability/writability without using blocking I/O.
632 functions are not described, and in most cases should not be used, due to
633 confusing and potentially incorrect interactions when