2 .\" Copyright (c) 2006 Robert N. M. Watson
3 .\" Copyright (c) 2014 Benjamin J. Kaduk
4 .\" All rights reserved.
6 .\" Redistribution and use in source and binary forms, with or without
7 .\" modification, are permitted provided that the following conditions
9 .\" 1. Redistributions of source code must retain the above copyright
10 .\" notice, this list of conditions and the following disclaimer.
11 .\" 2. Redistributions in binary form must reproduce the above copyright
12 .\" notice, this list of conditions and the following disclaimer in the
13 .\" documentation and/or other materials provided with the distribution.
15 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 .\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 .Nd "kernel socket interface"
39 .Fn soabort "struct socket *so"
41 .Fn soaccept "struct socket *so" "struct sockaddr **nam"
43 .Fn socheckuid "struct socket *so" "uid_t uid"
45 .Fn sobind "struct socket *so" "struct sockaddr *nam" "struct thread *td"
47 .Fn soclose "struct socket *so"
49 .Fn soconnect "struct socket *so" "struct sockaddr *nam" "struct thread *td"
52 .Fa "int dom" "struct socket **aso" "int type" "int proto"
53 .Fa "struct ucred *cred" "struct thread *td"
56 .Fn sodisconnect "struct socket *so"
59 .Fa "struct socket *so"
60 .Fa "void (*func)(struct socket *)"
63 .Fn sodupsockaddr "const struct sockaddr *sa" "int mflags"
65 .Fn sofree "struct socket *so"
67 .Fn sohasoutofband "struct socket *so"
69 .Fn solisten "struct socket *so" "int backlog" "struct thread *td"
71 .Fn solisten_proto "struct socket *so" "int backlog"
73 .Fn solisten_proto_check "struct socket *so"
75 .Fn sonewconn "struct socket *head" "int connstatus"
78 .Fa "struct socket *so" "int events" "struct ucred *active_cred"
79 .Fa "struct thread *td"
83 .Fa "struct socket *so" "int events" "struct ucred *active_cred"
84 .Fa "struct thread *td"
88 .Fa "struct socket *so" "struct sockaddr **psa" "struct uio *uio"
89 .Fa "struct mbuf **mp0" "struct mbuf **controlp" "int *flagsp"
93 .Fa "struct socket *so" "struct sockaddr **paddr"
94 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
99 .Fa "struct socket *so" "struct sockaddr **paddr"
100 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
104 .Fo soreceive_generic
105 .Fa "struct socket *so" "struct sockaddr **paddr"
106 .Fa "struct uio *uio" "struct mbuf **mp0" "struct mbuf **controlp"
110 .Fn soreserve "struct socket *so" "u_long sndcc" "u_long rcvcc"
112 .Fn sorflush "struct socket *so"
115 .Fa "struct socket *so" "struct sockaddr *addr" "struct uio *uio"
116 .Fa "struct mbuf *top" "struct mbuf *control" "int flags" "struct thread *td"
120 .Fa "struct socket *so" "struct sockaddr *addr"
121 .Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
122 .Fa "int flags" "struct thread *td"
126 .Fa "struct socket *so" "struct sockaddr *addr"
127 .Fa "struct uio *uio" "struct mbuf *top" "struct mbuf *control"
128 .Fa "int flags" "struct thread *td"
131 .Fn soshutdown "struct socket *so" "int how"
133 .Fn sotoxsocket "struct socket *so" "struct xsocket *xso"
135 .Fn soupcall_clear "struct socket *so" "int which"
138 .Fa "struct socket *so" "int which"
139 .Fa "int (*func)(struct socket *, void *, int)" "void *arg"
142 .Fn sowakeup "struct socket *so" "struct sockbuf *sb"
145 .Fn sosetopt "struct socket *so" "struct sockopt *sopt"
147 .Fn sogetopt "struct socket *so" "struct sockopt *sopt"
149 .Fn sooptcopyin "struct sockopt *sopt" "void *buf" "size_t len" "size_t minlen"
151 .Fn sooptcopyout "struct sockopt *sopt" "const void *buf" "size_t len"
155 programming interface permits in-kernel consumers to interact with
156 local and network socket objects in a manner similar to that permitted using
160 These interfaces are appropriate for use by distributed file systems and
161 other network-aware kernel services.
162 While the user API operates on file descriptors, the kernel interfaces
166 Some portions of the kernel API exist only to implement the user API,
167 and are not expected to be used by kernel code.
168 The portions of the socket API used by socket consumers and
169 implementations of network protocols will differ; some routines
170 are only useful for protocol implementors.
172 Except where otherwise indicated,
174 functions may sleep, and are not appropriate for use in an
176 context or while holding non-sleepable kernel locks.
177 .Ss Creating and Destroying Sockets
178 A new socket may be created using
182 arguments specify the requested domain, type, and protocol via
186 The socket is returned via
189 In addition, the credential used to authorize operations associated with the
190 socket will be passed via
192 (and will be cached for the lifetime of the socket), and the thread
193 performing the operation via
196 authorization of the socket creation operation will be performed
197 using the thread credential for some protocols (such as raw sockets).
199 Sockets may be closed and freed using
201 which has similar semantics to
204 In certain circumstances, it is appropriate to destroy a socket without
205 waiting for it to disconnect, for which
208 This is only appropriate for incoming connections which are in a
209 partially connected state.
210 It must be called on an unreferenced socket, by the thread which
211 removed the socket from its listen queue, to prevent races.
212 It will call into protocol code, so no socket locks may be held
216 is responsible for setting the VNET context.
217 The normal path to freeing a socket is
219 which handles reference counting on the socket.
220 It should be called whenever a reference is released, and also whenever
221 reference flags are cleared in socket or protocol code.
224 should not be made from outside the socket layer; outside callers
228 .Ss Connections and Addresses
231 function is equivalent to the
233 system call, and binds the socket
237 The operation will be authorized using the credential on thread
242 function is equivalent to the
244 system call, and initiates a connection on the socket
248 The operation will be authorized using the credential on thread
250 Unlike the user system call,
252 returns immediately; the caller may
256 while holding the socket mutex and waiting for the
263 fails, the caller must manually clear the
269 disconnects the socket without closing it.
273 function is equivalent to the
275 system call, and causes part or all of a connection on a socket to be closed
278 Sockets are transitioned from non-listening status to listening with
283 function is equivalent to the
285 system call, and retrieves a socket option on socket
289 function is equivalent to the
291 system call, and sets a socket option on socket
294 The second argument in both
302 describing the socket option operation.
303 The caller-allocated structure must be zeroed, and then have its fields
304 initialized to specify socket option operation arguments:
305 .Bl -tag -width ".Va sopt_valsize"
311 depending on whether this is a get or set operation.
313 Specify the level in the network stack the operation is targeted at; for
317 Specify the name of the socket option to set.
319 Kernel space pointer to the argument value for the socket option.
321 Size of the argument value in bytes.
324 In order for the owner of a socket to be notified when the socket
325 is ready to send or receive data, an upcall may be registered on
327 The upcall is a function that will be called by the socket framework
328 when a socket buffer associated with the given socket is ready for
331 is used to register a socket upcall.
334 is registered, and the pointer
336 will be passed as its second argument when it is called by the framework.
337 The possible values for
343 which register upcalls for receive and send events, respectively.
350 depending on whether or not a call to
352 should be made by the socket framework after the upcall returns.
357 itself due to lock ordering with the socket buffer lock.
360 upcalls should return
366 the upcall will be removed from the socket.
368 Upcalls are removed from their socket by
372 argument again specifies whether the sending or receiving upcall is to
377 .Ss Socket Destructor Callback
378 A kernel system can use the
380 function to set a destructor for a socket.
381 The destructor is called when the socket is about to be freed.
382 The destructor is called before the protocol detach routine.
383 The destructor can serve as a callback to initiate additional cleanup actions.
387 function is equivalent to the
389 system call, and attempts to receive bytes of data from the socket
391 optionally blocking while waiting for data if none is ready to read.
392 Data may be retrieved directly to kernel or user memory via the
394 argument, or as an mbuf chain returned to the caller via
396 avoiding a data copy.
410 The caller may optionally retrieve a socket address on a protocol with the
412 capability by providing storage via
416 The caller may optionally retrieve control data mbufs via a
420 Optional flags may be passed to
425 argument, and use the same flag name space as the
431 function is equivalent to the
433 system call, and attempts to send bytes of data via the socket
435 optionally blocking if data cannot be immediately sent.
436 Data may be sent directly from kernel or user memory via the
438 argument, or as an mbuf chain via
440 avoiding a data copy.
447 An optional destination address may be specified via a
450 argument, which may result in an implicit connect if supported by the
452 The caller may optionally send control data mbufs via a
456 Flags may be passed to
460 argument, and use the same flag name space as the
464 Kernel callers running in
466 context, or with a mutex held, will wish to use non-blocking sockets and pass
469 flag in order to prevent these functions from sleeping.
471 A socket can be queried for readability, writability, out-of-band data,
474 The possible values for
492 pass through to the protocol's accept routine to accept an incoming connection.
493 .Ss Socket Utility Functions
494 The uid of a socket's credential may be compared against a
499 A copy of an existing
504 Protocol implementations notify the socket layer of the arrival of
505 out-of-band data using
507 so that the socket layer can notify socket consumers of the available data.
515 suitable for isolating user code from changes in the kernel structure.
516 .Ss Protocol Implementations
517 Protocols must supply an implementation for
519 such protocol implementations can call back into the socket layer using
520 .Fn solisten_proto_check
523 to check and set the socket-layer listen state.
524 These callbacks are provided so that the protocol implementation
525 can order the socket layer and protocol locks as necessary.
526 Protocols must supply an implementation of
529 .Fn soreceive_stream ,
530 .Fn soreceive_dgram ,
532 .Fn soreceive_generic
533 are supplied for use by such implementations.
535 Protocol implementations can use
537 to create a socket and attach protocol state to that socket.
538 This can be used to create new sockets available for
541 The returned socket has a reference count of zero.
543 Protocols must supply an implementation for
546 is provided for use by protocol implementations.
552 are supplied to assist in protocol implementations of
555 When a protocol creates a new socket structure, it is necessary to
556 reserve socket buffer space for that socket, by calling
558 The rough inverse of this reservation is performed by
560 which is called automatically by the socket framework.
562 When a protocol needs to wake up threads waiting for the socket to
563 become ready to read or write, variants of
568 function should not be called directly by protocol code; instead use the
571 .Fn sorwakeup_locked ,
575 for readers and writers, with the corresponding socket buffer lock
576 not already locked, or already held, respectively.
582 are useful for transferring
584 data between user and kernel code.
602 system call appeared in
604 This manual page was introduced in
607 This manual page was written by
612 The use of explicitly passed credentials, credentials hung from explicitly
613 passed threads, the credential on
615 and the cached credential from
616 socket creation time is inconsistent, and may lead to unexpected behaviour.
617 It is possible that several of the
621 arguments, or simply not be present at all.
623 The caller may need to manually clear
631 flag is not implemented for
633 and may not always work with
635 when zero copy sockets are enabled.
637 This manual page does not describe how to register socket upcalls or monitor
638 a socket for readability/writability without using blocking I/O.
644 functions are not described, and in most cases should not be used, due to
645 confusing and potentially incorrect interactions when