2 * Copyright (C) 2004-2006, 2008 Internet Systems Consortium, Inc. ("ISC")
3 * Copyright (C) 1998-2002 Internet Software Consortium.
5 * Permission to use, copy, modify, and/or distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
9 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
18 /* $Id: socket.h,v 1.57.18.15 2008/09/04 08:03:08 marka Exp $ */
21 #define ISC_SOCKET_H 1
28 * \brief Provides TCP and UDP sockets for network I/O. The sockets are event
29 * sources in the task system.
31 * When I/O completes, a completion event for the socket is posted to the
32 * event queue of the task which requested the I/O.
35 * The module ensures appropriate synchronization of data structures it
36 * creates and manipulates.
37 * Clients of this module must not be holding a socket's task's lock when
38 * making a call that affects that socket. Failure to follow this rule
39 * can result in deadlock.
40 * The caller must ensure that isc_socketmgr_destroy() is called only
41 * once for a given manager.
44 * No anticipated impact.
50 * No anticipated impact.
61 #include <isc/types.h>
62 #include <isc/event.h>
63 #include <isc/eventclass.h>
65 #include <isc/region.h>
66 #include <isc/sockaddr.h>
75 * Maximum number of buffers in a scatter/gather read/write. The operating
76 * system in use must support at least this number (plus one on some systems).
78 #define ISC_SOCKET_MAXSCATTERGATHER 8
81 * In isc_socket_bind() set socket option SO_REUSEADDR prior to calling
82 * bind() if a non zero port is specified (AF_INET and AF_INET6).
84 #define ISC_SOCKET_REUSEADDRESS 0x01U
90 struct isc_socketevent {
91 ISC_EVENT_COMMON(isc_socketevent_t);
92 isc_result_t result; /*%< OK, EOF, whatever else */
93 unsigned int minimum; /*%< minimum i/o for event */
94 unsigned int n; /*%< bytes read or written */
95 unsigned int offset; /*%< offset into buffer list */
96 isc_region_t region; /*%< for single-buffer i/o */
97 isc_bufferlist_t bufferlist; /*%< list of buffers */
98 isc_sockaddr_t address; /*%< source address */
99 isc_time_t timestamp; /*%< timestamp of packet recv */
100 struct in6_pktinfo pktinfo; /*%< ipv6 pktinfo */
101 isc_uint32_t attributes; /*%< see below */
102 isc_eventdestructor_t destroy; /*%< original destructor */
105 typedef struct isc_socket_newconnev isc_socket_newconnev_t;
106 struct isc_socket_newconnev {
107 ISC_EVENT_COMMON(isc_socket_newconnev_t);
108 isc_socket_t * newsocket;
109 isc_result_t result; /*%< OK, EOF, whatever else */
110 isc_sockaddr_t address; /*%< source address */
113 typedef struct isc_socket_connev isc_socket_connev_t;
114 struct isc_socket_connev {
115 ISC_EVENT_COMMON(isc_socket_connev_t);
116 isc_result_t result; /*%< OK, EOF, whatever else */
121 * _ATTACHED: Internal use only.
122 * _TRUNC: Packet was truncated on receive.
123 * _CTRUNC: Packet control information was truncated. This can
124 * indicate that the packet is not complete, even though
125 * all the data is valid.
126 * _TIMESTAMP: The timestamp member is valid.
127 * _PKTINFO: The pktinfo member is valid.
128 * _MULTICAST: The UDP packet was received via a multicast transmission.
130 #define ISC_SOCKEVENTATTR_ATTACHED 0x80000000U /* internal */
131 #define ISC_SOCKEVENTATTR_TRUNC 0x00800000U /* public */
132 #define ISC_SOCKEVENTATTR_CTRUNC 0x00400000U /* public */
133 #define ISC_SOCKEVENTATTR_TIMESTAMP 0x00200000U /* public */
134 #define ISC_SOCKEVENTATTR_PKTINFO 0x00100000U /* public */
135 #define ISC_SOCKEVENTATTR_MULTICAST 0x00080000U /* public */
138 #define ISC_SOCKEVENT_ANYEVENT (0)
139 #define ISC_SOCKEVENT_RECVDONE (ISC_EVENTCLASS_SOCKET + 1)
140 #define ISC_SOCKEVENT_SENDDONE (ISC_EVENTCLASS_SOCKET + 2)
141 #define ISC_SOCKEVENT_NEWCONN (ISC_EVENTCLASS_SOCKET + 3)
142 #define ISC_SOCKEVENT_CONNECT (ISC_EVENTCLASS_SOCKET + 4)
147 #define ISC_SOCKEVENT_INTR (ISC_EVENTCLASS_SOCKET + 256)
148 #define ISC_SOCKEVENT_INTW (ISC_EVENTCLASS_SOCKET + 257)
151 isc_sockettype_udp = 1,
152 isc_sockettype_tcp = 2,
153 isc_sockettype_unix = 3
158 * How a socket should be shutdown in isc_socket_shutdown() calls.
160 #define ISC_SOCKSHUT_RECV 0x00000001 /*%< close read side */
161 #define ISC_SOCKSHUT_SEND 0x00000002 /*%< close write side */
162 #define ISC_SOCKSHUT_ALL 0x00000003 /*%< close them all */
167 * What I/O events to cancel in isc_socket_cancel() calls.
169 #define ISC_SOCKCANCEL_RECV 0x00000001 /*%< cancel recv */
170 #define ISC_SOCKCANCEL_SEND 0x00000002 /*%< cancel send */
171 #define ISC_SOCKCANCEL_ACCEPT 0x00000004 /*%< cancel accept */
172 #define ISC_SOCKCANCEL_CONNECT 0x00000008 /*%< cancel connect */
173 #define ISC_SOCKCANCEL_ALL 0x0000000f /*%< cancel everything */
178 * Flags for isc_socket_send() and isc_socket_recv() calls.
180 #define ISC_SOCKFLAG_IMMEDIATE 0x00000001 /*%< send event only if needed */
181 #define ISC_SOCKFLAG_NORETRY 0x00000002 /*%< drop failed UDP sends */
185 *** Socket and Socket Manager Functions
187 *** Note: all Ensures conditions apply only if the result is success for
188 *** those functions which return an isc_result.
192 isc_socket_create(isc_socketmgr_t *manager,
194 isc_sockettype_t type,
195 isc_socket_t **socketp);
197 * Create a new 'type' socket managed by 'manager'.
201 *\li 'pf' is the desired protocol family, e.g. PF_INET or PF_INET6.
205 *\li 'manager' is a valid manager
207 *\li 'socketp' is a valid pointer, and *socketp == NULL
211 * '*socketp' is attached to the newly created socket
217 *\li #ISC_R_NORESOURCES
218 *\li #ISC_R_UNEXPECTED
222 isc_socket_cancel(isc_socket_t *sock, isc_task_t *task,
225 * Cancel pending I/O of the type specified by "how".
227 * Note: if "task" is NULL, then the cancel applies to all tasks using the
232 * \li "socket" is a valid socket
234 * \li "task" is NULL or a valid task
236 * "how" is a bitmask describing the type of cancelation to perform.
237 * The type ISC_SOCKCANCEL_ALL will cancel all pending I/O on this
240 * \li ISC_SOCKCANCEL_RECV:
241 * Cancel pending isc_socket_recv() calls.
243 * \li ISC_SOCKCANCEL_SEND:
244 * Cancel pending isc_socket_send() and isc_socket_sendto() calls.
246 * \li ISC_SOCKCANCEL_ACCEPT:
247 * Cancel pending isc_socket_accept() calls.
249 * \li ISC_SOCKCANCEL_CONNECT:
250 * Cancel pending isc_socket_connect() call.
254 isc_socket_shutdown(isc_socket_t *sock, unsigned int how);
256 * Shutdown 'socket' according to 'how'.
260 * \li 'socket' is a valid socket.
262 * \li 'task' is NULL or is a valid task.
264 * \li If 'how' is 'ISC_SOCKSHUT_RECV' or 'ISC_SOCKSHUT_ALL' then
266 * The read queue must be empty.
268 * No further read requests may be made.
270 * \li If 'how' is 'ISC_SOCKSHUT_SEND' or 'ISC_SOCKSHUT_ALL' then
272 * The write queue must be empty.
274 * No further write requests may be made.
278 isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp);
280 * Attach *socketp to socket.
284 * \li 'socket' is a valid socket.
286 * \li 'socketp' points to a NULL socket.
290 * \li *socketp is attached to socket.
294 isc_socket_detach(isc_socket_t **socketp);
296 * Detach *socketp from its socket.
300 * \li 'socketp' points to a valid socket.
302 * \li If '*socketp' is the last reference to the socket,
305 * There must be no pending I/O requests.
309 * \li *socketp is NULL.
311 * \li If '*socketp' is the last reference to the socket,
314 * The socket will be shutdown (both reading and writing)
317 * All resources used by the socket have been freed
321 isc_socket_open(isc_socket_t *sock);
323 * Open a new socket file descriptor of the given socket structure. It simply
324 * opens a new descriptor; all of the other parameters including the socket
325 * type are inherited from the existing socket. This function is provided to
326 * avoid overhead of destroying and creating sockets when many short-lived
327 * sockets are frequently opened and closed. When the efficiency is not an
328 * issue, it should be safer to detach the unused socket and re-create a new
329 * one. This optimization may not be available for some systems, in which
330 * case this function will return ISC_R_NOTIMPLEMENTED and must not be used.
334 * \li there must be no other reference to this socket.
336 * \li 'socket' is a valid socket previously closed by isc_socket_close()
339 * Same as isc_socket_create().
340 * \li ISC_R_NOTIMPLEMENTED
344 isc_socket_close(isc_socket_t *sock);
346 * Close a socket file descriptor of the given socket structure. This function
347 * is provided as an alternative to destroying an unused socket when overhead
348 * of destroying/re-creating sockets can be significant, and is expected to be
349 * used with isc_socket_open(). This optimization may not be available for some
350 * systems, in which case this function will return ISC_R_NOTIMPLEMENTED and
355 * \li The socket must have a valid descriptor.
357 * \li There must be no other reference to this socket.
359 * \li There must be no pending I/O requests.
362 * \li #ISC_R_NOTIMPLEMENTED
366 isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *addressp,
367 unsigned int options);
369 * Bind 'socket' to '*addressp'.
373 * \li 'socket' is a valid socket
375 * \li 'addressp' points to a valid isc_sockaddr.
381 * \li ISC_R_ADDRNOTAVAIL
382 * \li ISC_R_ADDRINUSE
384 * \li ISC_R_UNEXPECTED
388 isc_socket_filter(isc_socket_t *sock, const char *filter);
390 * Inform the kernel that it should perform accept filtering.
391 * If filter is NULL the current filter will be removed.
395 isc_socket_listen(isc_socket_t *sock, unsigned int backlog);
397 * Set listen mode on the socket. After this call, the only function that
398 * can be used (other than attach and detach) is isc_socket_accept().
402 * \li 'backlog' is as in the UNIX system call listen() and may be
403 * ignored by non-UNIX implementations.
405 * \li If 'backlog' is zero, a reasonable system default is used, usually
410 * \li 'socket' is a valid, bound TCP socket or a valid, bound UNIX socket.
415 * \li ISC_R_UNEXPECTED
419 isc_socket_accept(isc_socket_t *sock,
420 isc_task_t *task, isc_taskaction_t action, const void *arg);
422 * Queue accept event. When a new connection is received, the task will
423 * get an ISC_SOCKEVENT_NEWCONN event with the sender set to the listen
424 * socket. The new socket structure is sent inside the isc_socket_newconnev_t
425 * event type, and is attached to the task 'task'.
428 * \li 'socket' is a valid TCP socket that isc_socket_listen() was called
431 * \li 'task' is a valid task
433 * \li 'action' is a valid action
438 * \li ISC_R_UNEXPECTED
442 isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addressp,
443 isc_task_t *task, isc_taskaction_t action,
446 * Connect 'socket' to peer with address *addressp. When the connection
447 * succeeds, or when an error occurs, a CONNECT event with action 'action'
448 * and arg 'arg' will be posted to the event queue for 'task'.
452 * \li 'socket' is a valid TCP socket
454 * \li 'addressp' points to a valid isc_sockaddr
456 * \li 'task' is a valid task
458 * \li 'action' is a valid action
464 * \li ISC_R_UNEXPECTED
466 * Posted event's result code:
470 * \li ISC_R_CONNREFUSED
471 * \li ISC_R_NETUNREACH
472 * \li ISC_R_UNEXPECTED
476 isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp);
478 * Get the name of the peer connected to 'socket'.
482 * \li 'socket' is a valid TCP socket.
488 * \li ISC_R_UNEXPECTED
492 isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp);
494 * Get the name of 'socket'.
498 * \li 'socket' is a valid socket.
504 * \li ISC_R_UNEXPECTED
509 isc_socket_recv(isc_socket_t *sock, isc_region_t *region,
510 unsigned int minimum,
511 isc_task_t *task, isc_taskaction_t action, const void *arg);
513 isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
514 unsigned int minimum,
515 isc_task_t *task, isc_taskaction_t action, const void *arg);
518 isc_socket_recv2(isc_socket_t *sock, isc_region_t *region,
519 unsigned int minimum, isc_task_t *task,
520 isc_socketevent_t *event, unsigned int flags);
523 * Receive from 'socket', storing the results in region.
527 *\li Let 'length' refer to the length of 'region' or to the sum of all
528 * available regions in the list of buffers '*buflist'.
530 *\li If 'minimum' is non-zero and at least that many bytes are read,
531 * the completion event will be posted to the task 'task.' If minimum
532 * is zero, the exact number of bytes requested in the region must
533 * be read for an event to be posted. This only makes sense for TCP
534 * connections, and is always set to 1 byte for UDP.
536 *\li The read will complete when the desired number of bytes have been
537 * read, if end-of-input occurs, or if an error occurs. A read done
538 * event with the given 'action' and 'arg' will be posted to the
539 * event queue of 'task'.
541 *\li The caller may not modify 'region', the buffers which are passed
542 * into this function, or any data they refer to until the completion
545 *\li For isc_socket_recvv():
546 * On successful completion, '*buflist' will be empty, and the list of
547 * all buffers will be returned in the done event's 'bufferlist'
548 * member. On error return, '*buflist' will be unchanged.
550 *\li For isc_socket_recv2():
551 * 'event' is not NULL, and the non-socket specific fields are
552 * expected to be initialized.
554 *\li For isc_socket_recv2():
555 * The only defined value for 'flags' is ISC_SOCKFLAG_IMMEDIATE. If
556 * set and the operation completes, the return value will be
557 * ISC_R_SUCCESS and the event will be filled in and not sent. If the
558 * operation does not complete, the return value will be
559 * ISC_R_INPROGRESS and the event will be sent when the operation
564 *\li 'socket' is a valid, bound socket.
566 *\li For isc_socket_recv():
567 * 'region' is a valid region
569 *\li For isc_socket_recvv():
570 * 'buflist' is non-NULL, and '*buflist' contains at least one buffer.
572 *\li 'task' is a valid task
574 *\li For isc_socket_recv() and isc_socket_recvv():
575 * action != NULL and is a valid action
577 *\li For isc_socket_recv2():
583 *\li #ISC_R_INPROGRESS
585 *\li #ISC_R_UNEXPECTED
590 *\li #ISC_R_UNEXPECTED
591 *\li XXX needs other net-type errors
597 isc_socket_send(isc_socket_t *sock, isc_region_t *region,
598 isc_task_t *task, isc_taskaction_t action, const void *arg);
600 isc_socket_sendto(isc_socket_t *sock, isc_region_t *region,
601 isc_task_t *task, isc_taskaction_t action, const void *arg,
602 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
604 isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
605 isc_task_t *task, isc_taskaction_t action, const void *arg);
607 isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
608 isc_task_t *task, isc_taskaction_t action, const void *arg,
609 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo);
611 isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region,
613 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
614 isc_socketevent_t *event, unsigned int flags);
617 * Send the contents of 'region' to the socket's peer.
621 *\li Shutting down the requestor's task *may* result in any
622 * still pending writes being dropped or completed, depending on the
623 * underlying OS implementation.
625 *\li If 'action' is NULL, then no completion event will be posted.
627 *\li The caller may not modify 'region', the buffers which are passed
628 * into this function, or any data they refer to until the completion
631 *\li For isc_socket_sendv() and isc_socket_sendtov():
632 * On successful completion, '*buflist' will be empty, and the list of
633 * all buffers will be returned in the done event's 'bufferlist'
634 * member. On error return, '*buflist' will be unchanged.
636 *\li For isc_socket_sendto2():
637 * 'event' is not NULL, and the non-socket specific fields are
638 * expected to be initialized.
640 *\li For isc_socket_sendto2():
641 * The only defined values for 'flags' are ISC_SOCKFLAG_IMMEDIATE
642 * and ISC_SOCKFLAG_NORETRY.
644 *\li If ISC_SOCKFLAG_IMMEDIATE is set and the operation completes, the
645 * return value will be ISC_R_SUCCESS and the event will be filled
646 * in and not sent. If the operation does not complete, the return
647 * value will be ISC_R_INPROGRESS and the event will be sent when
648 * the operation completes.
650 *\li ISC_SOCKFLAG_NORETRY can only be set for UDP sockets. If set
651 * and the send operation fails due to a transient error, the send
652 * will not be retried and the error will be indicated in the event.
653 * Using this option along with ISC_SOCKFLAG_IMMEDIATE allows the caller
654 * to specify a region that is allocated on the stack.
658 *\li 'socket' is a valid, bound socket.
660 *\li For isc_socket_send():
661 * 'region' is a valid region
663 *\li For isc_socket_sendv() and isc_socket_sendtov():
664 * 'buflist' is non-NULL, and '*buflist' contains at least one buffer.
666 *\li 'task' is a valid task
668 *\li For isc_socket_sendv(), isc_socket_sendtov(), isc_socket_send(), and
669 * isc_socket_sendto():
670 * action == NULL or is a valid action
672 *\li For isc_socket_sendto2():
678 *\li #ISC_R_INPROGRESS
680 *\li #ISC_R_UNEXPECTED
685 *\li #ISC_R_UNEXPECTED
686 *\li XXX needs other net-type errors
691 isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp);
694 isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
695 unsigned int maxsocks);
697 * Create a socket manager. If "maxsocks" is non-zero, it specifies the
698 * maximum number of sockets that the created manager should handle.
699 * isc_socketmgr_create() is equivalent to isc_socketmgr_create2() with
700 * "maxsocks" being zero.
704 *\li All memory will be allocated in memory context 'mctx'.
708 *\li 'mctx' is a valid memory context.
710 *\li 'managerp' points to a NULL isc_socketmgr_t.
714 *\li '*managerp' is a valid isc_socketmgr_t.
720 *\li #ISC_R_UNEXPECTED
721 *\li #ISC_R_NOTIMPLEMENTED
725 isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp);
727 * Returns in "*nsockp" the maximum number of sockets this manager may open.
731 *\li '*manager' is a valid isc_socketmgr_t.
732 *\li 'nsockp' is not NULL.
737 *\li #ISC_R_NOTIMPLEMENTED
741 isc_socketmgr_destroy(isc_socketmgr_t **managerp);
743 * Destroy a socket manager.
747 *\li This routine blocks until there are no sockets left in the manager,
748 * so if the caller holds any socket references using the manager, it
749 * must detach them before calling isc_socketmgr_destroy() or it will
754 *\li '*managerp' is a valid isc_socketmgr_t.
756 *\li All sockets managed by this manager are fully detached.
760 *\li *managerp == NULL
762 *\li All resources used by the manager have been freed.
766 isc_socket_gettype(isc_socket_t *sock);
768 * Returns the socket type for "sock."
772 *\li "sock" is a valid socket.
777 isc_socket_isbound(isc_socket_t *sock);
780 isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes);
782 * If the socket is an IPv6 socket set/clear the IPV6_V6ONLY socket
783 * option if the host OS supports this option.
786 *\li 'sock' is a valid socket.
791 isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active);
794 * Cleanup UNIX domain sockets in the file-system. If 'active' is true
795 * then just unlink the socket. If 'active' is false try to determine
796 * if there is a listener of the socket or not. If no listener is found
797 * then unlink socket.
799 * Prior to unlinking the path is tested to see if it is a socket.
801 * Note: there are a number of race conditions which cannot be avoided
802 * both in the filesystem and any application using UNIX domain
803 * sockets (e.g. socket is tested between bind() and listen(),
804 * the socket is deleted and replaced in the file-system between
805 * stat() and unlink()).
809 isc_socket_permunix(isc_sockaddr_t *sockaddr, isc_uint32_t perm,
810 isc_uint32_t owner, isc_uint32_t group);
812 * Set ownership and file permissions on the UNIX domain socket.
814 * Note: On Solaris and SunOS this secures the directory containing
815 * the socket as Solaris and SunOS do not honour the filesystem
816 * permissions on the socket.
819 * \li 'sockaddr' is a valid UNIX domain sockaddr.
827 isc__socketmgr_setreserved(isc_socketmgr_t *mgr, isc_uint32_t);
829 * Temporary. For use by named only.
834 #endif /* ISC_SOCKET_H */