2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2005-2014 Intel Corporation. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #if !defined(RDMA_CMA_H)
37 #include <netinet/in.h>
38 #include <sys/socket.h>
39 #include <infiniband/verbs.h>
40 #include <infiniband/sa.h>
47 * Upon receiving a device removal event, users must destroy the associated
48 * RDMA identifier and release all resources allocated with the device.
50 enum rdma_cm_event_type {
51 RDMA_CM_EVENT_ADDR_RESOLVED,
52 RDMA_CM_EVENT_ADDR_ERROR,
53 RDMA_CM_EVENT_ROUTE_RESOLVED,
54 RDMA_CM_EVENT_ROUTE_ERROR,
55 RDMA_CM_EVENT_CONNECT_REQUEST,
56 RDMA_CM_EVENT_CONNECT_RESPONSE,
57 RDMA_CM_EVENT_CONNECT_ERROR,
58 RDMA_CM_EVENT_UNREACHABLE,
59 RDMA_CM_EVENT_REJECTED,
60 RDMA_CM_EVENT_ESTABLISHED,
61 RDMA_CM_EVENT_DISCONNECTED,
62 RDMA_CM_EVENT_DEVICE_REMOVAL,
63 RDMA_CM_EVENT_MULTICAST_JOIN,
64 RDMA_CM_EVENT_MULTICAST_ERROR,
65 RDMA_CM_EVENT_ADDR_CHANGE,
66 RDMA_CM_EVENT_TIMEWAIT_EXIT
69 enum rdma_port_space {
70 RDMA_PS_IPOIB = 0x0002,
/*
 * 64-bit constants that split a value into a port-space part and a port part:
 * RDMA_IB_IP_PS_MASK selects the upper 48 bits and RDMA_IB_IP_PORT_MASK the
 * lower 16 bits (the two masks are complementary).  RDMA_IB_IP_PS_TCP,
 * RDMA_IB_IP_PS_UDP and RDMA_IB_PS_IB all have their low 16 bits clear, so
 * each lies entirely within the bits selected by RDMA_IB_IP_PS_MASK.
 * NOTE(review): presumably these describe the layout of an IB service ID
 * used for IP-based connections -- confirm against the IB CM documentation.
 */
76 #define RDMA_IB_IP_PS_MASK 0xFFFFFFFFFFFF0000ULL
77 #define RDMA_IB_IP_PORT_MASK 0x000000000000FFFFULL
78 #define RDMA_IB_IP_PS_TCP 0x0000000001060000ULL
79 #define RDMA_IB_IP_PS_UDP 0x0000000001110000ULL
80 #define RDMA_IB_PS_IB 0x00000000013F0000ULL
83 * Global qkey value for UDP QPs and multicast groups created via the
86 #define RDMA_UDP_QKEY 0x01234567
96 struct sockaddr src_addr;
97 struct sockaddr_in src_sin;
98 struct sockaddr_in6 src_sin6;
99 struct sockaddr_storage src_storage;
102 struct sockaddr dst_addr;
103 struct sockaddr_in dst_sin;
104 struct sockaddr_in6 dst_sin6;
105 struct sockaddr_storage dst_storage;
108 struct rdma_ib_addr ibaddr;
113 struct rdma_addr addr;
114 struct ibv_sa_path_rec *path_rec;
118 struct rdma_event_channel {
123 struct ibv_context *verbs;
124 struct rdma_event_channel *channel;
127 struct rdma_route route;
128 enum rdma_port_space ps;
130 struct rdma_cm_event *event;
131 struct ibv_comp_channel *send_cq_channel;
132 struct ibv_cq *send_cq;
133 struct ibv_comp_channel *recv_cq_channel;
134 struct ibv_cq *recv_cq;
137 enum ibv_qp_type qp_type;
141 RDMA_MAX_RESP_RES = 0xFF,
142 RDMA_MAX_INIT_DEPTH = 0xFF
145 struct rdma_conn_param {
146 const void *private_data;
147 uint8_t private_data_len;
148 uint8_t responder_resources;
149 uint8_t initiator_depth;
150 uint8_t flow_control;
151 uint8_t retry_count; /* ignored when accepting */
152 uint8_t rnr_retry_count;
153 /* Fields below ignored if a QP is created on the rdma_cm_id. */
158 struct rdma_ud_param {
159 const void *private_data;
160 uint8_t private_data_len;
161 struct ibv_ah_attr ah_attr;
166 struct rdma_cm_event {
167 struct rdma_cm_id *id;
168 struct rdma_cm_id *listen_id;
169 enum rdma_cm_event_type event;
172 struct rdma_conn_param conn;
173 struct rdma_ud_param ud;
/*
 * RAI_* flag bits.  Each value is a distinct single bit, so the flags can be
 * OR-ed together.
 * NOTE(review): presumably these are stored in the flags member of
 * struct rdma_addrinfo and passed as hints to rdma_getaddrinfo; that member
 * is not visible here -- confirm against the rdma_getaddrinfo(3) man page.
 */
177 #define RAI_PASSIVE 0x00000001
178 #define RAI_NUMERICHOST 0x00000002
179 #define RAI_NOROUTE 0x00000004
180 #define RAI_FAMILY 0x00000008
182 struct rdma_addrinfo {
187 socklen_t ai_src_len;
188 socklen_t ai_dst_len;
189 struct sockaddr *ai_src_addr;
190 struct sockaddr *ai_dst_addr;
191 char *ai_src_canonname;
192 char *ai_dst_canonname;
195 size_t ai_connect_len;
197 struct rdma_addrinfo *ai_next;
201 * rdma_create_event_channel - Open a channel used to report communication events.
203 * Asynchronous events are reported to users through event channels. Each
204 * event channel maps to a file descriptor.
206 * All created event channels must be destroyed by calling
207 * rdma_destroy_event_channel. Users should call rdma_get_cm_event to
208 * retrieve events on an event channel.
210 * rdma_get_cm_event, rdma_destroy_event_channel
212 struct rdma_event_channel *rdma_create_event_channel(void);
215 * rdma_destroy_event_channel - Close an event communication channel.
216 * @channel: The communication channel to destroy.
218 * Releases all resources associated with an event channel and closes the
219 * associated file descriptor.
221 * All rdma_cm_id's associated with the event channel must be destroyed,
222 * and all returned events must be acked before calling this function.
224 * rdma_create_event_channel, rdma_get_cm_event, rdma_ack_cm_event
226 void rdma_destroy_event_channel(struct rdma_event_channel *channel);
229 * rdma_create_id - Allocate a communication identifier.
230 * @channel: The communication channel that events associated with the
231 * allocated rdma_cm_id will be reported on.
232 * @id: A reference where the allocated communication identifier will be
234 * @context: User specified context associated with the rdma_cm_id.
235 * @ps: RDMA port space.
237 * Creates an identifier that is used to track communication information.
239 * Rdma_cm_id's are conceptually equivalent to a socket for RDMA
240 * communication. The difference is that RDMA communication requires
241 * explicitly binding to a specified RDMA device before communication
242 * can occur, and most operations are asynchronous in nature. Communication
243 * events on an rdma_cm_id are reported through the associated event
244 * channel. Users must release the rdma_cm_id by calling rdma_destroy_id.
246 * rdma_create_event_channel, rdma_destroy_id, rdma_get_devices,
247 * rdma_bind_addr, rdma_resolve_addr, rdma_connect, rdma_listen,
249 int rdma_create_id(struct rdma_event_channel *channel,
250 struct rdma_cm_id **id, void *context,
251 enum rdma_port_space ps);
254 * rdma_create_ep - Allocate a communication identifier and qp.
255 * @id: A reference where the allocated communication identifier will be
257 * @res: Result from rdma_getaddrinfo, which specifies the source and
258 * destination addresses, plus optional routing and connection information.
259 * @pd: Optional protection domain. This parameter is ignored if qp_init_attr
261 * @qp_init_attr: Optional attributes for a QP created on the rdma_cm_id.
263 * Create an identifier and optional QP used for communication.
265 * If qp_init_attr is provided, then a queue pair will be allocated and
266 * associated with the rdma_cm_id. If a pd is provided, the QP will be
267 * created on that PD. Otherwise, the QP will be allocated on a default
269 * The rdma_cm_id will be set to use synchronous operations (connect,
270 * listen, and get_request). To convert to asynchronous operation, the
271 * rdma_cm_id should be migrated to a user allocated event channel.
273 * rdma_create_id, rdma_create_qp, rdma_migrate_id, rdma_connect,
276 int rdma_create_ep(struct rdma_cm_id **id, struct rdma_addrinfo *res,
277 struct ibv_pd *pd, struct ibv_qp_init_attr *qp_init_attr);
280 * rdma_destroy_ep - Deallocates a communication identifier and qp.
281 * @id: The communication identifier to destroy.
283 * Destroys the specified rdma_cm_id and any associated QP created
288 void rdma_destroy_ep(struct rdma_cm_id *id);
291 * rdma_destroy_id - Release a communication identifier.
292 * @id: The communication identifier to destroy.
294 * Destroys the specified rdma_cm_id and cancels any outstanding
295 * asynchronous operation.
297 * Users must free any associated QP with the rdma_cm_id before
298 * calling this routine and ack any related events.
300 * rdma_create_id, rdma_destroy_qp, rdma_ack_cm_event
302 int rdma_destroy_id(struct rdma_cm_id *id);
305 * rdma_bind_addr - Bind an RDMA identifier to a source address.
306 * @id: RDMA identifier.
307 * @addr: Local address information. Wildcard values are permitted.
309 * Associates a source address with an rdma_cm_id. The address may be
310 * wildcarded. If binding to a specific local address, the rdma_cm_id
311 * will also be bound to a local RDMA device.
313 * Typically, this routine is called before calling rdma_listen to bind
314 * to a specific port number, but it may also be called on the active side
315 * of a connection before calling rdma_resolve_addr to bind to a specific
318 * rdma_create_id, rdma_listen, rdma_resolve_addr, rdma_create_qp
320 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
323 * rdma_resolve_addr - Resolve destination and optional source addresses.
324 * @id: RDMA identifier.
325 * @src_addr: Source address information. This parameter may be NULL.
326 * @dst_addr: Destination address information.
327 * @timeout_ms: Time to wait for resolution to complete.
329 * Resolve destination and optional source addresses from IP addresses
330 * to an RDMA address. If successful, the specified rdma_cm_id will
331 * be bound to a local device.
333 * This call is used to map a given destination IP address to a usable RDMA
334 * address. If a source address is given, the rdma_cm_id is bound to that
335 * address, the same as if rdma_bind_addr were called. If no source
336 * address is given, and the rdma_cm_id has not yet been bound to a device,
337 * then the rdma_cm_id will be bound to a source address based on the
338 * local routing tables. After this call, the rdma_cm_id will be bound to
339 * an RDMA device. This call is typically made from the active side of a
340 * connection before calling rdma_resolve_route and rdma_connect.
342 * rdma_create_id, rdma_resolve_route, rdma_connect, rdma_create_qp,
343 * rdma_get_cm_event, rdma_bind_addr
345 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
346 struct sockaddr *dst_addr, int timeout_ms);
349 * rdma_resolve_route - Resolve the route information needed to establish a connection.
350 * @id: RDMA identifier.
351 * @timeout_ms: Time to wait for resolution to complete.
353 * Resolves an RDMA route to the destination address in order to establish
354 * a connection. The destination address must have already been resolved
355 * by calling rdma_resolve_addr.
357 * This is called on the client side of a connection after calling
358 * rdma_resolve_addr, but before calling rdma_connect.
360 * rdma_resolve_addr, rdma_connect, rdma_get_cm_event
362 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
365 * rdma_create_qp - Allocate a QP.
366 * @id: RDMA identifier.
367 * @pd: Optional protection domain for the QP.
368 * @qp_init_attr: initial QP attributes.
370 * Allocate a QP associated with the specified rdma_cm_id and transition it
371 * for sending and receiving.
373 * The rdma_cm_id must be bound to a local RDMA device before calling this
374 * function, and the protection domain must be for that same device.
375 * QPs allocated to an rdma_cm_id are automatically transitioned by the
376 * librdmacm through their states. After being allocated, the QP will be
377 * ready to handle posting of receives. If the QP is unconnected, it will
378 * be ready to post sends.
379 * If pd is NULL, then the QP will be allocated using a default protection
380 * domain associated with the underlying RDMA device.
382 * rdma_bind_addr, rdma_resolve_addr, rdma_destroy_qp, ibv_create_qp,
385 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
386 struct ibv_qp_init_attr *qp_init_attr);
/*
 * rdma_create_qp_ex - variant of rdma_create_qp that takes a
 * struct ibv_qp_init_attr_ex and has no separate pd argument.
 * @id: RDMA identifier.
 * @qp_init_attr: extended initial QP attributes.
 * NOTE(review): presumably the protection domain is supplied through
 * qp_init_attr instead of a parameter -- confirm against the
 * ibv_create_qp_ex documentation.
 */
387 int rdma_create_qp_ex(struct rdma_cm_id *id,
388 struct ibv_qp_init_attr_ex *qp_init_attr);
391 * rdma_destroy_qp - Deallocate a QP.
392 * @id: RDMA identifier.
394 * Destroy a QP allocated on the rdma_cm_id.
396 * Users must destroy any QP associated with an rdma_cm_id before
399 * rdma_create_qp, rdma_destroy_id, ibv_destroy_qp
401 void rdma_destroy_qp(struct rdma_cm_id *id);
404 * rdma_connect - Initiate an active connection request.
405 * @id: RDMA identifier.
406 * @conn_param: optional connection parameters.
408 * For a connected rdma_cm_id, this call initiates a connection request
409 * to a remote destination. For an unconnected rdma_cm_id, it initiates
410 * a lookup of the remote QP providing the datagram service.
412 * Users must have resolved a route to the destination address
413 * by having called rdma_resolve_route before calling this routine.
414 * A user may override the default connection parameters and exchange
415 * private data as part of the connection by using the conn_param parameter.
417 * rdma_resolve_route, rdma_disconnect, rdma_listen, rdma_get_cm_event
419 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
422 * rdma_listen - Listen for incoming connection requests.
423 * @id: RDMA identifier.
424 * @backlog: backlog of incoming connection requests.
426 * Initiates a listen for incoming connection requests or datagram service
427 * lookup. The listen will be restricted to the locally bound source
430 * Users must have bound the rdma_cm_id to a local address by calling
431 * rdma_bind_addr before calling this routine. If the rdma_cm_id is
432 * bound to a specific IP address, the listen will be restricted to that
433 * address and the associated RDMA device. If the rdma_cm_id is bound
434 * to an RDMA port number only, the listen will occur across all RDMA
437 * rdma_bind_addr, rdma_connect, rdma_accept, rdma_reject, rdma_get_cm_event
439 int rdma_listen(struct rdma_cm_id *id, int backlog);
/*
 * rdma_get_request - takes an rdma_cm_id (@listen) and a reference to an
 * rdma_cm_id pointer (@id); returns int.
 * NOTE(review): presumably retrieves the next pending connection request on
 * the listening @listen and stores the newly created rdma_cm_id in *@id; the
 * original comment for this routine is not present here -- confirm against
 * the rdma_get_request(3) man page.
 */
444 int rdma_get_request(struct rdma_cm_id *listen, struct rdma_cm_id **id);
447 * rdma_accept - Called to accept a connection request.
448 * @id: Connection identifier associated with the request.
449 * @conn_param: Optional information needed to establish the connection.
451 * Called from the listening side to accept a connection or datagram
452 * service lookup request.
454 * Unlike the socket accept routine, rdma_accept is not called on a
455 * listening rdma_cm_id. Instead, after calling rdma_listen, the user
456 * waits for a connection request event to occur. Connection request
457 * events give the user a newly created rdma_cm_id, similar to a new
458 * socket, but the rdma_cm_id is bound to a specific RDMA device.
459 * rdma_accept is called on the new rdma_cm_id.
460 * A user may override the default connection parameters and exchange
461 * private data as part of the connection by using the conn_param parameter.
463 * rdma_listen, rdma_reject, rdma_get_cm_event
465 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
468 * rdma_reject - Called to reject a connection request.
469 * @id: Connection identifier associated with the request.
470 * @private_data: Optional private data to send with the reject message.
471 * @private_data_len: Size of the private_data to send, in bytes.
473 * Called from the listening side to reject a connection or datagram
474 * service lookup request.
476 * After receiving a connection request event, a user may call rdma_reject
477 * to reject the request. If the underlying RDMA transport supports
478 * private data in the reject message, the specified data will be passed to
481 * rdma_listen, rdma_accept, rdma_get_cm_event
483 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
484 uint8_t private_data_len);
487 * rdma_notify - Notifies the librdmacm of an asynchronous event.
488 * @id: RDMA identifier.
489 * @event: Asynchronous event.
491 * Used to notify the librdmacm of asynchronous events that have occurred
492 * on a QP associated with the rdma_cm_id.
494 * Asynchronous events that occur on a QP are reported through the user's
495 * device event handler. This routine is used to notify the librdmacm of
496 * communication events. In most cases, use of this routine is not
497 * necessary, however if connection establishment is done out of band
498 * (such as done through Infiniband), it's possible to receive data on a
499 * QP that is not yet considered connected. This routine forces the
500 * connection into an established state in this case in order to handle
501 * the rare situation where the connection never forms on its own.
502 * Events that should be reported to the CM are: IBV_EVENT_COMM_EST.
504 * rdma_connect, rdma_accept, rdma_listen
506 int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event);
509 * rdma_disconnect - This function disconnects a connection.
510 * @id: RDMA identifier.
512 * Disconnects a connection and transitions any associated QP to the
515 * rdma_connect, rdma_listen, rdma_accept
517 int rdma_disconnect(struct rdma_cm_id *id);
520 * rdma_join_multicast - Joins a multicast group.
521 * @id: Communication identifier associated with the request.
522 * @addr: Multicast address identifying the group to join.
523 * @context: User-defined context associated with the join request.
525 * Joins a multicast group and attaches an associated QP to the group.
527 * Before joining a multicast group, the rdma_cm_id must be bound to
528 * an RDMA device by calling rdma_bind_addr or rdma_resolve_addr. Use of
529 * rdma_resolve_addr requires the local routing tables to resolve the
530 * multicast address to an RDMA device. The user must call
531 * rdma_leave_multicast to leave the multicast group and release any
532 * multicast resources. The context is returned to the user through
533 * the private_data field in the rdma_cm_event.
535 * rdma_leave_multicast, rdma_bind_addr, rdma_resolve_addr, rdma_create_qp
537 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
541 * rdma_leave_multicast - Leaves a multicast group.
542 * @id: Communication identifier associated with the request.
543 * @addr: Multicast address identifying the group to leave.
545 * Leaves a multicast group and detaches an associated QP from the group.
547 * Calling this function before a group has been fully joined results in
548 * canceling the join operation. Users should be aware that messages
549 * received from the multicast group may still be queued for
550 * completion processing immediately after leaving a multicast group.
551 * Destroying an rdma_cm_id will automatically leave all multicast groups.
553 * rdma_join_multicast, rdma_destroy_qp
555 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
558 * rdma_get_cm_event - Retrieves the next pending communication event.
559 * @channel: Event channel to check for events.
560 * @event: Allocated information about the next communication event.
562 * Retrieves a communication event. If no events are pending, by default,
563 * the call will block until an event is received.
565 * The default synchronous behavior of this routine can be changed by
566 * modifying the file descriptor associated with the given channel. All
567 * events that are reported must be acknowledged by calling rdma_ack_cm_event.
568 * Destruction of an rdma_cm_id will block until related events have been
571 * rdma_ack_cm_event, rdma_create_event_channel, rdma_event_str
573 int rdma_get_cm_event(struct rdma_event_channel *channel,
574 struct rdma_cm_event **event);
577 * rdma_ack_cm_event - Free a communication event.
578 * @event: Event to be released.
580 * All events which are allocated by rdma_get_cm_event must be released,
581 * there should be a one-to-one correspondence between successful gets
584 * rdma_get_cm_event, rdma_destroy_id
586 int rdma_ack_cm_event(struct rdma_cm_event *event);
/*
 * rdma_get_src_port / rdma_get_dst_port - port accessors for an rdma_cm_id.
 * @id: RDMA identifier.
 * Both return a __be16.
 * NOTE(review): presumably the local and remote port numbers in network
 * byte order (suggested by the names and the __be16 type) -- confirm.
 */
588 __be16 rdma_get_src_port(struct rdma_cm_id *id);
589 __be16 rdma_get_dst_port(struct rdma_cm_id *id);
/*
 * rdma_get_local_addr - return a pointer to the source address stored in
 * the identifier's route (id->route.addr.src_addr).
 * @id: RDMA identifier; dereferenced without a NULL check.
 */
591 static inline struct sockaddr *rdma_get_local_addr(struct rdma_cm_id *id)
593 return &id->route.addr.src_addr;
/*
 * rdma_get_peer_addr - return a pointer to the destination address stored in
 * the identifier's route (id->route.addr.dst_addr).
 * @id: RDMA identifier; dereferenced without a NULL check.
 */
596 static inline struct sockaddr *rdma_get_peer_addr(struct rdma_cm_id *id)
598 return &id->route.addr.dst_addr;
602 * rdma_get_devices - Get list of RDMA devices currently available.
603 * @num_devices: If non-NULL, set to the number of devices returned.
605 * Return a NULL-terminated array of opened RDMA devices. Callers can use
606 * this routine to allocate resources on specific RDMA devices that will be
607 * shared across multiple rdma_cm_id's.
609 * The returned array must be released by calling rdma_free_devices. Devices
610 * remain opened while the librdmacm is loaded.
614 struct ibv_context **rdma_get_devices(int *num_devices);
617 * rdma_free_devices - Frees the list of devices returned by rdma_get_devices.
618 * @list: List of devices returned from rdma_get_devices.
620 * Frees the device array returned by rdma_get_devices.
624 void rdma_free_devices(struct ibv_context **list);
627 * rdma_event_str - Returns a string representation of an rdma cm event.
628 * @event: Asynchronous event.
630 * Returns a string representation of an asynchronous event.
634 const char *rdma_event_str(enum rdma_cm_event_type event);
644 RDMA_OPTION_ID_TOS = 0, /* uint8_t: RFC 2474 */
645 RDMA_OPTION_ID_REUSEADDR = 1, /* int: ~SO_REUSEADDR */
646 RDMA_OPTION_ID_AFONLY = 2, /* int: ~IPV6_V6ONLY */
647 RDMA_OPTION_IB_PATH = 1 /* struct ibv_path_data[] */
651 * rdma_set_option - Set options for an rdma_cm_id.
652 * @id: Communication identifier to set option for.
653 * @level: Protocol level of the option to set.
654 * @optname: Name of the option to set.
655 * @optval: Reference to the option data.
656 * @optlen: The size of the %optval buffer.
658 int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
659 void *optval, size_t optlen);
662 * rdma_migrate_id - Move an rdma_cm_id to a new event channel.
663 * @id: Communication identifier to migrate.
664 * @channel: New event channel for rdma_cm_id events.
666 int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel);
669 * rdma_getaddrinfo - RDMA address and route resolution service.
671 int rdma_getaddrinfo(const char *node, const char *service,
672 const struct rdma_addrinfo *hints,
673 struct rdma_addrinfo **res);
/*
 * rdma_freeaddrinfo - takes a struct rdma_addrinfo pointer; returns nothing.
 * @res: rdma_addrinfo to release.
 * NOTE(review): presumably frees the result list produced by
 * rdma_getaddrinfo -- confirm against the rdma_getaddrinfo(3) man page.
 */
675 void rdma_freeaddrinfo(struct rdma_addrinfo *res);
681 #endif /* RDMA_CMA_H */