2 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
3 * Copyright (c) 2005-2007 Intel Corporation. All rights reserved.
5 * This software is available to you under a choice of one of two
6 * licenses. You may choose to be licensed under the terms of the GNU
7 * General Public License (GPL) Version 2, available from the file
8 * COPYING in the main directory of this source tree, or the
9 * OpenIB.org BSD license below:
11 * Redistribution and use in source and binary forms, with or
12 * without modification, are permitted provided that the following
15 * - Redistributions of source code must retain the above
16 * copyright notice, this list of conditions and the following
19 * - Redistributions in binary form must reproduce the above
20 * copyright notice, this list of conditions and the following
21 * disclaimer in the documentation and/or other materials
22 * provided with the distribution.
24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #if !defined(RDMA_CMA_H)
37 #include <netinet/in.h>
38 #include <sys/socket.h>
39 #include <infiniband/verbs.h>
40 #include <infiniband/sa.h>
47 * Upon receiving a device removal event, users must destroy the associated
48 * RDMA identifier and release all resources allocated with the device.
50 enum rdma_cm_event_type {
51 RDMA_CM_EVENT_ADDR_RESOLVED,
52 RDMA_CM_EVENT_ADDR_ERROR,
53 RDMA_CM_EVENT_ROUTE_RESOLVED,
54 RDMA_CM_EVENT_ROUTE_ERROR,
55 RDMA_CM_EVENT_CONNECT_REQUEST,
56 RDMA_CM_EVENT_CONNECT_RESPONSE,
57 RDMA_CM_EVENT_CONNECT_ERROR,
58 RDMA_CM_EVENT_UNREACHABLE,
59 RDMA_CM_EVENT_REJECTED,
60 RDMA_CM_EVENT_ESTABLISHED,
61 RDMA_CM_EVENT_DISCONNECTED,
62 RDMA_CM_EVENT_DEVICE_REMOVAL,
63 RDMA_CM_EVENT_MULTICAST_JOIN,
64 RDMA_CM_EVENT_MULTICAST_ERROR,
65 RDMA_CM_EVENT_ADDR_CHANGE,
66 RDMA_CM_EVENT_TIMEWAIT_EXIT
69 enum rdma_port_space {
70 RDMA_PS_IPOIB= 0x0002,
76 * Global qkey value for UDP QPs and multicast groups created via the
79 #define RDMA_UDP_QKEY 0x01234567
88 struct sockaddr src_addr;
89 uint8_t src_pad[sizeof(struct sockaddr_storage) -
90 sizeof(struct sockaddr)];
91 struct sockaddr dst_addr;
92 uint8_t dst_pad[sizeof(struct sockaddr_storage) -
93 sizeof(struct sockaddr)];
95 struct ib_addr ibaddr;
100 struct rdma_addr addr;
101 struct ibv_sa_path_rec *path_rec;
105 struct rdma_event_channel {
110 struct ibv_context *verbs;
111 struct rdma_event_channel *channel;
114 struct rdma_route route;
115 enum rdma_port_space ps;
119 struct rdma_conn_param {
120 const void *private_data;
121 uint8_t private_data_len;
122 uint8_t responder_resources;
123 uint8_t initiator_depth;
124 uint8_t flow_control;
125 uint8_t retry_count; /* ignored when accepting */
126 uint8_t rnr_retry_count;
127 /* Fields below ignored if a QP is created on the rdma_cm_id. */
132 struct rdma_ud_param {
133 const void *private_data;
134 uint8_t private_data_len;
135 struct ibv_ah_attr ah_attr;
140 struct rdma_cm_event {
141 struct rdma_cm_id *id;
142 struct rdma_cm_id *listen_id;
143 enum rdma_cm_event_type event;
146 struct rdma_conn_param conn;
147 struct rdma_ud_param ud;
152 * rdma_create_event_channel - Open a channel used to report communication events.
154 * Asynchronous events are reported to users through event channels. Each
155 * event channel maps to a file descriptor.
157 * All created event channels must be destroyed by calling
158 * rdma_destroy_event_channel. Users should call rdma_get_cm_event to
159 * retrieve events on an event channel.
161 * rdma_get_cm_event, rdma_destroy_event_channel
163 struct rdma_event_channel *rdma_create_event_channel(void);
166 * rdma_destroy_event_channel - Close an event communication channel.
167 * @channel: The communication channel to destroy.
169 * Releases all resources associated with an event channel and closes the
170 * associated file descriptor.
172 * All rdma_cm_id's associated with the event channel must be destroyed,
173 * and all returned events must be acked before calling this function.
175 * rdma_create_event_channel, rdma_get_cm_event, rdma_ack_cm_event
177 void rdma_destroy_event_channel(struct rdma_event_channel *channel);
180 * rdma_create_id - Allocate a communication identifier.
181 * @channel: The communication channel that events associated with the
182 * allocated rdma_cm_id will be reported on.
183 * @id: A reference where the allocated communication identifier will be
185 * @context: User specified context associated with the rdma_cm_id.
186 * @ps: RDMA port space.
188 * Creates an identifier that is used to track communication information.
190 * Rdma_cm_id's are conceptually equivalent to a socket for RDMA
191 * communication. The difference is that RDMA communication requires
192 * explicitly binding to a specified RDMA device before communication
193 * can occur, and most operations are asynchronous in nature. Communication
194 * events on an rdma_cm_id are reported through the associated event
195 * channel. Users must release the rdma_cm_id by calling rdma_destroy_id.
197 * rdma_create_event_channel, rdma_destroy_id, rdma_get_devices,
198 * rdma_bind_addr, rdma_resolve_addr, rdma_connect, rdma_listen,
200 int rdma_create_id(struct rdma_event_channel *channel,
201 struct rdma_cm_id **id, void *context,
202 enum rdma_port_space ps);
205 * rdma_destroy_id - Release a communication identifier.
206 * @id: The communication identifier to destroy.
208 * Destroys the specified rdma_cm_id and cancels any outstanding
209 * asynchronous operation.
211 * Users must free any QP associated with the rdma_cm_id and ack any
212 * related events before calling this routine.
214 * rdma_create_id, rdma_destroy_qp, rdma_ack_cm_event
216 int rdma_destroy_id(struct rdma_cm_id *id);
219 * rdma_bind_addr - Bind an RDMA identifier to a source address.
220 * @id: RDMA identifier.
221 * @addr: Local address information. Wildcard values are permitted.
223 * Associates a source address with an rdma_cm_id. The address may be
224 * wildcarded. If binding to a specific local address, the rdma_cm_id
225 * will also be bound to a local RDMA device.
227 * Typically, this routine is called before calling rdma_listen to bind
228 * to a specific port number, but it may also be called on the active side
229 * of a connection before calling rdma_resolve_addr to bind to a specific
232 * rdma_create_id, rdma_listen, rdma_resolve_addr, rdma_create_qp
234 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr);
237 * rdma_resolve_addr - Resolve destination and optional source addresses.
238 * @id: RDMA identifier.
239 * @src_addr: Source address information. This parameter may be NULL.
240 * @dst_addr: Destination address information.
241 * @timeout_ms: Time to wait for resolution to complete.
243 * Resolve destination and optional source addresses from IP addresses
244 * to an RDMA address. If successful, the specified rdma_cm_id will
245 * be bound to a local device.
247 * This call is used to map a given destination IP address to a usable RDMA
248 * address. If a source address is given, the rdma_cm_id is bound to that
249 * address, the same as if rdma_bind_addr were called. If no source
250 * address is given, and the rdma_cm_id has not yet been bound to a device,
251 * then the rdma_cm_id will be bound to a source address based on the
252 * local routing tables. After this call, the rdma_cm_id will be bound to
253 * an RDMA device. This call is typically made from the active side of a
254 * connection before calling rdma_resolve_route and rdma_connect.
256 * rdma_create_id, rdma_resolve_route, rdma_connect, rdma_create_qp,
257 * rdma_get_cm_event, rdma_bind_addr
259 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
260 struct sockaddr *dst_addr, int timeout_ms);
263 * rdma_resolve_route - Resolve the route information needed to establish a connection.
264 * @id: RDMA identifier.
265 * @timeout_ms: Time to wait for resolution to complete.
267 * Resolves an RDMA route to the destination address in order to establish
268 * a connection. The destination address must have already been resolved
269 * by calling rdma_resolve_addr.
271 * This is called on the client side of a connection after calling
272 * rdma_resolve_addr, but before calling rdma_connect.
274 * rdma_resolve_addr, rdma_connect, rdma_get_cm_event
276 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms);
279 * rdma_create_qp - Allocate a QP.
280 * @id: RDMA identifier.
281 * @pd: protection domain for the QP.
282 * @qp_init_attr: initial QP attributes.
284 * Allocate a QP associated with the specified rdma_cm_id and transition it
285 * for sending and receiving.
287 * The rdma_cm_id must be bound to a local RDMA device before calling this
288 * function, and the protection domain must be for that same device.
289 * QPs allocated to an rdma_cm_id are automatically transitioned by the
290 * librdmacm through their states. After being allocated, the QP will be
291 * ready to handle posting of receives. If the QP is unconnected, it will
292 * be ready to post sends.
294 * rdma_bind_addr, rdma_resolve_addr, rdma_destroy_qp, ibv_create_qp,
297 int rdma_create_qp(struct rdma_cm_id *id, struct ibv_pd *pd,
298 struct ibv_qp_init_attr *qp_init_attr);
301 * rdma_destroy_qp - Deallocate a QP.
302 * @id: RDMA identifier.
304 * Destroy a QP allocated on the rdma_cm_id.
306 * Users must destroy any QP associated with an rdma_cm_id before
309 * rdma_create_qp, rdma_destroy_id, ibv_destroy_qp
311 void rdma_destroy_qp(struct rdma_cm_id *id);
314 * rdma_connect - Initiate an active connection request.
315 * @id: RDMA identifier.
316 * @conn_param: connection parameters.
318 * For a connected rdma_cm_id, this call initiates a connection request
319 * to a remote destination. For an unconnected rdma_cm_id, it initiates
320 * a lookup of the remote QP providing the datagram service.
322 * Users must have resolved a route to the destination address
323 * by having called rdma_resolve_route before calling this routine.
325 * rdma_resolve_route, rdma_disconnect, rdma_listen, rdma_get_cm_event
327 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
330 * rdma_listen - Listen for incoming connection requests.
331 * @id: RDMA identifier.
332 * @backlog: backlog of incoming connection requests.
334 * Initiates a listen for incoming connection requests or datagram service
335 * lookup. The listen will be restricted to the locally bound source
338 * Users must have bound the rdma_cm_id to a local address by calling
339 * rdma_bind_addr before calling this routine. If the rdma_cm_id is
340 * bound to a specific IP address, the listen will be restricted to that
341 * address and the associated RDMA device. If the rdma_cm_id is bound
342 * to an RDMA port number only, the listen will occur across all RDMA
345 * rdma_bind_addr, rdma_connect, rdma_accept, rdma_reject, rdma_get_cm_event
347 int rdma_listen(struct rdma_cm_id *id, int backlog);
350 * rdma_accept - Called to accept a connection request.
351 * @id: Connection identifier associated with the request.
352 * @conn_param: Information needed to establish the connection.
354 * Called from the listening side to accept a connection or datagram
355 * service lookup request.
357 * Unlike the socket accept routine, rdma_accept is not called on a
358 * listening rdma_cm_id. Instead, after calling rdma_listen, the user
359 * waits for a connection request event to occur. Connection request
360 * events give the user a newly created rdma_cm_id, similar to a new
361 * socket, but the rdma_cm_id is bound to a specific RDMA device.
362 * rdma_accept is called on the new rdma_cm_id.
364 * rdma_listen, rdma_reject, rdma_get_cm_event
366 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param);
369 * rdma_reject - Called to reject a connection request.
370 * @id: Connection identifier associated with the request.
371 * @private_data: Optional private data to send with the reject message.
372 * @private_data_len: Size of the private_data to send, in bytes.
374 * Called from the listening side to reject a connection or datagram
375 * service lookup request.
377 * After receiving a connection request event, a user may call rdma_reject
378 * to reject the request. If the underlying RDMA transport supports
379 * private data in the reject message, the specified data will be passed to
382 * rdma_listen, rdma_accept, rdma_get_cm_event
384 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
385 uint8_t private_data_len);
388 * rdma_notify - Notifies the librdmacm of an asynchronous event.
389 * @id: RDMA identifier.
390 * @event: Asynchronous event.
392 * Used to notify the librdmacm of asynchronous events that have occurred
393 * on a QP associated with the rdma_cm_id.
395 * Asynchronous events that occur on a QP are reported through the user's
396 * device event handler. This routine is used to notify the librdmacm of
397 * communication events. In most cases, use of this routine is not
398 * necessary, however if connection establishment is done out of band
399 * (such as is done through InfiniBand), it's possible to receive data on a
400 * QP that is not yet considered connected. This routine forces the
401 * connection into an established state in this case in order to handle
402 * the rare situation where the connection never forms on its own.
403 * Events that should be reported to the CM are: IB_EVENT_COMM_EST.
405 * rdma_connect, rdma_accept, rdma_listen
407 int rdma_notify(struct rdma_cm_id *id, enum ibv_event_type event);
410 * rdma_disconnect - This function disconnects a connection.
411 * @id: RDMA identifier.
413 * Disconnects a connection and transitions any associated QP to the
416 * rdma_connect, rdma_listen, rdma_accept
418 int rdma_disconnect(struct rdma_cm_id *id);
421 * rdma_join_multicast - Joins a multicast group.
422 * @id: Communication identifier associated with the request.
423 * @addr: Multicast address identifying the group to join.
424 * @context: User-defined context associated with the join request.
426 * Joins a multicast group and attaches an associated QP to the group.
428 * Before joining a multicast group, the rdma_cm_id must be bound to
429 * an RDMA device by calling rdma_bind_addr or rdma_resolve_addr. Use of
430 * rdma_resolve_addr requires the local routing tables to resolve the
431 * multicast address to an RDMA device. The user must call
432 * rdma_leave_multicast to leave the multicast group and release any
433 * multicast resources. The context is returned to the user through
434 * the private_data field in the rdma_cm_event.
436 * rdma_leave_multicast, rdma_bind_addr, rdma_resolve_addr, rdma_create_qp
438 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
442 * rdma_leave_multicast - Leaves a multicast group.
443 * @id: Communication identifier associated with the request.
444 * @addr: Multicast address identifying the group to leave.
446 * Leaves a multicast group and detaches an associated QP from the group.
448 * Calling this function before a group has been fully joined results in
449 * canceling the join operation. Users should be aware that messages
450 * received from the multicast group may still be queued for
451 * completion processing immediately after leaving a multicast group.
452 * Destroying an rdma_cm_id will automatically leave all multicast groups.
454 * rdma_join_multicast, rdma_destroy_qp
456 int rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr);
459 * rdma_get_cm_event - Retrieves the next pending communication event.
460 * @channel: Event channel to check for events.
461 * @event: Allocated information about the next communication event.
463 * Retrieves a communication event. If no events are pending, by default,
464 * the call will block until an event is received.
466 * The default synchronous behavior of this routine can be changed by
467 * modifying the file descriptor associated with the given channel. All
468 * events that are reported must be acknowledged by calling rdma_ack_cm_event.
469 * Destruction of an rdma_cm_id will block until related events have been
472 * rdma_ack_cm_event, rdma_create_event_channel, rdma_event_str
474 int rdma_get_cm_event(struct rdma_event_channel *channel,
475 struct rdma_cm_event **event);
478 * rdma_ack_cm_event - Free a communication event.
479 * @event: Event to be released.
481 * All events which are allocated by rdma_get_cm_event must be released;
482 * there should be a one-to-one correspondence between successful gets
485 * rdma_get_cm_event, rdma_destroy_id
487 int rdma_ack_cm_event(struct rdma_cm_event *event);
489 static inline uint16_t rdma_get_src_port(struct rdma_cm_id *id)
491 return id->route.addr.src_addr.sa_family == PF_INET6 ?
492 ((struct sockaddr_in6 *) &id->route.addr.src_addr)->sin6_port :
493 ((struct sockaddr_in *) &id->route.addr.src_addr)->sin_port;
496 static inline uint16_t rdma_get_dst_port(struct rdma_cm_id *id)
498 return id->route.addr.dst_addr.sa_family == PF_INET6 ?
499 ((struct sockaddr_in6 *) &id->route.addr.dst_addr)->sin6_port :
500 ((struct sockaddr_in *) &id->route.addr.dst_addr)->sin_port;
503 static inline struct sockaddr *rdma_get_local_addr(struct rdma_cm_id *id)
505 return &id->route.addr.src_addr;
508 static inline struct sockaddr *rdma_get_peer_addr(struct rdma_cm_id *id)
510 return &id->route.addr.dst_addr;
514 * rdma_get_devices - Get list of RDMA devices currently available.
515 * @num_devices: If non-NULL, set to the number of devices returned.
517 * Return a NULL-terminated array of opened RDMA devices. Callers can use
518 * this routine to allocate resources on specific RDMA devices that will be
519 * shared across multiple rdma_cm_id's.
521 * The returned array must be released by calling rdma_free_devices. Devices
522 * remain opened while the librdmacm is loaded.
526 struct ibv_context **rdma_get_devices(int *num_devices);
529 * rdma_free_devices - Frees the list of devices returned by rdma_get_devices.
530 * @list: List of devices returned from rdma_get_devices.
532 * Frees the device array returned by rdma_get_devices.
536 void rdma_free_devices(struct ibv_context **list);
539 * rdma_event_str - Returns a string representation of an rdma cm event.
540 * @event: Asynchronous event.
542 * Returns a string representation of an asynchronous event.
546 const char *rdma_event_str(enum rdma_cm_event_type event);
555 RDMA_OPTION_ID_TOS = 0 /* uint8_t: RFC 2474 */
559 * rdma_set_option - Set options for an rdma_cm_id.
560 * @id: Communication identifier to set option for.
561 * @level: Protocol level of the option to set.
562 * @optname: Name of the option to set.
563 * @optval: Reference to the option data.
564 * @optlen: The size of the %optval buffer.
566 int rdma_set_option(struct rdma_cm_id *id, int level, int optname,
567 void *optval, size_t optlen);
570 * rdma_migrate_id - Move an rdma_cm_id to a new event channel.
571 * @id: Communication identifier to migrate.
572 * @channel: New event channel for rdma_cm_id events.
574 int rdma_migrate_id(struct rdma_cm_id *id, struct rdma_event_channel *channel);
580 #endif /* RDMA_CMA_H */