/*
 * Copyright (c) 2005 Voltaire Inc. All rights reserved.
 * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
 * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
 * Copyright (c) 2005-2006 Intel Corporation. All rights reserved.
 *
 * This Software is licensed under one of the following licenses:
 *
 * 1) under the terms of the "Common Public License 1.0" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/cpl.php.
 *
 * 2) under the terms of the "The BSD License" a copy of which is
 *    available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/bsd-license.php.
 *
 * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
 *    copy of which is available from the Open Source Initiative, see
 *    http://www.opensource.org/licenses/gpl-license.php.
 *
 * Licensee has the right to choose one of the above licenses.
 *
 * Redistributions of source code must retain the above copyright
 * notice and one of the license notices.
 *
 * Redistributions in binary form must reproduce both the above copyright
 * notice, one of the license notices in the documentation
 * and/or other materials provided with the distribution.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/condvar.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/socket.h>
#include <sys/socketvar.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/rwlock.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/priv.h>
#include <sys/syslog.h>

#include <net/if.h>
#include <net/if_var.h>

#include <netinet/in.h>
#include <netinet/in_pcb.h>

#include <contrib/rdma/rdma_cm.h>
#include <contrib/rdma/ib_cache.h>
#include <contrib/rdma/ib_cm.h>
#include <contrib/rdma/ib_sa.h>
#include <contrib/rdma/iw_cm.h>

#define CMA_CM_RESPONSE_TIMEOUT 20
#define CMA_MAX_CM_RETRIES 15
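/*
 * CMA_CM_RESPONSE_TIMEOUT uses the IB CM's exponential encoding: an
 * encoded value t means roughly 4.096us * 2^t, so 20 is about 4.3
 * seconds per CM exchange.  The millisecond conversion used for SIDR
 * requests below, 1 << (CMA_CM_RESPONSE_TIMEOUT - 8), yields 4096 ms
 * for the same constant, since 2^(t-8) ms approximates 4.096us * 2^t.
 */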
static void cma_add_one(struct ib_device *device);
static void cma_remove_one(struct ib_device *device);

static struct ib_client cma_client = {
        .add    = cma_add_one,
        .remove = cma_remove_one
};

static struct ib_sa_client sa_client;
static struct rdma_addr_client addr_client;
static TAILQ_HEAD(, cma_device) dev_list;
static LIST_HEAD(, rdma_id_private) listen_any_list;
static struct mtx lock;
static struct taskqueue *cma_wq;
static DEFINE_KVL(sdp_ps);
static DEFINE_KVL(tcp_ps);
static DEFINE_KVL(udp_ps);
static DEFINE_KVL(ipoib_ps);
static unsigned short next_port;        /* next ephemeral port to try */

struct cma_device {
        struct ib_device        *device;
        struct mtx              lock;
        struct cv               comp;
        int                     refcount;
        LIST_HEAD(, rdma_id_private) id_list;
        TAILQ_ENTRY(cma_device) list;
};

enum cma_state {
        CMA_IDLE,
        CMA_ADDR_QUERY,
        CMA_ADDR_RESOLVED,
        CMA_ROUTE_QUERY,
        CMA_ROUTE_RESOLVED,
        CMA_CONNECT,
        CMA_DISCONNECT,
        CMA_ADDR_BOUND,
        CMA_LISTEN,
        CMA_DESTROYING,
        CMA_DESTROYED
};

struct rdma_bind_list {
        struct kvl              *ps;
        TAILQ_HEAD(, rdma_id_private) owners;
        unsigned short          port;
};

/*
 * Device removal can occur at any time, so we need extra handling to
 * serialize notifying the user of device removal with other callbacks.
 * We do this by disabling removal notification while a callback is in process,
 * and reporting it after the callback completes.
 */
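/*
 * Illustrative sketch of that pattern (see cma_ib_handler() and
 * friends below for the real call sites):
 *
 *      if (cma_disable_remove(id_priv, CMA_CONNECT))
 *              return 0;       (device removal in progress; drop event)
 *      ... deliver the event to the consumer ...
 *      cma_enable_remove(id_priv);
 *
 * cma_disable_remove() bumps dev_remove while the id is still in the
 * expected state; cma_enable_remove() drops it and broadcasts
 * wait_remove so a concurrent device-removal path can proceed.
 */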
struct rdma_id_private {
        struct rdma_cm_id       id;

        struct rdma_bind_list   *bind_list;
        struct socket           *so;
        TAILQ_ENTRY(rdma_id_private) node;
        LIST_ENTRY(rdma_id_private) list; /* listen_any_list or cma_dev.list */
        LIST_HEAD(, rdma_id_private) listen_list; /* per-device listens */
        LIST_ENTRY(rdma_id_private) listen_entry;
        struct cma_device       *cma_dev;
        LIST_HEAD(, cma_multicast) mc_list;

        enum cma_state          state;
        struct mtx              lock;
        struct cv               comp;
        int                     refcount;
        int                     dev_remove;
        struct cv               wait_remove;

        int                     backlog;
        int                     query_id;
        struct ib_sa_query      *query;
        union {
                struct ib_cm_id *ib;
                struct iw_cm_id *iw;
        } cm_id;

        u32                     seq_num;
        u32                     qkey;
        u32                     qp_num;
        u8                      srq;
};

struct cma_multicast {
        struct rdma_id_private  *id_priv;
        union {
                struct ib_sa_multicast *ib;
        } multicast;
        LIST_ENTRY(cma_multicast) list;
        void                    *context;
        struct sockaddr         addr;
        u8                      pad[sizeof(struct sockaddr_in6) -
                                    sizeof(struct sockaddr)];
};

struct cma_work {
        struct task             task;
        struct rdma_id_private  *id;
        enum cma_state          old_state;
        enum cma_state          new_state;
        struct rdma_cm_event    event;
};

union cma_ip_addr {
        struct in6_addr ip6;
        struct {
                __u32 pad[3];
                __u32 addr;
        } ip4;
};

struct cma_hdr {
        u8 cma_version;
        u8 ip_version;          /* IP version: 7:4 */
        __u16 port;
        union cma_ip_addr src_addr;
        union cma_ip_addr dst_addr;
};

struct sdp_hh {
        u8 bsdh[16];            /* SDP base sockets direct header */
        u8 sdp_version;         /* Major version: 7:4 */
        u8 ip_version;          /* IP version: 7:4 */
        u8 sdp_specific1[10];
        __u16 port;
        __u16 sdp_specific2;
        union cma_ip_addr src_addr;
        union cma_ip_addr dst_addr;
};

#define CMA_VERSION 0x00
#define SDP_MAJ_VERSION 0x2
static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
{
        int ret;

        mtx_lock(&id_priv->lock);
        ret = (id_priv->state == comp);
        mtx_unlock(&id_priv->lock);
        return (ret);
}

static int cma_comp_exch(struct rdma_id_private *id_priv,
                         enum cma_state comp, enum cma_state exch)
{
        int ret;

        mtx_lock(&id_priv->lock);
        if ((ret = (id_priv->state == comp)))
                id_priv->state = exch;
        mtx_unlock(&id_priv->lock);
        return (ret);
}
static enum cma_state cma_exch(struct rdma_id_private *id_priv,
                               enum cma_state exch)
{
        enum cma_state old;

        mtx_lock(&id_priv->lock);
        old = id_priv->state;
        id_priv->state = exch;
        mtx_unlock(&id_priv->lock);
        return (old);
}

static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
{
        return hdr->ip_version >> 4;
}

static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
{
        hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
}

static inline u8 sdp_get_majv(u8 sdp_version)
{
        return sdp_version >> 4;
}

static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
{
        return hh->ip_version >> 4;
}

static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
{
        hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
}
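/*
 * Worked example: cma_set_ip_ver(hdr, 4) on a zeroed header stores
 * 0x40 (the version lives in bits 7:4; the low nibble is preserved),
 * and cma_get_ip_ver() then returns 0x40 >> 4 == 4.  The SDP variants
 * pack the major version the same way, so SDP_MAJ_VERSION (0x2)
 * travels as 0x20 in sdp_version.
 */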
static inline int cma_is_ud_ps(enum rdma_port_space ps)
{
        return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
}

static void cma_attach_to_dev(struct rdma_id_private *id_priv,
                              struct cma_device *cma_dev)
{
        mtx_lock(&cma_dev->lock);
        cma_dev->refcount++;
        mtx_unlock(&cma_dev->lock);
        id_priv->cma_dev = cma_dev;
        id_priv->id.device = cma_dev->device;
        LIST_INSERT_HEAD(&cma_dev->id_list, id_priv, list);
}

static inline void cma_deref_dev(struct cma_device *cma_dev)
{
        mtx_lock(&cma_dev->lock);
        if (--cma_dev->refcount == 0)
                cv_broadcast(&cma_dev->comp);
        mtx_unlock(&cma_dev->lock);
}

static void cma_detach_from_dev(struct rdma_id_private *id_priv)
{
        LIST_REMOVE(id_priv, list);
        cma_deref_dev(id_priv->cma_dev);
        id_priv->cma_dev = NULL;
}
static int cma_set_qkey(struct ib_device *device, u8 port_num,
                        enum rdma_port_space ps,
                        struct rdma_dev_addr *dev_addr, u32 *qkey)
{
        struct ib_sa_mcmember_rec rec;
        int ret = 0;

        switch (ps) {
        case RDMA_PS_UDP:
                *qkey = RDMA_UDP_QKEY;
                break;
        case RDMA_PS_IPOIB:
                ib_addr_get_mgid(dev_addr, &rec.mgid);
                ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
                *qkey = be32_to_cpu(rec.qkey);
                break;
        default:
                break;
        }
        return (ret);
}

static int cma_acquire_dev(struct rdma_id_private *id_priv)
{
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        struct cma_device *cma_dev;
        union ib_gid gid;
        int ret = ENODEV;

        switch (rdma_node_get_transport(dev_addr->dev_type)) {
        case RDMA_TRANSPORT_IB:
                ib_addr_get_sgid(dev_addr, &gid);
                break;
        case RDMA_TRANSPORT_IWARP:
                iw_addr_get_sgid(dev_addr, &gid);
                break;
        default:
                return (ENODEV);
        }

        TAILQ_FOREACH(cma_dev, &dev_list, list) {
                ret = ib_find_cached_gid(cma_dev->device, &gid,
                                         &id_priv->id.port_num, NULL);
                if (!ret) {
                        ret = cma_set_qkey(cma_dev->device,
                                           id_priv->id.port_num,
                                           id_priv->id.ps, dev_addr,
                                           &id_priv->qkey);
                        if (!ret)
                                cma_attach_to_dev(id_priv, cma_dev);
                        break;
                }
        }
        return (ret);
}
static void cma_deref_id(struct rdma_id_private *id_priv)
{
        mtx_lock(&id_priv->lock);
        if (--id_priv->refcount == 0) {
                cv_broadcast(&id_priv->comp);
        }
        mtx_unlock(&id_priv->lock);
}

static int cma_disable_remove(struct rdma_id_private *id_priv,
                              enum cma_state state)
{
        int ret;

        mtx_lock(&id_priv->lock);
        if (id_priv->state == state) {
                id_priv->dev_remove++;
                ret = 0;
        } else
                ret = EINVAL;
        mtx_unlock(&id_priv->lock);
        return (ret);
}

static void cma_enable_remove(struct rdma_id_private *id_priv)
{
        mtx_lock(&id_priv->lock);
        if (--id_priv->dev_remove == 0)
                cv_broadcast(&id_priv->wait_remove);
        mtx_unlock(&id_priv->lock);
}

static int cma_has_cm_dev(struct rdma_id_private *id_priv)
{
        return (id_priv->id.device && id_priv->cm_id.ib);
}
struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
                                  void *context, enum rdma_port_space ps)
{
        struct rdma_id_private *id_priv;

        id_priv = malloc(sizeof *id_priv, M_DEVBUF, M_NOWAIT);
        if (!id_priv)
                return ERR_PTR(-ENOMEM);
        bzero(id_priv, sizeof *id_priv);

        id_priv->state = CMA_IDLE;
        id_priv->id.context = context;
        id_priv->id.event_handler = event_handler;
        id_priv->id.ps = ps;
        mtx_init(&id_priv->lock, "rdma_cm_id_priv", NULL, MTX_DUPOK|MTX_DEF);
        cv_init(&id_priv->comp, "rdma_cm_id_priv");
        id_priv->refcount = 1;
        cv_init(&id_priv->wait_remove, "id priv wait remove");
        LIST_INIT(&id_priv->listen_list);
        arc4rand(&id_priv->seq_num, sizeof id_priv->seq_num, 0);

        return (&id_priv->id);
}
static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
        if (ret)
                return ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
        if (ret)
                return ret;

        qp_attr.qp_state = IB_QPS_RTS;
        qp_attr.sq_psn = 0;
        ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);

        return ret;
}

static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
}

int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
                   struct ib_qp_init_attr *qp_init_attr)
{
        struct rdma_id_private *id_priv;
        struct ib_qp *qp;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (id->device != pd->device)
                return (EINVAL);

        qp = ib_create_qp(pd, qp_init_attr);
        if (IS_ERR(qp))
                return PTR_ERR(qp);

        if (cma_is_ud_ps(id_priv->id.ps))
                ret = cma_init_ud_qp(id_priv, qp);
        else
                ret = cma_init_conn_qp(id_priv, qp);
        if (ret)
                goto err;

        id->qp = qp;
        id_priv->qp_num = qp->qp_num;
        id_priv->srq = (qp->srq != NULL);
        return 0;
err:
        ib_destroy_qp(qp);
        return ret;
}

void rdma_destroy_qp(struct rdma_cm_id *id)
{
        ib_destroy_qp(id->qp);
}
static int cma_modify_qp_rtr(struct rdma_cm_id *id)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        if (!id->qp)
                return 0;

        /* Need to update QP attributes from default values. */
        qp_attr.qp_state = IB_QPS_INIT;
        ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
        if (ret)
                return ret;

        qp_attr.qp_state = IB_QPS_RTR;
        ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
}

static int cma_modify_qp_rts(struct rdma_cm_id *id)
{
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        if (!id->qp)
                return 0;

        qp_attr.qp_state = IB_QPS_RTS;
        ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
        if (ret)
                return ret;

        return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
}

static int cma_modify_qp_err(struct rdma_cm_id *id)
{
        struct ib_qp_attr qp_attr;

        if (!id->qp)
                return 0;

        qp_attr.qp_state = IB_QPS_ERR;
        return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
}
static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
                               struct ib_qp_attr *qp_attr, int *qp_attr_mask)
{
        struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
        int ret;

        ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
                                  ib_addr_get_pkey(dev_addr),
                                  &qp_attr->pkey_index);
        if (ret)
                return ret;

        qp_attr->port_num = id_priv->id.port_num;
        *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;

        if (cma_is_ud_ps(id_priv->id.ps)) {
                qp_attr->qkey = id_priv->qkey;
                *qp_attr_mask |= IB_QP_QKEY;
        } else {
                qp_attr->qp_access_flags = 0;
                *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
        }
        return 0;
}

int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
                      int *qp_attr_mask)
{
        struct rdma_id_private *id_priv;
        int ret = 0;

        id_priv = container_of(id, struct rdma_id_private, id);
        switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
                        ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
                else
                        ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
                                                 qp_attr_mask);
                if (qp_attr->qp_state == IB_QPS_RTR)
                        qp_attr->rq_psn = id_priv->seq_num;
                break;
        case RDMA_TRANSPORT_IWARP:
                if (!id_priv->cm_id.iw) {
                        qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
                        *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
                } else
                        ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
                                                 qp_attr_mask);
                break;
        default:
                ret = ENOSYS;
                break;
        }

        return (ret);
}
static inline int cma_zero_addr(struct sockaddr *addr)
{
        struct in6_addr *ip6;

        if (addr->sa_family == AF_INET)
                return in_nullhost(((struct sockaddr_in *) addr)->sin_addr);
        else {
                ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
                return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
                        ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
        }
}

static inline int cma_loopback_addr(struct sockaddr *addr)
{
        return ((struct sockaddr_in *)addr)->sin_addr.s_addr ==
            htonl(INADDR_LOOPBACK);
}

static inline int cma_any_addr(struct sockaddr *addr)
{
        return cma_zero_addr(addr) || cma_loopback_addr(addr);
}

static inline __be16 cma_port(struct sockaddr *addr)
{
        if (addr->sa_family == AF_INET)
                return ((struct sockaddr_in *) addr)->sin_port;
        else
                return ((struct sockaddr_in6 *) addr)->sin6_port;
}

static inline int cma_any_port(struct sockaddr *addr)
{
        return !cma_port(addr);
}
static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
                            u8 *ip_ver, __u16 *port,
                            union cma_ip_addr **src, union cma_ip_addr **dst)
{
        switch (ps) {
        case RDMA_PS_SDP:
                if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
                    SDP_MAJ_VERSION)
                        return (EINVAL);

                *ip_ver = sdp_get_ip_ver(hdr);
                *port = ((struct sdp_hh *) hdr)->port;
                *src = &((struct sdp_hh *) hdr)->src_addr;
                *dst = &((struct sdp_hh *) hdr)->dst_addr;
                break;
        default:
                if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
                        return (EINVAL);

                *ip_ver = cma_get_ip_ver(hdr);
                *port = ((struct cma_hdr *) hdr)->port;
                *src = &((struct cma_hdr *) hdr)->src_addr;
                *dst = &((struct cma_hdr *) hdr)->dst_addr;
                break;
        }

        if (*ip_ver != 4 && *ip_ver != 6)
                return (EINVAL);
        return 0;
}

static void cma_save_net_info(struct rdma_addr *addr,
                              struct rdma_addr *listen_addr,
                              u8 ip_ver, __u16 port,
                              union cma_ip_addr *src, union cma_ip_addr *dst)
{
        struct sockaddr_in *listen4, *ip4;
        struct sockaddr_in6 *listen6, *ip6;

        switch (ip_ver) {
        case 4:
                listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
                ip4 = (struct sockaddr_in *) &addr->src_addr;
                ip4->sin_family = listen4->sin_family;
                ip4->sin_addr.s_addr = dst->ip4.addr;
                ip4->sin_port = listen4->sin_port;

                ip4 = (struct sockaddr_in *) &addr->dst_addr;
                ip4->sin_family = listen4->sin_family;
                ip4->sin_addr.s_addr = src->ip4.addr;
                ip4->sin_port = port;
                break;
        case 6:
                listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
                ip6 = (struct sockaddr_in6 *) &addr->src_addr;
                ip6->sin6_family = listen6->sin6_family;
                ip6->sin6_addr = dst->ip6;
                ip6->sin6_port = listen6->sin6_port;

                ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
                ip6->sin6_family = listen6->sin6_family;
                ip6->sin6_addr = src->ip6;
                ip6->sin6_port = port;
                break;
        default:
                break;
        }
}

static inline int cma_user_data_offset(enum rdma_port_space ps)
{
        switch (ps) {
        case RDMA_PS_SDP:
                return 0;
        default:
                return sizeof(struct cma_hdr);
        }
}
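/*
 * Layout note: for the TCP/UDP/IPoIB port spaces the CM private data
 * carried in a REQ is
 *
 *      [ struct cma_hdr ][ user private data ... ]
 *
 * so the consumer's data starts at sizeof(struct cma_hdr); both
 * cma_connect_ib() and cma_req_handler() apply this offset.  SDP
 * frames its own header, hence the zero offset in that case.
 */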
static void cma_cancel_route(struct rdma_id_private *id_priv)
{
        switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (id_priv->query)
                        ib_sa_cancel_query(id_priv->query_id, id_priv->query);
                break;
        default:
                break;
        }
}

static inline int cma_internal_listen(struct rdma_id_private *id_priv)
{
        return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
               cma_any_addr(&id_priv->id.route.addr.src_addr);
}

static void cma_destroy_listen(struct rdma_id_private *id_priv)
{
        cma_exch(id_priv, CMA_DESTROYING);

        if (id_priv->cma_dev) {
                switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
                case RDMA_TRANSPORT_IB:
                        if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
                case RDMA_TRANSPORT_IWARP:
                        if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
                                iw_destroy_cm_id(id_priv->cm_id.iw);
                        break;
                default:
                        break;
                }
                cma_detach_from_dev(id_priv);
        }
        LIST_REMOVE(id_priv, listen_entry);

        cma_deref_id(id_priv);
        mtx_lock(&id_priv->lock);
        if (id_priv->refcount)
                cv_wait(&id_priv->comp, &id_priv->lock);
        mtx_unlock(&id_priv->lock);

        free(id_priv, M_DEVBUF);
}

static void cma_cancel_listens(struct rdma_id_private *id_priv)
{
        struct rdma_id_private *dev_id_priv;

        mtx_lock(&lock);
        LIST_REMOVE(id_priv, list);
        while (!LIST_EMPTY(&id_priv->listen_list)) {
                dev_id_priv = LIST_FIRST(&id_priv->listen_list);
                cma_destroy_listen(dev_id_priv);
        }
        mtx_unlock(&lock);
}
static void cma_cancel_operation(struct rdma_id_private *id_priv,
                                 enum cma_state state)
{
        switch (state) {
        case CMA_ADDR_QUERY:
                rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
                break;
        case CMA_ROUTE_QUERY:
                cma_cancel_route(id_priv);
                break;
        case CMA_LISTEN:
                if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
                    !id_priv->cma_dev)
                        cma_cancel_listens(id_priv);
                break;
        default:
                break;
        }
}

static void cma_release_port(struct rdma_id_private *id_priv)
{
        struct rdma_bind_list *bind_list = id_priv->bind_list;

        if (!bind_list)
                return;

        mtx_lock(&lock);
        TAILQ_REMOVE(&bind_list->owners, id_priv, node);
        if (TAILQ_EMPTY(&bind_list->owners)) {
                kvl_delete(bind_list->ps, bind_list->port);
                free(bind_list, M_DEVBUF);
        }
        mtx_unlock(&lock);
        if (id_priv->so)
                soclose(id_priv->so);
}

static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
{
        struct cma_multicast *mc;

        while (!LIST_EMPTY(&id_priv->mc_list)) {
                mc = LIST_FIRST(&id_priv->mc_list);
                LIST_REMOVE(mc, list);
                ib_sa_free_multicast(mc->multicast.ib);
                free(mc, M_DEVBUF);
        }
}
void rdma_destroy_id(struct rdma_cm_id *id)
{
        struct rdma_id_private *id_priv;
        enum cma_state state;

        id_priv = container_of(id, struct rdma_id_private, id);
        state = cma_exch(id_priv, CMA_DESTROYING);
        cma_cancel_operation(id_priv, state);

        mtx_lock(&lock);
        if (id_priv->cma_dev) {
                mtx_unlock(&lock);
                switch (rdma_node_get_transport(id->device->node_type)) {
                case RDMA_TRANSPORT_IB:
                        if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
                                ib_destroy_cm_id(id_priv->cm_id.ib);
                        break;
                case RDMA_TRANSPORT_IWARP:
                        if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
                                iw_destroy_cm_id(id_priv->cm_id.iw);
                        break;
                default:
                        break;
                }
                cma_leave_mc_groups(id_priv);
                mtx_lock(&lock);
                cma_detach_from_dev(id_priv);
        }
        mtx_unlock(&lock);

        cma_release_port(id_priv);
        cma_deref_id(id_priv);
        mtx_lock(&id_priv->lock);
        PANIC_IF(id_priv->refcount < 0);
        if (id_priv->refcount)
                cv_wait(&id_priv->comp, &id_priv->lock);
        mtx_unlock(&id_priv->lock);

        free(id_priv->id.route.path_rec, M_DEVBUF);
        free(id_priv, M_DEVBUF);
}
static int cma_rep_recv(struct rdma_id_private *id_priv)
{
        int ret;

        ret = cma_modify_qp_rtr(&id_priv->id);
        if (ret)
                goto reject;

        ret = cma_modify_qp_rts(&id_priv->id);
        if (ret)
                goto reject;

        ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
        if (ret)
                goto reject;

        return 0;
reject:
        cma_modify_qp_err(&id_priv->id);
        ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
                       NULL, 0, NULL, 0);
        return ret;
}

static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
{
        if (id_priv->id.ps == RDMA_PS_SDP &&
            sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
            SDP_MAJ_VERSION)
                return (EINVAL);
        return 0;
}

static void cma_set_rep_event_data(struct rdma_cm_event *event,
                                   struct ib_cm_rep_event_param *rep_data,
                                   void *private_data)
{
        event->param.conn.private_data = private_data;
        event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
        event->param.conn.responder_resources = rep_data->responder_resources;
        event->param.conn.initiator_depth = rep_data->initiator_depth;
        event->param.conn.flow_control = rep_data->flow_control;
        event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
        event->param.conn.srq = rep_data->srq;
        event->param.conn.qp_num = rep_data->remote_qpn;
}
static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv = cm_id->context;
        struct rdma_cm_event event;
        int ret = 0;

        if (cma_disable_remove(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (ib_event->event) {
        case IB_CM_REQ_ERROR:
        case IB_CM_REP_ERROR:
                event.event = RDMA_CM_EVENT_UNREACHABLE;
                event.status = ETIMEDOUT;
                break;
        case IB_CM_REP_RECEIVED:
                event.status = cma_verify_rep(id_priv, ib_event->private_data);
                if (event.status)
                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
                else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
                        event.status = cma_rep_recv(id_priv);
                        event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
                                                     RDMA_CM_EVENT_ESTABLISHED;
                } else
                        event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
                cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
                                       ib_event->private_data);
                break;
        case IB_CM_RTU_RECEIVED:
        case IB_CM_USER_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        case IB_CM_DREQ_ERROR:
                event.status = ETIMEDOUT; /* fall through */
        case IB_CM_DREQ_RECEIVED:
        case IB_CM_DREP_RECEIVED:
                if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
                        goto out;
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IB_CM_TIMEWAIT_EXIT:
        case IB_CM_MRA_RECEIVED:
                /* ignore event */
                goto out;
        case IB_CM_REJ_RECEIVED:
                cma_modify_qp_err(&id_priv->id);
                event.status = ib_event->param.rej_rcvd.reason;
                event.event = RDMA_CM_EVENT_REJECTED;
                event.param.conn.private_data = ib_event->private_data;
                event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
                break;
        default:
                log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d",
                    ib_event->event);
                goto out;
        }

        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.ib = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
                cma_enable_remove(id_priv);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
out:
        cma_enable_remove(id_priv);
        return ret;
}
static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
                                               struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        struct rdma_route *rt;
        union cma_ip_addr *src, *dst;
        __u16 port;
        u8 ip_ver;

        if (cma_get_net_info(ib_event->private_data, listen_id->ps,
                             &ip_ver, &port, &src, &dst))
                return NULL;

        id = rdma_create_id(listen_id->event_handler, listen_id->context,
                            listen_id->ps);
        if (IS_ERR(id))
                return NULL;

        cma_save_net_info(&id->route.addr, &listen_id->route.addr,
                          ip_ver, port, src, dst);

        rt = &id->route;
        rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
        rt->path_rec = malloc(sizeof *rt->path_rec * rt->num_paths,
                              M_DEVBUF, M_NOWAIT);
        if (!rt->path_rec)
                goto destroy_id;

        rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
        if (rt->num_paths == 2)
                rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;

        ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
        ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
        ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
        rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;

        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
        return id_priv;

destroy_id:
        rdma_destroy_id(id);
        return NULL;
}

static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
                                              struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv;
        struct rdma_cm_id *id;
        union cma_ip_addr *src, *dst;
        __u16 port;
        u8 ip_ver;
        int ret;

        id = rdma_create_id(listen_id->event_handler, listen_id->context,
                            listen_id->ps);
        if (IS_ERR(id))
                return NULL;

        if (cma_get_net_info(ib_event->private_data, listen_id->ps,
                             &ip_ver, &port, &src, &dst))
                goto err;

        cma_save_net_info(&id->route.addr, &listen_id->route.addr,
                          ip_ver, port, src, dst);

        ret = rdma_translate_ip(&id->route.addr.src_addr,
                                &id->route.addr.dev_addr);
        if (ret)
                goto err;

        id_priv = container_of(id, struct rdma_id_private, id);
        id_priv->state = CMA_CONNECT;
        return id_priv;
err:
        rdma_destroy_id(id);
        return NULL;
}
static void cma_set_req_event_data(struct rdma_cm_event *event,
                                   struct ib_cm_req_event_param *req_data,
                                   void *private_data, int offset)
{
        event->param.conn.private_data = private_data + offset;
        event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
        event->param.conn.responder_resources = req_data->responder_resources;
        event->param.conn.initiator_depth = req_data->initiator_depth;
        event->param.conn.flow_control = req_data->flow_control;
        event->param.conn.retry_count = req_data->retry_count;
        event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
        event->param.conn.srq = req_data->srq;
        event->param.conn.qp_num = req_data->remote_qpn;
}

static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
{
        struct rdma_id_private *listen_id, *conn_id;
        struct rdma_cm_event event;
        int offset, ret;

        listen_id = cm_id->context;
        if (cma_disable_remove(listen_id, CMA_LISTEN))
                return (ECONNABORTED);

        memset(&event, 0, sizeof event);
        offset = cma_user_data_offset(listen_id->id.ps);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        if (cma_is_ud_ps(listen_id->id.ps)) {
                conn_id = cma_new_udp_id(&listen_id->id, ib_event);
                event.param.ud.private_data = ib_event->private_data + offset;
                event.param.ud.private_data_len =
                                IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
        } else {
                conn_id = cma_new_conn_id(&listen_id->id, ib_event);
                cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
                                       ib_event->private_data, offset);
        }
        if (!conn_id) {
                ret = ENOMEM;
                goto out;
        }

        mtx_lock(&conn_id->lock);
        conn_id->dev_remove++;
        mtx_unlock(&conn_id->lock);
        mtx_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mtx_unlock(&lock);
        if (ret)
                goto release_conn_id;

        conn_id->cm_id.ib = cm_id;
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_ib_handler;

        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (!ret)
                goto out;

        /* Destroy the CM ID by returning a non-zero value. */
        conn_id->cm_id.ib = NULL;

release_conn_id:
        cma_exch(conn_id, CMA_DESTROYING);
        cma_enable_remove(conn_id);
        rdma_destroy_id(&conn_id->id);

out:
        cma_enable_remove(listen_id);
        return ret;
}
static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
{
        return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
}
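/*
 * Worked example: with RDMA_PS_TCP (0x0106 in rdma_cm.h) and a bound
 * port of 0x1234, this returns
 *
 *      cpu_to_be64((0x0106ULL << 16) + 0x1234) == be64 0x0000000001061234
 *
 * i.e. the port space in bits 31:16 and the IP port in bits 15:0 of a
 * 64-bit big-endian service ID.
 */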
static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
                                 struct ib_cm_compare_data *compare)
{
        struct cma_hdr *cma_data, *cma_mask;
        struct sdp_hh *sdp_data, *sdp_mask;
        __be32 ip4_addr;
        struct in6_addr ip6_addr;

        memset(compare, 0, sizeof *compare);
        cma_data = (void *) compare->data;
        cma_mask = (void *) compare->mask;
        sdp_data = (void *) compare->data;
        sdp_mask = (void *) compare->mask;

        switch (addr->sa_family) {
        case AF_INET:
                ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
                if (ps == RDMA_PS_SDP) {
                        sdp_set_ip_ver(sdp_data, 4);
                        sdp_set_ip_ver(sdp_mask, 0xF);
                        sdp_data->dst_addr.ip4.addr = ip4_addr;
                        sdp_mask->dst_addr.ip4.addr = ~0;
                } else {
                        cma_set_ip_ver(cma_data, 4);
                        cma_set_ip_ver(cma_mask, 0xF);
                        cma_data->dst_addr.ip4.addr = ip4_addr;
                        cma_mask->dst_addr.ip4.addr = ~0;
                }
                break;
        case AF_INET6:
                ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
                if (ps == RDMA_PS_SDP) {
                        sdp_set_ip_ver(sdp_data, 6);
                        sdp_set_ip_ver(sdp_mask, 0xF);
                        sdp_data->dst_addr.ip6 = ip6_addr;
                        memset(&sdp_mask->dst_addr.ip6, 0xFF,
                               sizeof sdp_mask->dst_addr.ip6);
                } else {
                        cma_set_ip_ver(cma_data, 6);
                        cma_set_ip_ver(cma_mask, 0xF);
                        cma_data->dst_addr.ip6 = ip6_addr;
                        memset(&cma_mask->dst_addr.ip6, 0xFF,
                               sizeof cma_mask->dst_addr.ip6);
                }
                break;
        default:
                break;
        }
}
#endif /* IB_SUPPORTED */
static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
{
        struct rdma_id_private *id_priv = iw_id->context;
        struct rdma_cm_event event;
        struct sockaddr_in *sin;
        int ret = 0;

        if (cma_disable_remove(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (iw_event->event) {
        case IW_CM_EVENT_CLOSE:
                event.event = RDMA_CM_EVENT_DISCONNECTED;
                break;
        case IW_CM_EVENT_CONNECT_REPLY:
                sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
                *sin = iw_event->local_addr;
                sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
                *sin = iw_event->remote_addr;
                switch (iw_event->status) {
                case 0:
                        event.event = RDMA_CM_EVENT_ESTABLISHED;
                        break;
                case ECONNRESET:
                case ECONNREFUSED:
                        event.event = RDMA_CM_EVENT_REJECTED;
                        break;
                case ETIMEDOUT:
                        event.event = RDMA_CM_EVENT_UNREACHABLE;
                        break;
                default:
                        event.event = RDMA_CM_EVENT_CONNECT_ERROR;
                        break;
                }
                break;
        case IW_CM_EVENT_ESTABLISHED:
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        default:
                panic("unknown event type %d", iw_event->event);
        }

        event.status = iw_event->status;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;
        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.iw = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
                cma_enable_remove(id_priv);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }

        cma_enable_remove(id_priv);
        return ret;
}
static int iw_conn_req_handler(struct iw_cm_id *cm_id,
                               struct iw_cm_event *iw_event)
{
        struct rdma_cm_id *new_cm_id;
        struct rdma_id_private *listen_id, *conn_id;
        struct sockaddr_in *sin;
        struct ifnet *dev;
        struct rdma_cm_event event;
        struct ifaddr *ifa;
        u16 port;
        int ret;

        listen_id = cm_id->context;
        if (cma_disable_remove(listen_id, CMA_LISTEN))
                return (ECONNABORTED);

        /* Create a new RDMA id for the new IW CM ID */
        new_cm_id = rdma_create_id(listen_id->id.event_handler,
                                   listen_id->id.context,
                                   RDMA_PS_TCP);
        if (IS_ERR(new_cm_id)) {
                ret = ENOMEM;
                goto out;
        }
        conn_id = container_of(new_cm_id, struct rdma_id_private, id);
        mtx_lock(&conn_id->lock);
        ++conn_id->dev_remove;
        mtx_unlock(&conn_id->lock);
        conn_id->state = CMA_CONNECT;

        port = iw_event->local_addr.sin_port;
        iw_event->local_addr.sin_port = 0;
        ifa = ifa_ifwithaddr((struct sockaddr *)&iw_event->local_addr);
        iw_event->local_addr.sin_port = port;
        if (!ifa) {
                ret = EADDRNOTAVAIL;
                cma_enable_remove(conn_id);
                rdma_destroy_id(new_cm_id);
                goto out;
        }
        dev = ifa->ifa_ifp;
        ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
        if (ret) {
                cma_enable_remove(conn_id);
                rdma_destroy_id(new_cm_id);
                goto out;
        }

        mtx_lock(&lock);
        ret = cma_acquire_dev(conn_id);
        mtx_unlock(&lock);
        if (ret) {
                cma_enable_remove(conn_id);
                rdma_destroy_id(new_cm_id);
                goto out;
        }

        conn_id->cm_id.iw = cm_id;
        cm_id->context = conn_id;
        cm_id->cm_handler = cma_iw_handler;

        sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
        *sin = iw_event->local_addr;
        sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
        *sin = iw_event->remote_addr;
        conn_id->so = cm_id->so;

        memset(&event, 0, sizeof event);
        event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
        event.param.conn.private_data = iw_event->private_data;
        event.param.conn.private_data_len = iw_event->private_data_len;
        ret = conn_id->id.event_handler(&conn_id->id, &event);
        if (ret) {
                /* User wants to destroy the CM ID */
                conn_id->cm_id.iw = NULL;
                cma_exch(conn_id, CMA_DESTROYING);
                cma_enable_remove(conn_id);
                rdma_destroy_id(&conn_id->id);
        }

out:
        cma_enable_remove(listen_id);
        return ret;
}
static int cma_ib_listen(struct rdma_id_private *id_priv)
{
        struct ib_cm_compare_data compare_data;
        struct sockaddr *addr;
        __be64 svc_id;
        int ret;

        id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
                                            id_priv);
        if (IS_ERR(id_priv->cm_id.ib))
                return PTR_ERR(id_priv->cm_id.ib);

        addr = &id_priv->id.route.addr.src_addr;
        svc_id = cma_get_service_id(id_priv->id.ps, addr);
        if (cma_any_addr(addr))
                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
        else {
                cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
                ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
        }

        if (ret) {
                ib_destroy_cm_id(id_priv->cm_id.ib);
                id_priv->cm_id.ib = NULL;
        }

        return ret;
}

static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
{
        int ret;
        struct sockaddr_in *sin;

        id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, id_priv->so,
                                            iw_conn_req_handler, id_priv);
        if (IS_ERR(id_priv->cm_id.iw))
                return PTR_ERR(id_priv->cm_id.iw);

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
        id_priv->cm_id.iw->local_addr = *sin;

        ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
        if (ret) {
                iw_destroy_cm_id(id_priv->cm_id.iw);
                id_priv->cm_id.iw = NULL;
        }

        return ret;
}
static int cma_listen_handler(struct rdma_cm_id *id,
                              struct rdma_cm_event *event)
{
        struct rdma_id_private *id_priv = id->context;

        id->context = id_priv->id.context;
        id->event_handler = id_priv->id.event_handler;
        return id_priv->id.event_handler(id, event);
}

static void cma_listen_on_dev(struct rdma_id_private *id_priv,
                              struct cma_device *cma_dev)
{
        struct rdma_id_private *dev_id_priv;
        struct rdma_cm_id *id;
        int ret;

        id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
        if (IS_ERR(id))
                return;

        dev_id_priv = container_of(id, struct rdma_id_private, id);

        dev_id_priv->state = CMA_ADDR_BOUND;
        memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
               ip_addr_size(&id_priv->id.route.addr.src_addr));
        dev_id_priv->so = id_priv->so; /* XXX */

        cma_attach_to_dev(dev_id_priv, cma_dev);
        LIST_INSERT_HEAD(&id_priv->listen_list, dev_id_priv, listen_entry);

        ret = rdma_listen(id, id_priv->backlog);
        if (ret)
                goto err;

        return;
err:
        cma_destroy_listen(dev_id_priv);
}

static void cma_listen_on_all(struct rdma_id_private *id_priv)
{
        struct cma_device *cma_dev;

        mtx_lock(&lock);
        LIST_INSERT_HEAD(&listen_any_list, id_priv, list);
        TAILQ_FOREACH(cma_dev, &dev_list, list)
                cma_listen_on_dev(id_priv, cma_dev);
        mtx_unlock(&lock);
}

static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
{
        struct sockaddr_in addr_in;

        memset(&addr_in, 0, sizeof addr_in);
        addr_in.sin_family = af;
        addr_in.sin_len = sizeof addr_in;
        return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
}
int rdma_listen(struct rdma_cm_id *id, int backlog)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (id_priv->state == CMA_IDLE) {
                ret = cma_bind_any(id, AF_INET);
                if (ret)
                        return ret;
        }

        if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
                return (EINVAL);

        id_priv->backlog = backlog;
        if (id->device) {
                switch (rdma_node_get_transport(id->device->node_type)) {
                case RDMA_TRANSPORT_IB:
                        ret = cma_ib_listen(id_priv);
                        if (ret)
                                goto err;
                        break;
                case RDMA_TRANSPORT_IWARP:
                        ret = cma_iw_listen(id_priv, backlog);
                        if (ret)
                                goto err;
                        break;
                default:
                        ret = ENOSYS;
                        goto err;
                }
        } else
                cma_listen_on_all(id_priv);

        return 0;
err:
        id_priv->backlog = 0;
        cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
        return ret;
}
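/*
 * Typical passive-side usage of the functions above (sketch; error
 * handling elided):
 *
 *      id = rdma_create_id(my_handler, ctx, RDMA_PS_TCP);
 *      rdma_bind_addr(id, (struct sockaddr *)&sin);
 *      rdma_listen(id, backlog);
 *
 * If the id is still CMA_IDLE, rdma_listen() binds to the IPv4
 * wildcard address itself.  Each incoming REQ then surfaces as an
 * RDMA_CM_EVENT_CONNECT_REQUEST on my_handler with a freshly created
 * child id (see cma_req_handler() and iw_conn_req_handler()).
 */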
static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
                              void *context)
{
        struct cma_work *work = context;
        struct rdma_route *route;

        route = &work->id->id.route;

        if (!status) {
                route->num_paths = 1;
                *route->path_rec = *path_rec;
        } else {
                work->old_state = CMA_ROUTE_QUERY;
                work->new_state = CMA_ADDR_RESOLVED;
                work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
                work->event.status = status;
        }

        taskqueue_enqueue(cma_wq, &work->task);
}

static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
                              struct cma_work *work)
{
        struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
        struct ib_sa_path_rec path_rec;

        memset(&path_rec, 0, sizeof path_rec);
        ib_addr_get_sgid(addr, &path_rec.sgid);
        ib_addr_get_dgid(addr, &path_rec.dgid);
        path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
        path_rec.numb_path = 1;
        path_rec.reversible = 1;

        id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
                                id_priv->id.port_num, &path_rec,
                                IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
                                IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
                                IB_SA_PATH_REC_REVERSIBLE,
                                timeout_ms, M_NOWAIT,
                                cma_query_handler, work, &id_priv->query);

        return (id_priv->query_id < 0) ? id_priv->query_id : 0;
}
static void cma_work_handler(void *context, int pending)
{
        struct cma_work *work = context;
        struct rdma_id_private *id_priv = work->id;
        int destroy = 0;

        mtx_lock(&id_priv->lock);
        ++id_priv->dev_remove;
        mtx_unlock(&id_priv->lock);
        if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
                goto out;

        if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
                cma_exch(id_priv, CMA_DESTROYING);
                destroy = 1;
        }
out:
        cma_enable_remove(id_priv);
        cma_deref_id(id_priv);
        if (destroy)
                rdma_destroy_id(&id_priv->id);
        free(work, M_DEVBUF);
}

static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
{
        struct rdma_route *route = &id_priv->id.route;
        struct cma_work *work;
        int ret;

        work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
        if (!work)
                return (ENOMEM);
        bzero(work, sizeof *work);

        work->id = id_priv;
        TASK_INIT(&work->task, 0, cma_work_handler, work);
        work->old_state = CMA_ROUTE_QUERY;
        work->new_state = CMA_ROUTE_RESOLVED;
        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;

        route->path_rec = malloc(sizeof *route->path_rec, M_DEVBUF, M_NOWAIT);
        if (!route->path_rec) {
                ret = ENOMEM;
                goto err1;
        }

        ret = cma_query_ib_route(id_priv, timeout_ms, work);
        if (ret)
                goto err2;

        return 0;
err2:
        free(route->path_rec, M_DEVBUF);
        route->path_rec = NULL;
err1:
        free(work, M_DEVBUF);
        return (ret);
}
int rdma_set_ib_paths(struct rdma_cm_id *id,
                      struct ib_sa_path_rec *path_rec, int num_paths)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
                return (EINVAL);

        id->route.path_rec = malloc(sizeof *path_rec * num_paths, M_DEVBUF, M_NOWAIT);
        if (!id->route.path_rec) {
                ret = ENOMEM;
                goto err;
        }

        memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
        return 0;
err:
        cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
        return (ret);
}

static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
{
        struct cma_work *work;

        work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
        if (!work)
                return (ENOMEM);
        bzero(work, sizeof *work);

        work->id = id_priv;
        TASK_INIT(&work->task, 0, cma_work_handler, work);
        work->old_state = CMA_ROUTE_QUERY;
        work->new_state = CMA_ROUTE_RESOLVED;
        work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
        taskqueue_enqueue(cma_wq, &work->task);
        return 0;
}
int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
                return (EINVAL);

        mtx_lock(&id_priv->lock);
        id_priv->refcount++;
        mtx_unlock(&id_priv->lock);
        switch (rdma_node_get_transport(id->device->node_type)) {
        case RDMA_TRANSPORT_IB:
                ret = cma_resolve_ib_route(id_priv, timeout_ms);
                break;
        case RDMA_TRANSPORT_IWARP:
                ret = cma_resolve_iw_route(id_priv, timeout_ms);
                break;
        default:
                ret = ENOSYS;
                break;
        }
        if (ret)
                goto err;

        return 0;
err:
        cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
        cma_deref_id(id_priv);
        return (ret);
}
static int cma_bind_loopback(struct rdma_id_private *id_priv)
{
        struct cma_device *cma_dev;
        struct ib_port_attr port_attr;
        union ib_gid gid;
        u16 pkey;
        int ret;
        u8 p;

        mtx_lock(&lock);
        if (TAILQ_EMPTY(&dev_list)) {
                ret = ENODEV;
                goto out;
        }
        TAILQ_FOREACH(cma_dev, &dev_list, list)
                for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
                        if (!ib_query_port(cma_dev->device, p, &port_attr) &&
                            port_attr.state == IB_PORT_ACTIVE)
                                goto port_found;

        p = 1;
        cma_dev = TAILQ_FIRST(&dev_list);

port_found:
        ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
        if (ret)
                goto out;

        ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
        if (ret)
                goto out;

        ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
        ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
        id_priv->id.port_num = p;
        cma_attach_to_dev(id_priv, cma_dev);
out:
        mtx_unlock(&lock);
        return (ret);
}
static void addr_handler(int status, struct sockaddr *src_addr,
                         struct rdma_dev_addr *dev_addr, void *context)
{
        struct rdma_id_private *id_priv = context;
        struct rdma_cm_event event;

        memset(&event, 0, sizeof event);
        mtx_lock(&id_priv->lock);
        ++id_priv->dev_remove;
        mtx_unlock(&id_priv->lock);

        /*
         * Grab mutex to block rdma_destroy_id() from removing the device while
         * we're trying to acquire it.
         */
        mtx_lock(&lock);
        if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
                mtx_unlock(&lock);
                goto out;
        }

        if (!status && !id_priv->cma_dev)
                status = cma_acquire_dev(id_priv);
        mtx_unlock(&lock);

        if (status) {
                if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
                        goto out;
                event.event = RDMA_CM_EVENT_ADDR_ERROR;
                event.status = status;
        } else {
                memcpy(&id_priv->id.route.addr.src_addr, src_addr,
                       ip_addr_size(src_addr));
                event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
        }

        if (id_priv->id.event_handler(&id_priv->id, &event)) {
                cma_exch(id_priv, CMA_DESTROYING);
                cma_enable_remove(id_priv);
                cma_deref_id(id_priv);
                rdma_destroy_id(&id_priv->id);
                return;
        }
out:
        cma_enable_remove(id_priv);
        cma_deref_id(id_priv);
}
static int cma_resolve_loopback(struct rdma_id_private *id_priv)
{
        struct cma_work *work;
        struct sockaddr_in *src_in, *dst_in;
        union ib_gid gid;
        int ret;

        work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
        if (!work)
                return (ENOMEM);
        bzero(work, sizeof *work);

        if (!id_priv->cma_dev) {
                ret = cma_bind_loopback(id_priv);
                if (ret)
                        goto err;
        }

        ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
        ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);

        if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
                src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
                dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
                src_in->sin_family = dst_in->sin_family;
                src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
        }

        work->id = id_priv;
        TASK_INIT(&work->task, 0, cma_work_handler, work);
        work->old_state = CMA_ADDR_QUERY;
        work->new_state = CMA_ADDR_RESOLVED;
        work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
        taskqueue_enqueue(cma_wq, &work->task);
        return 0;
err:
        free(work, M_DEVBUF);
        return (ret);
}

static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                         struct sockaddr *dst_addr)
{
        if (src_addr && src_addr->sa_family)
                return rdma_bind_addr(id, src_addr);
        else
                return cma_bind_any(id, dst_addr->sa_family);
}
int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
                      struct sockaddr *dst_addr, int timeout_ms)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (id_priv->state == CMA_IDLE) {
                ret = cma_bind_addr(id, src_addr, dst_addr);
                if (ret)
                        return ret;
        }

        if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
                return (EINVAL);

        mtx_lock(&id_priv->lock);
        id_priv->refcount++;
        mtx_unlock(&id_priv->lock);
        memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
        if (cma_any_addr(dst_addr))
                ret = cma_resolve_loopback(id_priv);
        else
                ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
                                      dst_addr, &id->route.addr.dev_addr,
                                      timeout_ms, addr_handler, id_priv);
        if (ret)
                goto err;

        return 0;
err:
        cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
        cma_deref_id(id_priv);
        return (ret);
}
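/*
 * Typical active-side sequence (sketch): rdma_resolve_addr() starts an
 * asynchronous lookup; addr_handler() posts RDMA_CM_EVENT_ADDR_RESOLVED,
 * after which the consumer calls rdma_resolve_route(), waits for
 * RDMA_CM_EVENT_ROUTE_RESOLVED, and finally calls rdma_connect().  Each
 * step is a cma_comp_exch() transition: ADDR_BOUND -> ADDR_QUERY ->
 * ADDR_RESOLVED -> ROUTE_QUERY -> ROUTE_RESOLVED -> CONNECT.
 */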
static void cma_bind_port(struct rdma_bind_list *bind_list,
                          struct rdma_id_private *id_priv)
{
        struct sockaddr_in *sin;

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
        sin->sin_port = htons(bind_list->port);
        id_priv->bind_list = bind_list;
        TAILQ_INSERT_HEAD(&bind_list->owners, id_priv, node);
}

static int cma_alloc_port(struct kvl *ps, struct rdma_id_private *id_priv,
                          unsigned short snum)
{
        struct rdma_bind_list *bind_list;
        int port, ret;

        bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT);
        if (!bind_list)
                return (ENOMEM);
        bzero(bind_list, sizeof *bind_list);

        do {
                ret = kvl_alloc_above(ps, bind_list, snum, &port);
        } while (ret == EAGAIN);
        if (ret)
                goto err1;

        if (port != snum) {
                ret = EADDRNOTAVAIL;
                goto err2;
        }

        bind_list->ps = ps;
        bind_list->port = (unsigned short) port;
        cma_bind_port(bind_list, id_priv);
        return 0;
err2:
        kvl_delete(ps, port);
err1:
        free(bind_list, M_DEVBUF);
        return (ret);
}
static int cma_alloc_any_port(struct kvl *ps, struct rdma_id_private *id_priv)
{
        struct rdma_bind_list *bind_list;
        int port, ret;

        bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT);
        if (!bind_list)
                return (ENOMEM);
        bzero(bind_list, sizeof *bind_list);

retry:
        do {
                ret = kvl_alloc_above(ps, bind_list, next_port, &port);
        } while (ret == EAGAIN);
        if (ret)
                goto err1;

        if (port > ipport_lastauto) {
                if (next_port != ipport_firstauto) {
                        kvl_delete(ps, port);
                        next_port = ipport_firstauto;
                        goto retry;
                }
                ret = EADDRNOTAVAIL;
                goto err2;
        }

        if (port == ipport_lastauto)
                next_port = ipport_firstauto;
        else
                next_port = port + 1;

        bind_list->ps = ps;
        bind_list->port = (unsigned short) port;
        cma_bind_port(bind_list, id_priv);
        return 0;
err2:
        kvl_delete(ps, port);
err1:
        free(bind_list, M_DEVBUF);
        return (ret);
}
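/*
 * Note on the rotation above: ports are handed out upward from
 * next_port within the system's ephemeral range (the
 * net.inet.ip.portrange.first/.last sysctls backing ipport_firstauto
 * and ipport_lastauto).  On overflow the search wraps to
 * ipport_firstauto once; only if that second pass also fails does the
 * caller see EADDRNOTAVAIL.
 */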
static int cma_use_port(struct kvl *ps, struct rdma_id_private *id_priv)
{
        struct rdma_id_private *cur_id;
        struct sockaddr_in *sin, *cur_sin;
        struct rdma_bind_list *bind_list;
        unsigned short snum;

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
        snum = ntohs(sin->sin_port);
        if (snum <= ipport_reservedhigh && snum >= ipport_reservedlow &&
            priv_check(curthread, PRIV_NETINET_RESERVEDPORT))
                return (EACCES);

        bind_list = kvl_lookup(ps, snum);
        if (!bind_list)
                return cma_alloc_port(ps, id_priv, snum);

        /*
         * We don't support binding to any address if anyone is bound to
         * a specific address on the same port.
         */
        if (cma_any_addr(&id_priv->id.route.addr.src_addr))
                return (EADDRNOTAVAIL);

        TAILQ_FOREACH(cur_id, &bind_list->owners, node) {
                if (cma_any_addr(&cur_id->id.route.addr.src_addr))
                        return (EADDRNOTAVAIL);

                cur_sin = (struct sockaddr_in *)&cur_id->id.route.addr.src_addr;
                if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
                        return (EADDRINUSE);
        }

        cma_bind_port(bind_list, id_priv);
        return 0;
}
static int cma_get_tcp_port(struct rdma_id_private *id_priv)
{
        int ret;
        struct socket *so;

        ret = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP,
                       curthread->td_ucred, curthread);
        if (ret) {
                printf("%s socreate err %d\n", __FUNCTION__, ret);
                return ret;
        }

        ret = sobind(so, (struct sockaddr *)&id_priv->id.route.addr.src_addr,
                     curthread);
        if (ret) {
                soclose(so);
                return ret;
        }
        id_priv->so = so;
        return 0;
}

static int cma_get_port(struct rdma_id_private *id_priv)
{
        struct kvl *ps;
        int ret;

        switch (id_priv->id.ps) {
        case RDMA_PS_SDP:
                ps = &sdp_ps;
                break;
        case RDMA_PS_TCP:
                ps = &tcp_ps;
                ret = cma_get_tcp_port(id_priv); /* Synch with native stack */
                if (ret)
                        return ret;
                break;
        case RDMA_PS_UDP:
                ps = &udp_ps;
                break;
        case RDMA_PS_IPOIB:
                ps = &ipoib_ps;
                break;
        default:
                return (EPROTONOSUPPORT);
        }

        mtx_lock(&lock);
        if (cma_any_port(&id_priv->id.route.addr.src_addr))
                ret = cma_alloc_any_port(ps, id_priv);
        else
                ret = cma_use_port(ps, id_priv);
        mtx_unlock(&lock);

        return (ret);
}
int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
{
        struct rdma_id_private *id_priv;
        int ret;

        if (addr->sa_family != AF_INET)
                return (EAFNOSUPPORT);

        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
                return (EINVAL);

        if (!cma_any_addr(addr)) {
                ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
                if (ret)
                        goto err1;

                mtx_lock(&lock);
                ret = cma_acquire_dev(id_priv);
                mtx_unlock(&lock);
                if (ret)
                        goto err1;
        }

        memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
        ret = cma_get_port(id_priv);
        if (ret)
                goto err2;

        return 0;
err2:
        if (!cma_any_addr(addr)) {
                mtx_lock(&lock);
                cma_detach_from_dev(id_priv);
                mtx_unlock(&lock);
        }
err1:
        cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
        return (ret);
}
static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
                          struct rdma_route *route)
{
        struct sockaddr_in *src4, *dst4;
        struct cma_hdr *cma_hdr;
        struct sdp_hh *sdp_hdr;

        src4 = (struct sockaddr_in *) &route->addr.src_addr;
        dst4 = (struct sockaddr_in *) &route->addr.dst_addr;

        switch (ps) {
        case RDMA_PS_SDP:
                sdp_hdr = hdr;
                if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
                        return (EINVAL);
                sdp_set_ip_ver(sdp_hdr, 4);
                sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
                sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
                sdp_hdr->port = src4->sin_port;
                break;
        default:
                cma_hdr = hdr;
                cma_hdr->cma_version = CMA_VERSION;
                cma_set_ip_ver(cma_hdr, 4);
                cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
                cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
                cma_hdr->port = src4->sin_port;
                break;
        }
        return 0;
}
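/*
 * Worked example (non-SDP port spaces): for 10.0.0.1 -> 10.0.0.2 with
 * source port 0x1234, cma_format_hdr() emits
 *
 *      cma_version = 0x00, ip_version = 0x40 (IPv4 in bits 7:4),
 *      port = 0x1234 (already network order from sin_port),
 *      src_addr.ip4.addr = 10.0.0.1, dst_addr.ip4.addr = 10.0.0.2,
 *
 * which is exactly what cma_get_net_info() parses on the passive side.
 */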
static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
                                struct ib_cm_event *ib_event)
{
        struct rdma_id_private *id_priv = cm_id->context;
        struct rdma_cm_event event;
        struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
        int ret = 0;

        if (cma_disable_remove(id_priv, CMA_CONNECT))
                return 0;

        memset(&event, 0, sizeof event);
        switch (ib_event->event) {
        case IB_CM_SIDR_REQ_ERROR:
                event.event = RDMA_CM_EVENT_UNREACHABLE;
                event.status = ETIMEDOUT;
                break;
        case IB_CM_SIDR_REP_RECEIVED:
                event.param.ud.private_data = ib_event->private_data;
                event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
                if (rep->status != IB_SIDR_SUCCESS) {
                        event.event = RDMA_CM_EVENT_UNREACHABLE;
                        event.status = ib_event->param.sidr_rep_rcvd.status;
                        break;
                }
                if (id_priv->qkey != rep->qkey) {
                        event.event = RDMA_CM_EVENT_UNREACHABLE;
                        event.status = EINVAL;
                        break;
                }
                ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
                                     id_priv->id.route.path_rec,
                                     &event.param.ud.ah_attr);
                event.param.ud.qp_num = rep->qpn;
                event.param.ud.qkey = rep->qkey;
                event.event = RDMA_CM_EVENT_ESTABLISHED;
                break;
        default:
                log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d",
                    ib_event->event);
                goto out;
        }

        ret = id_priv->id.event_handler(&id_priv->id, &event);
        if (ret) {
                /* Destroy the CM ID by returning a non-zero value. */
                id_priv->cm_id.ib = NULL;
                cma_exch(id_priv, CMA_DESTROYING);
                cma_enable_remove(id_priv);
                rdma_destroy_id(&id_priv->id);
                return ret;
        }
out:
        cma_enable_remove(id_priv);
        return ret;
}
static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
                              struct rdma_conn_param *conn_param)
{
        struct ib_cm_sidr_req_param req;
        struct rdma_route *route;
        int ret;

        req.private_data_len = sizeof(struct cma_hdr) +
                               conn_param->private_data_len;
        req.private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT);
        if (!req.private_data)
                return (ENOMEM);
        bzero((void *)req.private_data, req.private_data_len);

        if (conn_param->private_data && conn_param->private_data_len)
                memcpy((caddr_t) req.private_data + sizeof(struct cma_hdr),
                       conn_param->private_data, conn_param->private_data_len);

        route = &id_priv->id.route;
        ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
        if (ret)
                goto out;

        id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
                                            cma_sidr_rep_handler, id_priv);
        if (IS_ERR(id_priv->cm_id.ib)) {
                ret = PTR_ERR(id_priv->cm_id.ib);
                goto out;
        }

        req.path = route->path_rec;
        req.service_id = cma_get_service_id(id_priv->id.ps,
                                            &route->addr.dst_addr);
        req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
        req.max_cm_retries = CMA_MAX_CM_RETRIES;

        ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
        if (ret) {
                ib_destroy_cm_id(id_priv->cm_id.ib);
                id_priv->cm_id.ib = NULL;
        }
out:
        free(req.private_data, M_DEVBUF);
        return (ret);
}
static int cma_connect_ib(struct rdma_id_private *id_priv,
                          struct rdma_conn_param *conn_param)
{
        struct ib_cm_req_param req;
        struct rdma_route *route;
        void *private_data;
        int offset, ret;

        memset(&req, 0, sizeof req);
        offset = cma_user_data_offset(id_priv->id.ps);
        req.private_data_len = offset + conn_param->private_data_len;
        private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT);
        if (!private_data)
                return (ENOMEM);
        bzero(private_data, req.private_data_len);

        if (conn_param->private_data && conn_param->private_data_len)
                memcpy(private_data + offset, conn_param->private_data,
                       conn_param->private_data_len);

        id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
                                            id_priv);
        if (IS_ERR(id_priv->cm_id.ib)) {
                ret = PTR_ERR(id_priv->cm_id.ib);
                goto out;
        }

        route = &id_priv->id.route;
        ret = cma_format_hdr(private_data, id_priv->id.ps, route);
        if (ret)
                goto out;
        req.private_data = private_data;

        req.primary_path = &route->path_rec[0];
        if (route->num_paths == 2)
                req.alternate_path = &route->path_rec[1];

        req.service_id = cma_get_service_id(id_priv->id.ps,
                                            &route->addr.dst_addr);
        req.qp_num = id_priv->qp_num;
        req.qp_type = IB_QPT_RC;
        req.starting_psn = id_priv->seq_num;
        req.responder_resources = conn_param->responder_resources;
        req.initiator_depth = conn_param->initiator_depth;
        req.flow_control = conn_param->flow_control;
        req.retry_count = conn_param->retry_count;
        req.rnr_retry_count = conn_param->rnr_retry_count;
        req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
        req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
        req.max_cm_retries = CMA_MAX_CM_RETRIES;
        req.srq = id_priv->srq ? 1 : 0;

        ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
out:
        if (ret && !IS_ERR(id_priv->cm_id.ib)) {
                ib_destroy_cm_id(id_priv->cm_id.ib);
                id_priv->cm_id.ib = NULL;
        }

        free(private_data, M_DEVBUF);
        return (ret);
}
static int cma_connect_iw(struct rdma_id_private *id_priv,
                          struct rdma_conn_param *conn_param)
{
        struct iw_cm_id *cm_id;
        struct sockaddr_in *sin;
        int ret;
        struct iw_cm_conn_param iw_param;

        cm_id = iw_create_cm_id(id_priv->id.device, id_priv->so,
                                cma_iw_handler, id_priv);
        if (IS_ERR(cm_id)) {
                ret = PTR_ERR(cm_id);
                goto out;
        }

        id_priv->cm_id.iw = cm_id;

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
        cm_id->local_addr = *sin;

        sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
        cm_id->remote_addr = *sin;

        ret = cma_modify_qp_rtr(&id_priv->id);
        if (ret)
                goto out;

        iw_param.ord = conn_param->initiator_depth;
        iw_param.ird = conn_param->responder_resources;
        iw_param.private_data = conn_param->private_data;
        iw_param.private_data_len = conn_param->private_data_len;
        if (id_priv->id.qp)
                iw_param.qpn = id_priv->qp_num;
        else
                iw_param.qpn = conn_param->qp_num;
        ret = iw_cm_connect(cm_id, &iw_param);
out:
        if (ret && !IS_ERR(cm_id)) {
                iw_destroy_cm_id(cm_id);
                id_priv->cm_id.iw = NULL;
        }
        return (ret);
}
int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
                return (EINVAL);

        if (!id->qp) {
                id_priv->qp_num = conn_param->qp_num;
                id_priv->srq = conn_param->srq;
        }

        switch (rdma_node_get_transport(id->device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (cma_is_ud_ps(id->ps))
                        ret = cma_resolve_ib_udp(id_priv, conn_param);
                else
                        ret = cma_connect_ib(id_priv, conn_param);
                break;
        case RDMA_TRANSPORT_IWARP:
                ret = cma_connect_iw(id_priv, conn_param);
                break;
        default:
                ret = ENOSYS;
                break;
        }
        if (ret)
                goto err;

        return 0;
err:
        cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
        return (ret);
}
static int cma_accept_ib(struct rdma_id_private *id_priv,
                         struct rdma_conn_param *conn_param)
{
        struct ib_cm_rep_param rep;
        struct ib_qp_attr qp_attr;
        int qp_attr_mask, ret;

        if (id_priv->id.qp) {
                ret = cma_modify_qp_rtr(&id_priv->id);
                if (ret)
                        goto out;

                qp_attr.qp_state = IB_QPS_RTS;
                ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr,
                                         &qp_attr_mask);
                if (ret)
                        goto out;

                qp_attr.max_rd_atomic = conn_param->initiator_depth;
                ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
                if (ret)
                        goto out;
        }

        memset(&rep, 0, sizeof rep);
        rep.qp_num = id_priv->qp_num;
        rep.starting_psn = id_priv->seq_num;
        rep.private_data = conn_param->private_data;
        rep.private_data_len = conn_param->private_data_len;
        rep.responder_resources = conn_param->responder_resources;
        rep.initiator_depth = conn_param->initiator_depth;
        rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
        rep.failover_accepted = 0;
        rep.flow_control = conn_param->flow_control;
        rep.rnr_retry_count = conn_param->rnr_retry_count;
        rep.srq = id_priv->srq ? 1 : 0;

        ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
out:
        return (ret);
}

static int cma_accept_iw(struct rdma_id_private *id_priv,
                         struct rdma_conn_param *conn_param)
{
        struct iw_cm_conn_param iw_param;
        int ret;

        ret = cma_modify_qp_rtr(&id_priv->id);
        if (ret)
                return ret;

        iw_param.ord = conn_param->initiator_depth;
        iw_param.ird = conn_param->responder_resources;
        iw_param.private_data = conn_param->private_data;
        iw_param.private_data_len = conn_param->private_data_len;
        if (id_priv->id.qp) {
                iw_param.qpn = id_priv->qp_num;
        } else
                iw_param.qpn = conn_param->qp_num;

        return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
}
static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
                             enum ib_cm_sidr_status status,
                             const void *private_data, int private_data_len)
{
        struct ib_cm_sidr_rep_param rep;

        memset(&rep, 0, sizeof rep);
        rep.status = status;
        if (status == IB_SIDR_SUCCESS) {
                rep.qp_num = id_priv->qp_num;
                rep.qkey = id_priv->qkey;
        }
        rep.private_data = private_data;
        rep.private_data_len = private_data_len;

        return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
}

int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
{
        struct rdma_id_private *id_priv;
        int ret;

        id_priv = container_of(id, struct rdma_id_private, id);
        if (!cma_comp(id_priv, CMA_CONNECT))
                return (EINVAL);

        if (!id->qp && conn_param) {
                id_priv->qp_num = conn_param->qp_num;
                id_priv->srq = conn_param->srq;
        }

        switch (rdma_node_get_transport(id->device->node_type)) {
        case RDMA_TRANSPORT_IB:
                if (cma_is_ud_ps(id->ps))
                        ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
                                                conn_param->private_data,
                                                conn_param->private_data_len);
                else if (conn_param)
                        ret = cma_accept_ib(id_priv, conn_param);
                else
                        ret = cma_rep_recv(id_priv);
                break;
        case RDMA_TRANSPORT_IWARP:
                ret = cma_accept_iw(id_priv, conn_param);
                break;
        default:
                ret = ENOSYS;
                break;
        }

        if (ret)
                goto reject;

        return 0;
reject:
        cma_modify_qp_err(id);
        rdma_reject(id, NULL, 0);
        return (ret);
}
2545 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2547 struct rdma_id_private *id_priv;
2550 id_priv = container_of(id, struct rdma_id_private, id);
2551 if (!cma_has_cm_dev(id_priv))
2554 switch (id->device->node_type) {
2555 case RDMA_NODE_IB_CA:
2556 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2568 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2569 u8 private_data_len)
2571 struct rdma_id_private *id_priv;
2574 id_priv = container_of(id, struct rdma_id_private, id);
2575 if (!cma_has_cm_dev(id_priv))
2579 switch (rdma_node_get_transport(id->device->node_type)) {
2580 case RDMA_TRANSPORT_IB:
2581 if (cma_is_ud_ps(id->ps))
2582 ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2583 private_data, private_data_len);
2585 ret = ib_send_cm_rej(id_priv->cm_id.ib,
2586 IB_CM_REJ_CONSUMER_DEFINED, NULL,
2587 0, private_data, private_data_len);
2589 case RDMA_TRANSPORT_IWARP:
2591 ret = iw_cm_reject(id_priv->cm_id.iw,
2592 private_data, private_data_len);
int rdma_disconnect(struct rdma_cm_id *id)
{
	struct rdma_id_private *id_priv;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_has_cm_dev(id_priv))
		return EINVAL;
	switch (rdma_node_get_transport(id->device->node_type)) {
	case RDMA_TRANSPORT_IB:
		ret = cma_modify_qp_err(id);
		if (ret)
			goto out;
		/* Initiate or respond to a disconnect. */
		if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
			ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
		break;
	case RDMA_TRANSPORT_IWARP:
		ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
		break;
	default:
		ret = EINVAL;
		break;
	}
out:
	return ret;
}
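
/*
 * SA multicast join completion handler.  Attaches the QP to the group on
 * success and reports a join or error event to the consumer.
 */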
static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc = multicast->context;
	struct rdma_cm_event event;
	int ret;

	id_priv = mc->id_priv;
	if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
	    cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
		return 0;

	if (!status && id_priv->id.qp)
		status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
					 multicast->rec.mlid);

	memset(&event, 0, sizeof event);
	event.status = status;
	event.param.ud.private_data = mc->context;
	if (!status) {
		event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
		ib_init_ah_from_mcmember(id_priv->id.device,
					 id_priv->id.port_num, &multicast->rec,
					 &event.param.ud.ah_attr);
		event.param.ud.qp_num = 0xFFFFFF;
		event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
	} else
		event.event = RDMA_CM_EVENT_MULTICAST_ERROR;

	ret = id_priv->id.event_handler(&id_priv->id, &event);
	if (ret) {
		cma_exch(id_priv, CMA_DESTROYING);
		cma_enable_remove(id_priv);
		rdma_destroy_id(&id_priv->id);
		return 0;
	}

	cma_enable_remove(id_priv);
	return 0;
}
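
/*
 * Derive the multicast GID from the consumer-supplied address: zero for
 * the wildcard address, the address itself when it is already an
 * SA-assigned IPv6 MGID, otherwise the mapped IPv4 multicast MGID.
 */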
static void cma_set_mgid(struct rdma_id_private *id_priv,
			 struct sockaddr *addr, union ib_gid *mgid)
{
	unsigned char mc_map[MAX_ADDR_LEN];
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	struct sockaddr_in *sin = (struct sockaddr_in *) addr;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;

	if (cma_any_addr(addr)) {
		memset(mgid, 0, sizeof *mgid);
	} else if ((addr->sa_family == AF_INET6) &&
		   ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
		    0xFF10A01B)) {
		/* IPv6 address is an SA assigned MGID. */
		memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
	} else {
		ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
		if (id_priv->id.ps == RDMA_PS_UDP)
			mc_map[7] = 0x01;	/* Use RDMA CM signature */
		mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
		mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
		*mgid = *(union ib_gid *) (mc_map + 4);
	}
}
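
/*
 * Build an MCMember record for the requested group and initiate an
 * asynchronous join through the SA; cma_ib_mc_handler() completes it.
 */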
static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
				 struct cma_multicast *mc)
{
	struct ib_sa_mcmember_rec rec;
	struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
	ib_sa_comp_mask comp_mask;
	int ret;

	ib_addr_get_mgid(dev_addr, &rec.mgid);
	ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
				     &rec.mgid, &rec);
	if (ret)
		return ret;

	cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
	if (id_priv->id.ps == RDMA_PS_UDP)
		rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
	ib_addr_get_sgid(dev_addr, &rec.port_gid);
	rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
	rec.join_state = 1;

	comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
		    IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
		    IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
		    IB_SA_MCMEMBER_REC_FLOW_LABEL |
		    IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;

	mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
						id_priv->id.port_num, &rec,
						comp_mask, M_NOWAIT,
						cma_ib_mc_handler, mc);
	if (IS_ERR(mc->multicast.ib))
		return PTR_ERR(mc->multicast.ib);

	return 0;
}
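
/*
 * Join a multicast group on behalf of the consumer.  The cma_multicast
 * entry is linked on the id's mc_list before the join is issued and
 * unlinked again if the join cannot be started.
 */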
int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
			void *context)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;
	int ret;

	id_priv = container_of(id, struct rdma_id_private, id);
	if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
	    !cma_comp(id_priv, CMA_ADDR_RESOLVED))
		return EINVAL;

	mc = malloc(sizeof *mc, M_DEVBUF, M_NOWAIT);
	if (!mc)
		return ENOMEM;
	memcpy(&mc->addr, addr, ip_addr_size(addr));
	mc->context = context;
	mc->id_priv = id_priv;

	mtx_lock(&id_priv->lock);
	LIST_INSERT_HEAD(&id_priv->mc_list, mc, list);
	mtx_unlock(&id_priv->lock);

	switch (rdma_node_get_transport(id->device->node_type)) {
	case RDMA_TRANSPORT_IB:
		ret = cma_join_ib_multicast(id_priv, mc);
		break;
	default:
		ret = ENOSYS;
		break;
	}
	if (ret) {
		mtx_lock(&id_priv->lock);
		LIST_REMOVE(mc, list);
		mtx_unlock(&id_priv->lock);
		free(mc, M_DEVBUF);
	}
	return ret;
}
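
/*
 * Leave a previously joined multicast group, detaching the QP from the
 * group before releasing the SA membership.
 */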
void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
{
	struct rdma_id_private *id_priv;
	struct cma_multicast *mc;

	id_priv = container_of(id, struct rdma_id_private, id);
	mtx_lock(&id_priv->lock);
	LIST_FOREACH(mc, &id_priv->mc_list, list) {
		if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
			LIST_REMOVE(mc, list);
			mtx_unlock(&id_priv->lock);

			if (id->qp)
				ib_detach_mcast(id->qp,
						&mc->multicast.ib->rec.mgid,
						mc->multicast.ib->rec.mlid);
			ib_sa_free_multicast(mc->multicast.ib, M_DEVBUF);
			free(mc, M_DEVBUF);
			return;
		}
	}
	mtx_unlock(&id_priv->lock);
}
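
/*
 * ib_client add callback: track the new device and create listeners for
 * any ids bound to the wildcard address.
 */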
static void cma_add_one(struct ib_device *device)
{
	struct cma_device *cma_dev;
	struct rdma_id_private *id_priv;

	cma_dev = malloc(sizeof *cma_dev, M_DEVBUF, M_NOWAIT|M_ZERO);
	if (!cma_dev)
		return;
	cma_dev->device = device;

	cv_init(&cma_dev->comp, "cma_device");
	mtx_init(&cma_dev->lock, "cma_device", NULL, MTX_DUPOK|MTX_DEF);
	cma_dev->refcount = 1;
	LIST_INIT(&cma_dev->id_list);
	ib_set_client_data(device, &cma_client, cma_dev);

	mtx_lock(&lock);
	TAILQ_INSERT_TAIL(&dev_list, cma_dev, list);
	LIST_FOREACH(id_priv, &listen_any_list, list)
		cma_listen_on_dev(id_priv, cma_dev);
	mtx_unlock(&lock);
}
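
/*
 * Notify the consumer that the underlying device is being removed, after
 * draining any callbacks in progress.  Returns nonzero if the consumer
 * wants the id destroyed.
 */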
static int cma_remove_id_dev(struct rdma_id_private *id_priv)
{
	struct rdma_cm_event event;
	enum cma_state state;

	/* Record that we want to remove the device */
	state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
	if (state == CMA_DESTROYING)
		return 0;

	cma_cancel_operation(id_priv, state);
	mtx_lock(&id_priv->lock);
	PANIC_IF(id_priv->dev_remove < 0);
	/* Re-check after wakeup in case of a spurious cv_wait return. */
	while (id_priv->dev_remove)
		cv_wait(&id_priv->wait_remove, &id_priv->lock);
	mtx_unlock(&id_priv->lock);

	/* Check for destruction from another callback. */
	if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
		return 0;

	memset(&event, 0, sizeof event);
	event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
	return id_priv->id.event_handler(&id_priv->id, &event);
}
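
/*
 * Detach every id from a departing device and wait for the device
 * reference count to drop to zero.
 */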
static void cma_process_remove(struct cma_device *cma_dev)
{
	struct rdma_id_private *id_priv;
	int ret;

	mtx_lock(&lock);
	while (!LIST_EMPTY(&cma_dev->id_list)) {
		id_priv = LIST_FIRST(&cma_dev->id_list);

		if (cma_internal_listen(id_priv)) {
			cma_destroy_listen(id_priv);
			continue;
		}

		LIST_REMOVE(id_priv, list);
		mtx_lock(&id_priv->lock);
		id_priv->refcount++;
		mtx_unlock(&id_priv->lock);
		mtx_unlock(&lock);

		ret = cma_remove_id_dev(id_priv);
		cma_deref_id(id_priv);
		if (ret)
			rdma_destroy_id(&id_priv->id);

		mtx_lock(&lock);
	}
	mtx_unlock(&lock);

	cma_deref_dev(cma_dev);
	mtx_lock(&cma_dev->lock);
	PANIC_IF(cma_dev->refcount < 0);
	while (cma_dev->refcount)
		cv_wait(&cma_dev->comp, &cma_dev->lock);
	mtx_unlock(&cma_dev->lock);
}
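
/*
 * ib_client remove callback: unlink the device and release it once all
 * of its ids have been cleaned up.
 */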
static void cma_remove_one(struct ib_device *device)
{
	struct cma_device *cma_dev;

	cma_dev = ib_get_client_data(device, &cma_client);
	if (!cma_dev)
		return;
	mtx_lock(&lock);
	TAILQ_REMOVE(&dev_list, cma_dev, list);
	mtx_unlock(&lock);

	cma_process_remove(cma_dev);
	free(cma_dev, M_DEVBUF);
}
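
/*
 * Module initialization: seed the ephemeral port counter, create the
 * event taskqueue, and register with the SA, address, and device-client
 * layers.
 */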
static int cma_init(void)
{
	int ret;

	LIST_INIT(&listen_any_list);
	TAILQ_INIT(&dev_list);
	mtx_init(&lock, "cma_device list", NULL, MTX_DEF);

	arc4rand(&next_port, sizeof next_port, 0);
	next_port = ((unsigned int) next_port %
		    (ipport_lastauto - ipport_firstauto)) +
		    ipport_firstauto;
	cma_wq = taskqueue_create("rdma_cm", M_NOWAIT, taskqueue_thread_enqueue,
				  &cma_wq);
	if (cma_wq == NULL)
		return ENOMEM;
	taskqueue_start_threads(&cma_wq, 1, PI_NET, "cma_wq thread");

	ib_sa_register_client(&sa_client);
	rdma_addr_register_client(&addr_client);

	ret = ib_register_client(&cma_client);
	if (ret)
		goto err;
	return 0;
err:
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	taskqueue_free(cma_wq);
	return ret;
}
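
/*
 * Undo cma_init() and release the port space maps.
 */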
static void cma_cleanup(void)
{
	ib_unregister_client(&cma_client);
	rdma_addr_unregister_client(&addr_client);
	ib_sa_unregister_client(&sa_client);
	taskqueue_free(cma_wq);
	kvl_free(&sdp_ps);
	kvl_free(&tcp_ps);
	kvl_free(&udp_ps);
	kvl_free(&ipoib_ps);
}
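
/*
 * Module event handler for the rdma_cma kernel module.
 */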
static int
cma_load(module_t mod, int cmd, void *arg)
{
	int err = 0;

	switch (cmd) {
	case MOD_LOAD:
		printf("Loading rdma_cma.\n");
		cma_init();
		break;
	case MOD_UNLOAD:
		printf("Unloading rdma_cma.\n");
		cma_cleanup();
		break;
	default:
		err = EOPNOTSUPP;
		break;
	}
	return (err);
}

static moduledata_t mod_data = {
	"rdma_cma",
	cma_load,
	0
};

MODULE_VERSION(rdma_cma, 1);
MODULE_DEPEND(rdma_cma, rdma_core, 1, 1, 1);
MODULE_DEPEND(rdma_cma, rdma_addr, 1, 1, 1);
MODULE_DEPEND(rdma_cma, rdma_iwcm, 1, 1, 1);
DECLARE_MODULE(rdma_cma, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);