sys/contrib/rdma/rdma_cma.c (FreeBSD releng/7.2)
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005-2006 Intel Corporation.  All rights reserved.
6  *
7  * This Software is licensed under one of the following licenses:
8  *
9  * 1) under the terms of the "Common Public License 1.0" a copy of which is
10  *    available from the Open Source Initiative, see
11  *    http://www.opensource.org/licenses/cpl.php.
12  *
13  * 2) under the terms of the "The BSD License" a copy of which is
14  *    available from the Open Source Initiative, see
15  *    http://www.opensource.org/licenses/bsd-license.php.
16  *
17  * 3) under the terms of the "GNU General Public License (GPL) Version 2" a
18  *    copy of which is available from the Open Source Initiative, see
19  *    http://www.opensource.org/licenses/gpl-license.php.
20  *
21  * Licensee has the right to choose one of the above licenses.
22  *
23  * Redistributions of source code must retain the above copyright
24  * notice and one of the license notices.
25  *
26  * Redistributions in binary form must reproduce both the above copyright
27  * notice, one of the license notices in the documentation
28  * and/or other materials provided with the distribution.
29  *
30  */
31
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34
35 #include <sys/param.h>
36 #include <sys/condvar.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/libkern.h>
40 #include <sys/socket.h>
41 #include <sys/module.h>
42 #include <sys/lock.h>
43 #include <sys/mutex.h>
44 #include <sys/rwlock.h>
45 #include <sys/queue.h>
46 #include <sys/taskqueue.h>
47 #include <sys/priv.h>
48 #include <sys/syslog.h>
49
50 #include <netinet/in.h>
51 #include <netinet/in_pcb.h>
52
53 #include <contrib/rdma/rdma_cm.h>
54 #include <contrib/rdma/ib_cache.h>
55 #include <contrib/rdma/ib_cm.h>
56 #include <contrib/rdma/ib_sa.h>
57 #include <contrib/rdma/iw_cm.h>
58
59 #define CMA_CM_RESPONSE_TIMEOUT 20
60 #define CMA_MAX_CM_RETRIES 15
61
62 static void cma_add_one(struct ib_device *device);
63 static void cma_remove_one(struct ib_device *device);
64
65 static struct ib_client cma_client = {
66         .name   = "cma",
67         .add    = cma_add_one,
68         .remove = cma_remove_one
69 };
70
71 #ifdef IB_SUPPORTED
72 static struct ib_sa_client sa_client;
73 #endif
74 static struct rdma_addr_client addr_client;
75 static TAILQ_HEAD(, cma_device) dev_list;
76 static LIST_HEAD(, rdma_id_private) listen_any_list;
77 static struct mtx lock;
78 static struct taskqueue *cma_wq;
79 static DEFINE_KVL(sdp_ps);
80 static DEFINE_KVL(tcp_ps);
81 static DEFINE_KVL(udp_ps);
82 static DEFINE_KVL(ipoib_ps);
83 static int next_port;
84
85 struct cma_device {
86         struct ib_device        *device;
87         struct mtx              lock;
88         struct cv               comp;
89         int                     refcount;
90
91         LIST_HEAD(, rdma_id_private) id_list;
92         TAILQ_ENTRY(cma_device) list;
93 };
94
95 enum cma_state {
96         CMA_IDLE,
97         CMA_ADDR_QUERY,
98         CMA_ADDR_RESOLVED,
99         CMA_ROUTE_QUERY,
100         CMA_ROUTE_RESOLVED,
101         CMA_CONNECT,
102         CMA_DISCONNECT,
103         CMA_ADDR_BOUND,
104         CMA_LISTEN,
105         CMA_DEVICE_REMOVAL,
106         CMA_DESTROYING
107 };
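/*
 * An rdma_id_private typically advances CMA_IDLE -> CMA_ADDR_BOUND or
 * CMA_ADDR_QUERY -> CMA_ADDR_RESOLVED -> CMA_ROUTE_QUERY ->
 * CMA_ROUTE_RESOLVED -> CMA_CONNECT on the active side, while listeners
 * go CMA_ADDR_BOUND -> CMA_LISTEN.  Transitions are made atomically by
 * the cma_comp_exch()/cma_exch() helpers below; CMA_DESTROYING is
 * entered from any state during teardown (see rdma_destroy_id()).
 */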
108
109 struct rdma_bind_list {
110         struct kvl              *ps;
111         TAILQ_HEAD(, rdma_id_private) owners;
112         unsigned short          port;
113 };
114
115 /*
116  * Device removal can occur at anytime, so we need extra handling to
117  * serialize notifying the user of device removal with other callbacks.
118  * We do this by disabling removal notification while a callback is in process,
119  * and reporting it after the callback completes.
120  */
121 struct rdma_id_private {
122         struct rdma_cm_id       id;
123
124         struct rdma_bind_list   *bind_list;
125         struct socket           *so;
126         TAILQ_ENTRY(rdma_id_private) node;
127         LIST_ENTRY(rdma_id_private) list; /* listen_any_list or cma_dev.list */
128         LIST_HEAD(, rdma_id_private) listen_list; /* per-device listens */
129         LIST_ENTRY(rdma_id_private) listen_entry; 
130         struct cma_device       *cma_dev;
131 #ifdef IB_SUPPORTED     
132         LIST_HEAD(, cma_multicast) mc_list;
133 #endif
134         enum cma_state          state;
135         struct mtx              lock;
136         struct cv               comp;
137         int                     refcount;
138         struct cv               wait_remove;
139         int                     dev_remove;
140
141         int                     backlog;
142         int                     timeout_ms;
143         struct ib_sa_query      *query;
144         int                     query_id;
145         union {
146                 struct ib_cm_id *ib;
147                 struct iw_cm_id *iw;
148         } cm_id;
149
150         u32                     seq_num;
151         u32                     qkey;
152         u32                     qp_num;
153         u8                      srq;
154 };
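/*
 * dev_remove counts callbacks currently running on this id:
 * cma_disable_remove() increments it while the id is in the expected
 * state and cma_enable_remove() decrements it, broadcasting wait_remove
 * when the count reaches zero so that device-removal notification can
 * proceed, as described in the comment above.
 */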
155
156 #ifdef IB_SUPPORTED
157 struct cma_multicast {
158         struct rdma_id_private *id_priv;
159         union {
160                 struct ib_sa_multicast *ib;
161         } multicast;
162         struct list_head        list;
163         void                    *context;
164         struct sockaddr         addr;
165         u8                      pad[sizeof(struct sockaddr_in6) -
166                                     sizeof(struct sockaddr)];
167 };
168 #endif
169
170 struct cma_work {
171         struct task             task;
172         struct rdma_id_private  *id;
173         enum cma_state          old_state;
174         enum cma_state          new_state;
175         struct rdma_cm_event    event;
176 };
177
178 union cma_ip_addr {
179         struct in6_addr ip6;
180         struct {
181                 __u32 pad[3];
182                 __u32 addr;
183         } ip4;
184 };
185
186 struct cma_hdr {
187         u8 cma_version;
188         u8 ip_version;  /* IP version: 7:4 */
189         __u16 port;
190         union cma_ip_addr src_addr;
191         union cma_ip_addr dst_addr;
192 };
193
194 struct sdp_hh {
195         u8 bsdh[16];
196         u8 sdp_version; /* Major version: 7:4 */
197         u8 ip_version;  /* IP version: 7:4 */
198         u8 sdp_specific1[10];
199         __u16 port;
200         __u16 sdp_specific2;
201         union cma_ip_addr src_addr;
202         union cma_ip_addr dst_addr;
203 };
204
205 struct sdp_hah {
206         u8 bsdh[16];
207         u8 sdp_version;
208 };
209
210 #define CMA_VERSION 0x00
211 #define SDP_MAJ_VERSION 0x2
212
213 static int cma_comp(struct rdma_id_private *id_priv, enum cma_state comp)
214 {
215         int ret;
216
217         mtx_lock(&id_priv->lock);
218         ret = (id_priv->state == comp);
219         mtx_unlock(&id_priv->lock);
220         return ret;
221 }
222
223 static int cma_comp_exch(struct rdma_id_private *id_priv,
224                          enum cma_state comp, enum cma_state exch)
225 {
226         int ret;
227
228         mtx_lock(&id_priv->lock);
229         if ((ret = (id_priv->state == comp)))
230                 id_priv->state = exch;
231         mtx_unlock(&id_priv->lock);
232         return ret;
233 }
234
235 static enum cma_state cma_exch(struct rdma_id_private *id_priv,
236                                enum cma_state exch)
237 {
238         enum cma_state old;
239
240         mtx_lock(&id_priv->lock);
241         old = id_priv->state;
242         id_priv->state = exch;
243         mtx_unlock(&id_priv->lock);
244         return old;
245 }
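/*
 * The three helpers above implement the state machine: cma_comp() tests
 * the current state, cma_comp_exch() transitions only if the current
 * state matches 'comp', and cma_exch() swaps states unconditionally and
 * returns the old one.  All of them take id_priv->lock.
 */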
246
247 static inline u8 cma_get_ip_ver(struct cma_hdr *hdr)
248 {
249         return hdr->ip_version >> 4;
250 }
251
252 static inline void cma_set_ip_ver(struct cma_hdr *hdr, u8 ip_ver)
253 {
254         hdr->ip_version = (ip_ver << 4) | (hdr->ip_version & 0xF);
255 }
256
257 static inline u8 sdp_get_majv(u8 sdp_version)
258 {
259         return sdp_version >> 4;
260 }
261
262 static inline u8 sdp_get_ip_ver(struct sdp_hh *hh)
263 {
264         return hh->ip_version >> 4;
265 }
266
267 static inline void sdp_set_ip_ver(struct sdp_hh *hh, u8 ip_ver)
268 {
269         hh->ip_version = (ip_ver << 4) | (hh->ip_version & 0xF);
270 }
271
272 static inline int cma_is_ud_ps(enum rdma_port_space ps)
273 {
274         return (ps == RDMA_PS_UDP || ps == RDMA_PS_IPOIB);
275 }
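/*
 * Both cma_hdr and sdp_hh store the IP version in the high nibble
 * (bits 7:4) of ip_version, hence the shift-and-mask in the get/set
 * helpers above.  cma_is_ud_ps() identifies the two unreliable-datagram
 * port spaces, RDMA_PS_UDP and RDMA_PS_IPOIB.
 */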
276
277 static void cma_attach_to_dev(struct rdma_id_private *id_priv,
278                               struct cma_device *cma_dev)
279 {
280         mtx_lock(&cma_dev->lock);
281         cma_dev->refcount++;
282         mtx_unlock(&cma_dev->lock);
283         id_priv->cma_dev = cma_dev;
284         id_priv->id.device = cma_dev->device;
285         LIST_INSERT_HEAD(&cma_dev->id_list, id_priv, list);
286 }
287
288 static inline void cma_deref_dev(struct cma_device *cma_dev)
289 {
290         mtx_lock(&cma_dev->lock);
291         if (--cma_dev->refcount == 0)
292                 cv_broadcast(&cma_dev->comp);
293         mtx_unlock(&cma_dev->lock);
294 }
295
296 static void cma_detach_from_dev(struct rdma_id_private *id_priv)
297 {
298         LIST_REMOVE(id_priv, list);
299         cma_deref_dev(id_priv->cma_dev);
300         id_priv->cma_dev = NULL;
301 }
302
303 #ifdef IB_SUPPORTED
304 static int cma_set_qkey(struct ib_device *device, u8 port_num,
305                         enum rdma_port_space ps,
306                         struct rdma_dev_addr *dev_addr, u32 *qkey)
307 {
308         struct ib_sa_mcmember_rec rec;
309         int ret = 0;
310
311         switch (ps) {
312         case RDMA_PS_UDP:
313                 *qkey = RDMA_UDP_QKEY;
314                 break;
315         case RDMA_PS_IPOIB:
316                 ib_addr_get_mgid(dev_addr, &rec.mgid);
317                 ret = ib_sa_get_mcmember_rec(device, port_num, &rec.mgid, &rec);
318                 *qkey = be32_to_cpu(rec.qkey);
319                 break;
320         default:
321                 break;
322         }
323         return ret;
324 }
325 #endif
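/*
 * cma_set_qkey(): ids in the UDP port space use the well-known
 * RDMA_UDP_QKEY, while IPoIB ids fetch the qkey from the multicast
 * member record for the MGID taken from the bound device address.
 * Other port spaces leave the qkey untouched.
 */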
326
327 static int cma_acquire_dev(struct rdma_id_private *id_priv)
328 {
329         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
330         struct cma_device *cma_dev;
331         union ib_gid gid;
332         int ret = ENODEV;
333
334         switch (rdma_node_get_transport(dev_addr->dev_type)) {
335 #ifdef IB_SUPPORTED
336         case RDMA_TRANSPORT_IB:
337                 ib_addr_get_sgid(dev_addr, &gid);
338                 break;
339 #endif
340         case RDMA_TRANSPORT_IWARP:
341                 iw_addr_get_sgid(dev_addr, &gid);
342                 break;
343         default:
344                 return (ENODEV);
345         }
346
347         TAILQ_FOREACH(cma_dev, &dev_list, list) {
348                 ret = ib_find_cached_gid(cma_dev->device, &gid,
349                                          &id_priv->id.port_num, NULL);
350                 if (!ret) {
351 #ifdef IB_SUPPORTED
352                         ret = cma_set_qkey(cma_dev->device,
353                                            id_priv->id.port_num,
354                                            id_priv->id.ps, dev_addr,
355                                            &id_priv->qkey);
356                         if (!ret)
357 #endif
358                                 cma_attach_to_dev(id_priv, cma_dev);
359                         break;
360                 }
361         }
362         return ret;
363 }
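/*
 * cma_acquire_dev() walks dev_list for a device whose cached GID table
 * contains the source GID of the resolved address and attaches the id
 * to the first match.  Callers in this file hold the global 'lock'
 * across the call (see cma_req_handler() and iw_conn_req_handler()).
 */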
364
365 static void cma_deref_id(struct rdma_id_private *id_priv)
366 {
367         mtx_lock(&id_priv->lock);
368         if (--id_priv->refcount == 0) {
369                 cv_broadcast(&id_priv->comp);
370         }
371         mtx_unlock(&id_priv->lock);
372 }
373
374 static int cma_disable_remove(struct rdma_id_private *id_priv,
375                               enum cma_state state)
376 {
377         int ret;
378
379         mtx_lock(&id_priv->lock);
380         if (id_priv->state == state) {
381                 id_priv->dev_remove++;
382                 ret = 0;
383         } else
384                 ret = EINVAL;
385         mtx_unlock(&id_priv->lock);
386         return ret;
387 }
388
389 static void cma_enable_remove(struct rdma_id_private *id_priv)
390 {
391         mtx_lock(&id_priv->lock);
392         if (--id_priv->dev_remove == 0)
393                 cv_broadcast(&id_priv->wait_remove);
394         mtx_unlock(&id_priv->lock);
395 }
396
397 static int cma_has_cm_dev(struct rdma_id_private *id_priv)
398 {
399         return (id_priv->id.device && id_priv->cm_id.ib);
400 }
401
402 struct rdma_cm_id *rdma_create_id(rdma_cm_event_handler event_handler,
403                                   void *context, enum rdma_port_space ps)
404 {
405         struct rdma_id_private *id_priv;
406
407         id_priv = malloc(sizeof *id_priv, M_DEVBUF, M_NOWAIT);
408         if (!id_priv)
409                 return ERR_PTR(-ENOMEM);
410         bzero(id_priv, sizeof *id_priv);
411
412         id_priv->state = CMA_IDLE;
413         id_priv->id.context = context;
414         id_priv->id.event_handler = event_handler;
415         id_priv->id.ps = ps;
416         mtx_init(&id_priv->lock, "rdma_cm_id_priv", NULL, MTX_DUPOK|MTX_DEF);
417         cv_init(&id_priv->comp, "rdma_cm_id_priv");
418         id_priv->refcount = 1;
419         cv_init(&id_priv->wait_remove, "id priv wait remove");
420         LIST_INIT(&id_priv->listen_list);
421         arc4rand(&id_priv->seq_num, sizeof id_priv->seq_num, 0);
422
423         return &id_priv->id;
424 }
425
426 static int cma_init_ud_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
427 {
428         struct ib_qp_attr qp_attr;
429         int qp_attr_mask, ret;
430
431         qp_attr.qp_state = IB_QPS_INIT;
432         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
433         if (ret)
434                 return ret;
435
436         ret = ib_modify_qp(qp, &qp_attr, qp_attr_mask);
437         if (ret)
438                 return ret;
439
440         qp_attr.qp_state = IB_QPS_RTR;
441         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE);
442         if (ret)
443                 return ret;
444
445         qp_attr.qp_state = IB_QPS_RTS;
446         qp_attr.sq_psn = 0;
447         ret = ib_modify_qp(qp, &qp_attr, IB_QP_STATE | IB_QP_SQ_PSN);
448
449         return ret;
450 }
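/*
 * UD QPs have no connection handshake to drive their state, so
 * cma_init_ud_qp() moves them straight through INIT -> RTR -> RTS at
 * creation.  Connected QPs only reach INIT here; cma_modify_qp_rtr()
 * and cma_modify_qp_rts() perform the later transitions as the
 * connection is set up.
 */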
451
452 static int cma_init_conn_qp(struct rdma_id_private *id_priv, struct ib_qp *qp)
453 {
454         struct ib_qp_attr qp_attr;
455         int qp_attr_mask, ret;
456
457         qp_attr.qp_state = IB_QPS_INIT;
458         ret = rdma_init_qp_attr(&id_priv->id, &qp_attr, &qp_attr_mask);
459         if (ret)
460                 return ret;
461
462         return ib_modify_qp(qp, &qp_attr, qp_attr_mask);
463 }
464
465 int rdma_create_qp(struct rdma_cm_id *id, struct ib_pd *pd,
466                    struct ib_qp_init_attr *qp_init_attr)
467 {
468         struct rdma_id_private *id_priv;
469         struct ib_qp *qp;
470         int ret;
471
472         id_priv = container_of(id, struct rdma_id_private, id);
473         if (id->device != pd->device)
474                 return (EINVAL);
475
476         qp = ib_create_qp(pd, qp_init_attr);
477         if (IS_ERR(qp))
478                 return PTR_ERR(qp);
479         if (cma_is_ud_ps(id_priv->id.ps))
480                 ret = cma_init_ud_qp(id_priv, qp);
481         else
482                 ret = cma_init_conn_qp(id_priv, qp);
483         if (ret)
484                 goto err;
485
486         id->qp = qp;
487         id_priv->qp_num = qp->qp_num;
488         id_priv->srq = (qp->srq != NULL);
489         return 0;
490 err:
491         ib_destroy_qp(qp);
492         return ret;
493 }
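/*
 * Typical consumer usage of rdma_create_qp() (sketch only; 'my_handler',
 * 'ctx', 'pd', 'cq' and the attribute values are hypothetical and not
 * defined in this file):
 *
 *      struct ib_qp_init_attr attr;
 *
 *      memset(&attr, 0, sizeof attr);
 *      attr.qp_type = IB_QPT_RC;
 *      attr.cap.max_send_wr = attr.cap.max_recv_wr = 32;
 *      attr.cap.max_send_sge = attr.cap.max_recv_sge = 1;
 *      attr.send_cq = attr.recv_cq = cq;        (CQs on id->device)
 *
 *      id = rdma_create_id(my_handler, ctx, RDMA_PS_TCP);
 *      ... resolve address and route, allocate 'pd' on id->device ...
 *      if (rdma_create_qp(id, pd, &attr) != 0)
 *              handle the error;
 */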
494
495 void rdma_destroy_qp(struct rdma_cm_id *id)
496 {
497         ib_destroy_qp(id->qp);
498 }
499
500 static int cma_modify_qp_rtr(struct rdma_cm_id *id)
501 {
502         struct ib_qp_attr qp_attr;
503         int qp_attr_mask, ret;
504
505         if (!id->qp)
506                 return 0;
507
508         /* Need to update QP attributes from default values. */
509         qp_attr.qp_state = IB_QPS_INIT;
510         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
511         if (ret)
512                 return ret;
513
514         ret = ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
515         if (ret)
516                 return ret;
517
518         qp_attr.qp_state = IB_QPS_RTR;
519         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
520         if (ret)
521                 return ret;
522
523         return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
524 }
525
526 #ifdef IB_SUPPORTED
527 static int cma_modify_qp_rts(struct rdma_cm_id *id)
528 {
529         struct ib_qp_attr qp_attr;
530         int qp_attr_mask, ret;
531
532         if (!id->qp)
533                 return 0;
534
535         qp_attr.qp_state = IB_QPS_RTS;
536         ret = rdma_init_qp_attr(id, &qp_attr, &qp_attr_mask);
537         if (ret)
538                 return ret;
539
540         return ib_modify_qp(id->qp, &qp_attr, qp_attr_mask);
541 }
542 #endif
543
544 static int cma_modify_qp_err(struct rdma_cm_id *id)
545 {
546         struct ib_qp_attr qp_attr;
547
548         if (!id->qp)
549                 return 0;
550
551         qp_attr.qp_state = IB_QPS_ERR;
552         return ib_modify_qp(id->qp, &qp_attr, IB_QP_STATE);
553 }
554
555 #ifdef IB_SUPPORTED
556 static int cma_ib_init_qp_attr(struct rdma_id_private *id_priv,
557                                struct ib_qp_attr *qp_attr, int *qp_attr_mask)
558 {
559         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
560         int ret;
561
562         ret = ib_find_cached_pkey(id_priv->id.device, id_priv->id.port_num,
563                                   ib_addr_get_pkey(dev_addr),
564                                   &qp_attr->pkey_index);
565         if (ret)
566                 return ret;
567
568         qp_attr->port_num = id_priv->id.port_num;
569         *qp_attr_mask = IB_QP_STATE | IB_QP_PKEY_INDEX | IB_QP_PORT;
570
571         if (cma_is_ud_ps(id_priv->id.ps)) {
572                 qp_attr->qkey = id_priv->qkey;
573                 *qp_attr_mask |= IB_QP_QKEY;
574         } else {
575                 qp_attr->qp_access_flags = 0;
576                 *qp_attr_mask |= IB_QP_ACCESS_FLAGS;
577         }
578         return 0;
579 }
580 #endif
581
582 int rdma_init_qp_attr(struct rdma_cm_id *id, struct ib_qp_attr *qp_attr,
583                        int *qp_attr_mask)
584 {
585         struct rdma_id_private *id_priv;
586         int ret = 0;
587
588         id_priv = container_of(id, struct rdma_id_private, id);
589 #ifdef IB_SUPPORTED
590         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
591         case RDMA_TRANSPORT_IB:
592                 if (!id_priv->cm_id.ib || cma_is_ud_ps(id_priv->id.ps))
593                         ret = cma_ib_init_qp_attr(id_priv, qp_attr, qp_attr_mask);
594                 else
595                         ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, qp_attr,
596                                                  qp_attr_mask);
597                 if (qp_attr->qp_state == IB_QPS_RTR)
598                         qp_attr->rq_psn = id_priv->seq_num;
599                 break;
600         case RDMA_TRANSPORT_IWARP:
601 #endif
602                 if (!id_priv->cm_id.iw) {
603                         qp_attr->qp_access_flags = IB_ACCESS_LOCAL_WRITE;
604                         *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS;
605                 } else
606                         ret = iw_cm_init_qp_attr(id_priv->cm_id.iw, qp_attr,
607                                                  qp_attr_mask);
608 #ifdef IB_SUPPORTED
609                 break;
610         default:
611                 ret = ENOSYS;
612                 break;
613         }
614 #endif
615
616         return ret;
617 }
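/*
 * Note the #ifdef IB_SUPPORTED layout in rdma_init_qp_attr(): when IB
 * support is compiled out, the transport switch drops away and the
 * iWARP branch becomes the unconditional body.  The same pattern is
 * used in cma_destroy_listen(), rdma_destroy_id() and rdma_listen().
 */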
618
619 static inline int cma_zero_addr(struct sockaddr *addr)
620 {
621         struct in6_addr *ip6;
622
623         if (addr->sa_family == AF_INET)
624                 return in_nullhost(((struct sockaddr_in *) addr)->sin_addr);
625         else {
626                 ip6 = &((struct sockaddr_in6 *) addr)->sin6_addr;
627                 return (ip6->s6_addr32[0] | ip6->s6_addr32[1] |
628                         ip6->s6_addr32[2] | ip6->s6_addr32[3]) == 0;
629         }
630 }
631
632 static inline int cma_loopback_addr(struct sockaddr *addr)
633 {
634         return ((struct sockaddr_in *)addr)->sin_addr.s_addr == INADDR_LOOPBACK;
635 }
636
637 static inline int cma_any_addr(struct sockaddr *addr)
638 {
639         return cma_zero_addr(addr) || cma_loopback_addr(addr);
640 }
641
642 static inline __be16 cma_port(struct sockaddr *addr)
643 {
644         if (addr->sa_family == AF_INET)
645                 return ((struct sockaddr_in *) addr)->sin_port;
646         else
647                 return ((struct sockaddr_in6 *) addr)->sin6_port;
648 }
649
650 static inline int cma_any_port(struct sockaddr *addr)
651 {
652         return !cma_port(addr);
653 }
654
655 #ifdef IB_SUPPORTED
656 static int cma_get_net_info(void *hdr, enum rdma_port_space ps,
657                             u8 *ip_ver, __u16 *port,
658                             union cma_ip_addr **src, union cma_ip_addr **dst)
659 {
660         switch (ps) {
661         case RDMA_PS_SDP:
662                 if (sdp_get_majv(((struct sdp_hh *) hdr)->sdp_version) !=
663                     SDP_MAJ_VERSION)
664                         return (EINVAL);
665
666                 *ip_ver = sdp_get_ip_ver(hdr);
667                 *port   = ((struct sdp_hh *) hdr)->port;
668                 *src    = &((struct sdp_hh *) hdr)->src_addr;
669                 *dst    = &((struct sdp_hh *) hdr)->dst_addr;
670                 break;
671         default:
672                 if (((struct cma_hdr *) hdr)->cma_version != CMA_VERSION)
673                         return (EINVAL);
674
675                 *ip_ver = cma_get_ip_ver(hdr);
676                 *port   = ((struct cma_hdr *) hdr)->port;
677                 *src    = &((struct cma_hdr *) hdr)->src_addr;
678                 *dst    = &((struct cma_hdr *) hdr)->dst_addr;
679                 break;
680         }
681
682         if (*ip_ver != 4 && *ip_ver != 6)
683                 return (EINVAL);
684         return 0;
685 }
686
687 static void cma_save_net_info(struct rdma_addr *addr,
688                               struct rdma_addr *listen_addr,
689                               u8 ip_ver, __u16 port,
690                               union cma_ip_addr *src, union cma_ip_addr *dst)
691 {
692         struct sockaddr_in *listen4, *ip4;
693         struct sockaddr_in6 *listen6, *ip6;
694
695         switch (ip_ver) {
696         case 4:
697                 listen4 = (struct sockaddr_in *) &listen_addr->src_addr;
698                 ip4 = (struct sockaddr_in *) &addr->src_addr;
699                 ip4->sin_family = listen4->sin_family;
700                 ip4->sin_addr.s_addr = dst->ip4.addr;
701                 ip4->sin_port = listen4->sin_port;
702
703                 ip4 = (struct sockaddr_in *) &addr->dst_addr;
704                 ip4->sin_family = listen4->sin_family;
705                 ip4->sin_addr.s_addr = src->ip4.addr;
706                 ip4->sin_port = port;
707                 break;
708         case 6:
709                 listen6 = (struct sockaddr_in6 *) &listen_addr->src_addr;
710                 ip6 = (struct sockaddr_in6 *) &addr->src_addr;
711                 ip6->sin6_family = listen6->sin6_family;
712                 ip6->sin6_addr = dst->ip6;
713                 ip6->sin6_port = listen6->sin6_port;
714
715                 ip6 = (struct sockaddr_in6 *) &addr->dst_addr;
716                 ip6->sin6_family = listen6->sin6_family;
717                 ip6->sin6_addr = src->ip6;
718                 ip6->sin6_port = port;
719                 break;
720         default:
721                 break;
722         }
723 }
724 #endif
725
726 static inline int cma_user_data_offset(enum rdma_port_space ps)
727 {
728         switch (ps) {
729         case RDMA_PS_SDP:
730                 return 0;
731         default:
732                 return sizeof(struct cma_hdr);
733         }
734 }
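/*
 * SDP carries its addressing in its own hello header (struct sdp_hh),
 * so consumer private data begins at offset 0; all other port spaces
 * prepend a struct cma_hdr, which cma_user_data_offset() skips over.
 */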
735
736 static void cma_cancel_route(struct rdma_id_private *id_priv)
737 {
738 #ifdef IB_SUPPORTED
739         switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
740         case RDMA_TRANSPORT_IB:
741                 if (id_priv->query)
742                         ib_sa_cancel_query(id_priv->query_id, id_priv->query);
743                 break;
744         default:
745                 break;
746         }
747 #endif
748 }
749
750 static inline int cma_internal_listen(struct rdma_id_private *id_priv)
751 {
752         return (id_priv->state == CMA_LISTEN) && id_priv->cma_dev &&
753                cma_any_addr(&id_priv->id.route.addr.src_addr);
754 }
755
756 static void cma_destroy_listen(struct rdma_id_private *id_priv)
757 {
758         cma_exch(id_priv, CMA_DESTROYING);
759
760         if (id_priv->cma_dev) {
761 #ifdef IB_SUPPORTED
762                 switch (rdma_node_get_transport(id_priv->id.device->node_type)) {
763                 case RDMA_TRANSPORT_IB:
764                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
765                                 ib_destroy_cm_id(id_priv->cm_id.ib);
766                         break;
767                 case RDMA_TRANSPORT_IWARP:
768 #endif
769                         if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
770                                 iw_destroy_cm_id(id_priv->cm_id.iw);
771 #ifdef IB_SUPPORTED
772                         break;
773                 default:
774                         break;
775                 }
776 #endif
777                 cma_detach_from_dev(id_priv);
778         }
779         LIST_REMOVE(id_priv, listen_entry);
780
781         cma_deref_id(id_priv);
782         mtx_lock(&id_priv->lock);
783         if (id_priv->refcount)
784                 cv_wait(&id_priv->comp, &id_priv->lock);
785         mtx_unlock(&id_priv->lock);
786
787         free(id_priv, M_DEVBUF);
788 }
789
790 static void cma_cancel_listens(struct rdma_id_private *id_priv)
791 {
792         struct rdma_id_private *dev_id_priv;
793
794         mtx_lock(&lock);
795         LIST_REMOVE(id_priv, list);
796
797         while (!LIST_EMPTY(&id_priv->listen_list)) {
798                 dev_id_priv = LIST_FIRST(&id_priv->listen_list);
799                 cma_destroy_listen(dev_id_priv);
800         }
801         mtx_unlock(&lock);
802 }
803
804 static void cma_cancel_operation(struct rdma_id_private *id_priv,
805                                  enum cma_state state)
806 {
807         switch (state) {
808         case CMA_ADDR_QUERY:
809                 rdma_addr_cancel(&id_priv->id.route.addr.dev_addr);
810                 break;
811         case CMA_ROUTE_QUERY:
812                 cma_cancel_route(id_priv);
813                 break;
814         case CMA_LISTEN:
815                 if (cma_any_addr(&id_priv->id.route.addr.src_addr) &&
816                     !id_priv->cma_dev)
817                         cma_cancel_listens(id_priv);
818                 break;
819         default:
820                 break;
821         }
822 }
823
824 static void cma_release_port(struct rdma_id_private *id_priv)
825 {
826         struct rdma_bind_list *bind_list = id_priv->bind_list;
827
828         if (!bind_list)
829                 return;
830
831         mtx_lock(&lock);
832         TAILQ_REMOVE(&bind_list->owners, id_priv, node);
833         if (TAILQ_EMPTY(&bind_list->owners)) {
834                 kvl_delete(bind_list->ps, bind_list->port);
835                 free(bind_list, M_DEVBUF);
836         }
837         mtx_unlock(&lock);
838         if (id_priv->so)
839                 soclose(id_priv->so);
840 }
841
842 #ifdef IB_SUPPORTED
843 static void cma_leave_mc_groups(struct rdma_id_private *id_priv)
844 {
845         struct cma_multicast *mc;
846
847         while (!LIST_EMPTY(&id_priv->mc_list)) {
848                 mc = LIST_FIRST(&id_priv->mc_list);
849                 LIST_REMOVE(mc, list);
850                 ib_sa_free_multicast(mc->multicast.ib);
851                 free(mc, M_DEVBUF);
852         }
853 }
854 #endif
855
856 void rdma_destroy_id(struct rdma_cm_id *id)
857 {
858         struct rdma_id_private *id_priv;
859         enum cma_state state;
860
861         id_priv = container_of(id, struct rdma_id_private, id);
862         state = cma_exch(id_priv, CMA_DESTROYING);
863         cma_cancel_operation(id_priv, state);
864
865         mtx_lock(&lock);
866         if (id_priv->cma_dev) {
867                 mtx_unlock(&lock);
868 #ifdef IB_SUPPORTED
869                 switch (rdma_node_get_transport(id->device->node_type)) {
870                 case RDMA_TRANSPORT_IB:
871                         if (id_priv->cm_id.ib && !IS_ERR(id_priv->cm_id.ib))
872                                 ib_destroy_cm_id(id_priv->cm_id.ib);
873                         break;
874                 case RDMA_TRANSPORT_IWARP:
875 #endif
876                         if (id_priv->cm_id.iw && !IS_ERR(id_priv->cm_id.iw))
877                                 iw_destroy_cm_id(id_priv->cm_id.iw);
878 #ifdef IB_SUPPORTED
879                         break;
880                 default:
881                         break;
882                 }
883                 cma_leave_mc_groups(id_priv);
884 #endif
885                 mtx_lock(&lock);
886                 cma_detach_from_dev(id_priv);
887         }
888         mtx_unlock(&lock);
889         cma_release_port(id_priv);
890         cma_deref_id(id_priv);
891         mtx_lock(&id_priv->lock);
892         PANIC_IF(id_priv->refcount < 0);
893         if (id_priv->refcount)
894                 cv_wait(&id_priv->comp, &id_priv->lock);
895         mtx_unlock(&id_priv->lock);
896         free(id_priv->id.route.path_rec, M_DEVBUF);
897         free(id_priv, M_DEVBUF);
898 }
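/*
 * Teardown order in rdma_destroy_id(): mark the id CMA_DESTROYING,
 * cancel any outstanding address/route query or wild-card listens,
 * destroy the transport-level CM id, detach from its cma_device,
 * release the bound port, then drop the initial reference and sleep on
 * 'comp' until the remaining references are gone before freeing.
 */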
899
900 #ifdef IB_SUPPORTED
901 static int cma_rep_recv(struct rdma_id_private *id_priv)
902 {
903         int ret;
904
905         ret = cma_modify_qp_rtr(&id_priv->id);
906         if (ret)
907                 goto reject;
908
909         ret = cma_modify_qp_rts(&id_priv->id);
910         if (ret)
911                 goto reject;
912
913         ret = ib_send_cm_rtu(id_priv->cm_id.ib, NULL, 0);
914         if (ret)
915                 goto reject;
916
917         return 0;
918 reject:
919         cma_modify_qp_err(&id_priv->id);
920         ib_send_cm_rej(id_priv->cm_id.ib, IB_CM_REJ_CONSUMER_DEFINED,
921                        NULL, 0, NULL, 0);
922         return ret;
923 }
924
925 static int cma_verify_rep(struct rdma_id_private *id_priv, void *data)
926 {
927         if (id_priv->id.ps == RDMA_PS_SDP &&
928             sdp_get_majv(((struct sdp_hah *) data)->sdp_version) !=
929             SDP_MAJ_VERSION)
930                 return (EINVAL);
931
932         return 0;
933 }
934
935 static void cma_set_rep_event_data(struct rdma_cm_event *event,
936                                    struct ib_cm_rep_event_param *rep_data,
937                                    void *private_data)
938 {
939         event->param.conn.private_data = private_data;
940         event->param.conn.private_data_len = IB_CM_REP_PRIVATE_DATA_SIZE;
941         event->param.conn.responder_resources = rep_data->responder_resources;
942         event->param.conn.initiator_depth = rep_data->initiator_depth;
943         event->param.conn.flow_control = rep_data->flow_control;
944         event->param.conn.rnr_retry_count = rep_data->rnr_retry_count;
945         event->param.conn.srq = rep_data->srq;
946         event->param.conn.qp_num = rep_data->remote_qpn;
947 }
948
949 static int cma_ib_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
950 {
951         struct rdma_id_private *id_priv = cm_id->context;
952         struct rdma_cm_event event;
953         int ret = 0;
954
955         if (cma_disable_remove(id_priv, CMA_CONNECT))
956                 return 0;
957
958         memset(&event, 0, sizeof event);
959         switch (ib_event->event) {
960         case IB_CM_REQ_ERROR:
961         case IB_CM_REP_ERROR:
962                 event.event = RDMA_CM_EVENT_UNREACHABLE;
963                 event.status = ETIMEDOUT;
964                 break;
965         case IB_CM_REP_RECEIVED:
966                 event.status = cma_verify_rep(id_priv, ib_event->private_data);
967                 if (event.status)
968                         event.event = RDMA_CM_EVENT_CONNECT_ERROR;
969                 else if (id_priv->id.qp && id_priv->id.ps != RDMA_PS_SDP) {
970                         event.status = cma_rep_recv(id_priv);
971                         event.event = event.status ? RDMA_CM_EVENT_CONNECT_ERROR :
972                                                      RDMA_CM_EVENT_ESTABLISHED;
973                 } else
974                         event.event = RDMA_CM_EVENT_CONNECT_RESPONSE;
975                 cma_set_rep_event_data(&event, &ib_event->param.rep_rcvd,
976                                        ib_event->private_data);
977                 break;
978         case IB_CM_RTU_RECEIVED:
979         case IB_CM_USER_ESTABLISHED:
980                 event.event = RDMA_CM_EVENT_ESTABLISHED;
981                 break;
982         case IB_CM_DREQ_ERROR:
983                 event.status = ETIMEDOUT; /* fall through */
984         case IB_CM_DREQ_RECEIVED:
985         case IB_CM_DREP_RECEIVED:
986                 if (!cma_comp_exch(id_priv, CMA_CONNECT, CMA_DISCONNECT))
987                         goto out;
988                 event.event = RDMA_CM_EVENT_DISCONNECTED;
989                 break;
990         case IB_CM_TIMEWAIT_EXIT:
991         case IB_CM_MRA_RECEIVED:
992                 /* ignore event */
993                 goto out;
994         case IB_CM_REJ_RECEIVED:
995                 cma_modify_qp_err(&id_priv->id);
996                 event.status = ib_event->param.rej_rcvd.reason;
997                 event.event = RDMA_CM_EVENT_REJECTED;
998                 event.param.conn.private_data = ib_event->private_data;
999                 event.param.conn.private_data_len = IB_CM_REJ_PRIVATE_DATA_SIZE;
1000                 break;
1001         default:
1002                 log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d",
1003                        ib_event->event);
1004                 goto out;
1005         }
1006
1007         ret = id_priv->id.event_handler(&id_priv->id, &event);
1008         if (ret) {
1009                 /* Destroy the CM ID by returning a non-zero value. */
1010                 id_priv->cm_id.ib = NULL;
1011                 cma_exch(id_priv, CMA_DESTROYING);
1012                 cma_enable_remove(id_priv);
1013                 rdma_destroy_id(&id_priv->id);
1014                 return ret;
1015         }
1016 out:
1017         cma_enable_remove(id_priv);
1018         return ret;
1019 }
1020
1021 static struct rdma_id_private *cma_new_conn_id(struct rdma_cm_id *listen_id,
1022                                                struct ib_cm_event *ib_event)
1023 {
1024         struct rdma_id_private *id_priv;
1025         struct rdma_cm_id *id;
1026         struct rdma_route *rt;
1027         union cma_ip_addr *src, *dst;
1028         __u16 port;
1029         u8 ip_ver;
1030
1031         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1032                              &ip_ver, &port, &src, &dst))
1033                 goto err;
1034
1035         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1036                             listen_id->ps);
1037         if (IS_ERR(id))
1038                 goto err;
1039
1040         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1041                           ip_ver, port, src, dst);
1042
1043         rt = &id->route;
1044         rt->num_paths = ib_event->param.req_rcvd.alternate_path ? 2 : 1;
1045         rt->path_rec = malloc(sizeof *rt->path_rec * rt->num_paths,
1046                                M_DEVBUF, M_NOWAIT);
1047         if (!rt->path_rec)
1048                 goto destroy_id;
1049
1050         rt->path_rec[0] = *ib_event->param.req_rcvd.primary_path;
1051         if (rt->num_paths == 2)
1052                 rt->path_rec[1] = *ib_event->param.req_rcvd.alternate_path;
1053
1054         ib_addr_set_sgid(&rt->addr.dev_addr, &rt->path_rec[0].sgid);
1055         ib_addr_set_dgid(&rt->addr.dev_addr, &rt->path_rec[0].dgid);
1056         ib_addr_set_pkey(&rt->addr.dev_addr, be16_to_cpu(rt->path_rec[0].pkey));
1057         rt->addr.dev_addr.dev_type = RDMA_NODE_IB_CA;
1058
1059         id_priv = container_of(id, struct rdma_id_private, id);
1060         id_priv->state = CMA_CONNECT;
1061         return id_priv;
1062
1063 destroy_id:
1064         rdma_destroy_id(id);
1065 err:
1066         return NULL;
1067 }
1068
1069 static struct rdma_id_private *cma_new_udp_id(struct rdma_cm_id *listen_id,
1070                                               struct ib_cm_event *ib_event)
1071 {
1072         struct rdma_id_private *id_priv;
1073         struct rdma_cm_id *id;
1074         union cma_ip_addr *src, *dst;
1075         __u16 port;
1076         u8 ip_ver;
1077         int ret;
1078
1079         id = rdma_create_id(listen_id->event_handler, listen_id->context,
1080                             listen_id->ps);
1081         if (IS_ERR(id))
1082                 return NULL;
1083
1084
1085         if (cma_get_net_info(ib_event->private_data, listen_id->ps,
1086                              &ip_ver, &port, &src, &dst))
1087                 goto err;
1088
1089         cma_save_net_info(&id->route.addr, &listen_id->route.addr,
1090                           ip_ver, port, src, dst);
1091
1092         ret = rdma_translate_ip(&id->route.addr.src_addr,
1093                                 &id->route.addr.dev_addr);
1094         if (ret)
1095                 goto err;
1096
1097         id_priv = container_of(id, struct rdma_id_private, id);
1098         id_priv->state = CMA_CONNECT;
1099         return id_priv;
1100 err:
1101         rdma_destroy_id(id);
1102         return NULL;
1103 }
1104
1105 static void cma_set_req_event_data(struct rdma_cm_event *event,
1106                                    struct ib_cm_req_event_param *req_data,
1107                                    void *private_data, int offset)
1108 {
1109         event->param.conn.private_data = private_data + offset;
1110         event->param.conn.private_data_len = IB_CM_REQ_PRIVATE_DATA_SIZE - offset;
1111         event->param.conn.responder_resources = req_data->responder_resources;
1112         event->param.conn.initiator_depth = req_data->initiator_depth;
1113         event->param.conn.flow_control = req_data->flow_control;
1114         event->param.conn.retry_count = req_data->retry_count;
1115         event->param.conn.rnr_retry_count = req_data->rnr_retry_count;
1116         event->param.conn.srq = req_data->srq;
1117         event->param.conn.qp_num = req_data->remote_qpn;
1118 }
1119
1120 static int cma_req_handler(struct ib_cm_id *cm_id, struct ib_cm_event *ib_event)
1121 {
1122         struct rdma_id_private *listen_id, *conn_id;
1123         struct rdma_cm_event event;
1124         int offset, ret;
1125
1126         listen_id = cm_id->context;
1127         if (cma_disable_remove(listen_id, CMA_LISTEN))
1128                 return (ECONNABORTED);
1129
1130         memset(&event, 0, sizeof event);
1131         offset = cma_user_data_offset(listen_id->id.ps);
1132         event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1133         if (cma_is_ud_ps(listen_id->id.ps)) {
1134                 conn_id = cma_new_udp_id(&listen_id->id, ib_event);
1135                 event.param.ud.private_data = ib_event->private_data + offset;
1136                 event.param.ud.private_data_len =
1137                                 IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE - offset;
1138         } else {
1139                 conn_id = cma_new_conn_id(&listen_id->id, ib_event);
1140                 cma_set_req_event_data(&event, &ib_event->param.req_rcvd,
1141                                        ib_event->private_data, offset);
1142         }
1143         if (!conn_id) {
1144                 ret = ENOMEM;
1145                 goto out;
1146         }
1147
1148         mtx_lock(&conn_id->lock);
1149         conn_id->dev_remove++;
1150         mtx_unlock(&conn_id->lock);
1151         mtx_lock(&lock);
1152         ret = cma_acquire_dev(conn_id);
1153         mtx_unlock(&lock);
1154         if (ret)
1155                 goto release_conn_id;
1156
1157         conn_id->cm_id.ib = cm_id;
1158         cm_id->context = conn_id;
1159         cm_id->cm_handler = cma_ib_handler;
1160
1161         ret = conn_id->id.event_handler(&conn_id->id, &event);
1162         if (!ret)
1163                 goto out;
1164
1165         /* Destroy the CM ID by returning a non-zero value. */
1166         conn_id->cm_id.ib = NULL;
1167
1168 release_conn_id:
1169         cma_exch(conn_id, CMA_DESTROYING);
1170         cma_enable_remove(conn_id);
1171         rdma_destroy_id(&conn_id->id);
1172
1173 out:
1174         cma_enable_remove(listen_id);
1175         return ret;
1176 }
1177
1178 static __be64 cma_get_service_id(enum rdma_port_space ps, struct sockaddr *addr)
1179 {
1180         return cpu_to_be64(((u64)ps << 16) + be16_to_cpu(cma_port(addr)));
1181 }
1182
1183 static void cma_set_compare_data(enum rdma_port_space ps, struct sockaddr *addr,
1184                                  struct ib_cm_compare_data *compare)
1185 {
1186         struct cma_hdr *cma_data, *cma_mask;
1187         struct sdp_hh *sdp_data, *sdp_mask;
1188         __u32 ip4_addr;
1189         struct in6_addr ip6_addr;
1190
1191         memset(compare, 0, sizeof *compare);
1192         cma_data = (void *) compare->data;
1193         cma_mask = (void *) compare->mask;
1194         sdp_data = (void *) compare->data;
1195         sdp_mask = (void *) compare->mask;
1196
1197         switch (addr->sa_family) {
1198         case AF_INET:
1199                 ip4_addr = ((struct sockaddr_in *) addr)->sin_addr.s_addr;
1200                 if (ps == RDMA_PS_SDP) {
1201                         sdp_set_ip_ver(sdp_data, 4);
1202                         sdp_set_ip_ver(sdp_mask, 0xF);
1203                         sdp_data->dst_addr.ip4.addr = ip4_addr;
1204                         sdp_mask->dst_addr.ip4.addr = ~0;
1205                 } else {
1206                         cma_set_ip_ver(cma_data, 4);
1207                         cma_set_ip_ver(cma_mask, 0xF);
1208                         cma_data->dst_addr.ip4.addr = ip4_addr;
1209                         cma_mask->dst_addr.ip4.addr = ~0;
1210                 }
1211                 break;
1212         case AF_INET6:
1213                 ip6_addr = ((struct sockaddr_in6 *) addr)->sin6_addr;
1214                 if (ps == RDMA_PS_SDP) {
1215                         sdp_set_ip_ver(sdp_data, 6);
1216                         sdp_set_ip_ver(sdp_mask, 0xF);
1217                         sdp_data->dst_addr.ip6 = ip6_addr;
1218                         memset(&sdp_mask->dst_addr.ip6, 0xFF,
1219                                sizeof sdp_mask->dst_addr.ip6);
1220                 } else {
1221                         cma_set_ip_ver(cma_data, 6);
1222                         cma_set_ip_ver(cma_mask, 0xF);
1223                         cma_data->dst_addr.ip6 = ip6_addr;
1224                         memset(&cma_mask->dst_addr.ip6, 0xFF,
1225                                sizeof cma_mask->dst_addr.ip6);
1226                 }
1227                 break;
1228         default:
1229                 break;
1230         }
1231 }
1232 #endif /* IB_SUPPORTED */
1233
1234 static int cma_iw_handler(struct iw_cm_id *iw_id, struct iw_cm_event *iw_event)
1235 {
1236         struct rdma_id_private *id_priv = iw_id->context;
1237         struct rdma_cm_event event;
1238         struct sockaddr_in *sin;
1239         int ret = 0;
1240
1241         if (cma_disable_remove(id_priv, CMA_CONNECT))
1242                 return 0;
1243
1244         memset(&event, 0, sizeof event);
1245         switch (iw_event->event) {
1246         case IW_CM_EVENT_CLOSE:
1247                 event.event = RDMA_CM_EVENT_DISCONNECTED;
1248                 break;
1249         case IW_CM_EVENT_CONNECT_REPLY:
1250                 sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1251                 *sin = iw_event->local_addr;
1252                 sin = (struct sockaddr_in *) &id_priv->id.route.addr.dst_addr;
1253                 *sin = iw_event->remote_addr;
1254                 switch (iw_event->status) {
1255                 case 0:
1256                         event.event = RDMA_CM_EVENT_ESTABLISHED;
1257                         break;
1258                 case ECONNRESET:
1259                 case ECONNREFUSED:
1260                         event.event = RDMA_CM_EVENT_REJECTED;
1261                         break;
1262                 case ETIMEDOUT:
1263                         event.event = RDMA_CM_EVENT_UNREACHABLE;
1264                         break;
1265                 default:
1266                         event.event = RDMA_CM_EVENT_CONNECT_ERROR;
1267                         break;
1268                 }
1269                 break;
1270         case IW_CM_EVENT_ESTABLISHED:
1271                 event.event = RDMA_CM_EVENT_ESTABLISHED;
1272                 break;
1273         default:
1274                 panic("unknown event type %d", iw_event->event);
1275                 
1276         }
1277
1278         event.status = iw_event->status;
1279         event.param.conn.private_data = iw_event->private_data;
1280         event.param.conn.private_data_len = iw_event->private_data_len;
1281         ret = id_priv->id.event_handler(&id_priv->id, &event);
1282         if (ret) {
1283                 /* Destroy the CM ID by returning a non-zero value. */
1284                 id_priv->cm_id.iw = NULL;
1285                 cma_exch(id_priv, CMA_DESTROYING);
1286                 cma_enable_remove(id_priv);
1287                 rdma_destroy_id(&id_priv->id);
1288                 return ret;
1289         }
1290
1291         cma_enable_remove(id_priv);
1292         return ret;
1293 }
1294
1295 static int iw_conn_req_handler(struct iw_cm_id *cm_id,
1296                                struct iw_cm_event *iw_event)
1297 {
1298         struct rdma_cm_id *new_cm_id;
1299         struct rdma_id_private *listen_id, *conn_id;
1300         struct sockaddr_in *sin;
1301         struct ifnet *dev;
1302         struct rdma_cm_event event;
1303         int ret;
1304         struct ifaddr *ifa;
1305         uint16_t port;
1306
1307         listen_id = cm_id->context;
1308         if (cma_disable_remove(listen_id, CMA_LISTEN))
1309                 return (ECONNABORTED);
1310
1311         /* Create a new RDMA id for the new IW CM ID */
1312         new_cm_id = rdma_create_id(listen_id->id.event_handler,
1313                                    listen_id->id.context,
1314                                    RDMA_PS_TCP);
1315         if (!new_cm_id) {
1316                 ret = ENOMEM;
1317                 goto out;
1318         }
1319         conn_id = container_of(new_cm_id, struct rdma_id_private, id);
1320         mtx_lock(&conn_id->lock);
1321         ++conn_id->dev_remove;
1322         mtx_unlock(&conn_id->lock);
1323         conn_id->state = CMA_CONNECT;
1324
1325         port = iw_event->local_addr.sin_port;
1326         iw_event->local_addr.sin_port = 0;
1327         ifa = ifa_ifwithaddr((struct sockaddr *)&iw_event->local_addr);
1328         iw_event->local_addr.sin_port = port;
1329         if (!ifa) {
1330                 ret = EADDRNOTAVAIL;
1331                 cma_enable_remove(conn_id);
1332                 rdma_destroy_id(new_cm_id);
1333                 goto out;
1334         }
1335         dev = ifa->ifa_ifp;
1336         ret = rdma_copy_addr(&conn_id->id.route.addr.dev_addr, dev, NULL);
1337         if (ret) {
1338                 cma_enable_remove(conn_id);
1339                 rdma_destroy_id(new_cm_id);
1340                 goto out;
1341         }
1342
1343         mtx_lock(&lock);
1344         ret = cma_acquire_dev(conn_id);
1345         mtx_unlock(&lock);
1346         if (ret) {
1347                 cma_enable_remove(conn_id);
1348                 rdma_destroy_id(new_cm_id);
1349                 goto out;
1350         }
1351
1352         conn_id->cm_id.iw = cm_id;
1353         cm_id->context = conn_id;
1354         cm_id->cm_handler = cma_iw_handler;
1355
1356         sin = (struct sockaddr_in *) &new_cm_id->route.addr.src_addr;
1357         *sin = iw_event->local_addr;
1358         sin = (struct sockaddr_in *) &new_cm_id->route.addr.dst_addr;
1359         *sin = iw_event->remote_addr;
1360         conn_id->so = cm_id->so;
1361
1362         memset(&event, 0, sizeof event);
1363         event.event = RDMA_CM_EVENT_CONNECT_REQUEST;
1364         event.param.conn.private_data = iw_event->private_data;
1365         event.param.conn.private_data_len = iw_event->private_data_len;
1366         ret = conn_id->id.event_handler(&conn_id->id, &event);
1367         if (ret) {
1368                 /* User wants to destroy the CM ID */
1369                 conn_id->cm_id.iw = NULL;
1370                 cma_exch(conn_id, CMA_DESTROYING);
1371                 cma_enable_remove(conn_id);
1372                 rdma_destroy_id(&conn_id->id);
1373         }
1374
1375 out:
1376         cma_enable_remove(listen_id);
1377         return ret;
1378 }
1379
1380 #ifdef IB_SUPPORTED
1381 static int cma_ib_listen(struct rdma_id_private *id_priv)
1382 {
1383         struct ib_cm_compare_data compare_data;
1384         struct sockaddr *addr;
1385         __be64 svc_id;
1386         int ret;
1387
1388         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_req_handler,
1389                                             id_priv);
1390         if (IS_ERR(id_priv->cm_id.ib))
1391                 return PTR_ERR(id_priv->cm_id.ib);
1392
1393         addr = &id_priv->id.route.addr.src_addr;
1394         svc_id = cma_get_service_id(id_priv->id.ps, addr);
1395         if (cma_any_addr(addr))
1396                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, NULL);
1397         else {
1398                 cma_set_compare_data(id_priv->id.ps, addr, &compare_data);
1399                 ret = ib_cm_listen(id_priv->cm_id.ib, svc_id, 0, &compare_data);
1400         }
1401
1402         if (ret) {
1403                 ib_destroy_cm_id(id_priv->cm_id.ib);
1404                 id_priv->cm_id.ib = NULL;
1405         }
1406
1407         return ret;
1408 }
1409 #endif
1410
1411 static int cma_iw_listen(struct rdma_id_private *id_priv, int backlog)
1412 {
1413         int ret;
1414         struct sockaddr_in *sin;
1415
1416         id_priv->cm_id.iw = iw_create_cm_id(id_priv->id.device, id_priv->so,
1417                                             iw_conn_req_handler, id_priv);
1418         if (IS_ERR(id_priv->cm_id.iw))
1419                 return PTR_ERR(id_priv->cm_id.iw);
1420
1421         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1422         id_priv->cm_id.iw->local_addr = *sin;
1423
1424         ret = iw_cm_listen(id_priv->cm_id.iw, backlog);
1425
1426         if (ret) {
1427                 iw_destroy_cm_id(id_priv->cm_id.iw);
1428                 id_priv->cm_id.iw = NULL;
1429         }
1430
1431         return ret;
1432 }
1433
1434 static int cma_listen_handler(struct rdma_cm_id *id,
1435                               struct rdma_cm_event *event)
1436 {
1437         struct rdma_id_private *id_priv = id->context;
1438
1439         id->context = id_priv->id.context;
1440         id->event_handler = id_priv->id.event_handler;
1441         return id_priv->id.event_handler(id, event);
1442 }
1443
1444 static void cma_listen_on_dev(struct rdma_id_private *id_priv,
1445                               struct cma_device *cma_dev)
1446 {
1447         struct rdma_id_private *dev_id_priv;
1448         struct rdma_cm_id *id;
1449         int ret;
1450
1451         id = rdma_create_id(cma_listen_handler, id_priv, id_priv->id.ps);
1452         if (IS_ERR(id))
1453                 return;
1454
1455         dev_id_priv = container_of(id, struct rdma_id_private, id);
1456
1457         dev_id_priv->state = CMA_ADDR_BOUND;
1458         memcpy(&id->route.addr.src_addr, &id_priv->id.route.addr.src_addr,
1459                ip_addr_size(&id_priv->id.route.addr.src_addr));
1460         dev_id_priv->so = id_priv->so; /* XXX */
1461
1462         cma_attach_to_dev(dev_id_priv, cma_dev);
1463         LIST_INSERT_HEAD(&id_priv->listen_list, dev_id_priv, listen_entry);
1464
1465         ret = rdma_listen(id, id_priv->backlog);
1466         if (ret)
1467                 goto err;
1468
1469         return;
1470 err:
1471         cma_destroy_listen(dev_id_priv);
1472 }
1473
1474 static void cma_listen_on_all(struct rdma_id_private *id_priv)
1475 {
1476         struct cma_device *cma_dev;
1477
1478         mtx_lock(&lock);
1479         LIST_INSERT_HEAD(&listen_any_list, id_priv, list);
1480         TAILQ_FOREACH(cma_dev, &dev_list, list)
1481                 cma_listen_on_dev(id_priv, cma_dev);
1482         mtx_unlock(&lock);
1483 }
1484
1485 static int cma_bind_any(struct rdma_cm_id *id, sa_family_t af)
1486 {
1487         struct sockaddr_in addr_in;
1488
1489         memset(&addr_in, 0, sizeof addr_in);
1490         addr_in.sin_family = af;
1491         addr_in.sin_len = sizeof addr_in;
1492         return rdma_bind_addr(id, (struct sockaddr *) &addr_in);
1493 }
1494
1495 int rdma_listen(struct rdma_cm_id *id, int backlog)
1496 {
1497         struct rdma_id_private *id_priv;
1498         int ret;
1499
1500         id_priv = container_of(id, struct rdma_id_private, id);
1501         if (id_priv->state == CMA_IDLE) {
1502                 ret = cma_bind_any(id, AF_INET);
1503                 if (ret)
1504                         return ret;
1505         }
1506
1507         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_LISTEN))
1508                 return (EINVAL);
1509
1510         id_priv->backlog = backlog;
1511         if (id->device) {
1512 #ifdef IB_SUPPORTED
1513                 switch (rdma_node_get_transport(id->device->node_type)) {
1514                 case RDMA_TRANSPORT_IB:
1515                         ret = cma_ib_listen(id_priv);
1516                         if (ret)
1517                                 goto err;
1518                         break;
1519                 case RDMA_TRANSPORT_IWARP:
1520 #endif
1521                         ret = cma_iw_listen(id_priv, backlog);
1522                         if (ret)
1523                                 goto err;
1524 #ifdef IB_SUPPORTED
1525                         break;
1526                 default:
1527                         ret = ENOSYS;
1528                         goto err;
1529                 }
1530 #endif
1531         } else
1532                 cma_listen_on_all(id_priv);
1533
1534         return 0;
1535 err:
1536         id_priv->backlog = 0;
1537         cma_comp_exch(id_priv, CMA_LISTEN, CMA_ADDR_BOUND);
1538         return ret;
1539 }
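/*
 * Passive-side usage of rdma_listen() (sketch only; 'listen_handler',
 * 'ctx' and the port number are hypothetical):
 *
 *      struct sockaddr_in sin;
 *
 *      bzero(&sin, sizeof sin);
 *      sin.sin_len = sizeof sin;
 *      sin.sin_family = AF_INET;
 *      sin.sin_port = htons(7471);
 *
 *      id = rdma_create_id(listen_handler, ctx, RDMA_PS_TCP);
 *      if (!IS_ERR(id) &&
 *          rdma_bind_addr(id, (struct sockaddr *)&sin) == 0)
 *              error = rdma_listen(id, 10);
 *
 * If the id was never bound, rdma_listen() binds it to the IPv4
 * wild-card address itself via cma_bind_any().
 */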
1540
1541 #ifdef IB_SUPPORTED
1542 static void cma_query_handler(int status, struct ib_sa_path_rec *path_rec,
1543                               void *context)
1544 {
1545         struct cma_work *work = context;
1546         struct rdma_route *route;
1547
1548         route = &work->id->id.route;
1549
1550         if (!status) {
1551                 route->num_paths = 1;
1552                 *route->path_rec = *path_rec;
1553         } else {
1554                 work->old_state = CMA_ROUTE_QUERY;
1555                 work->new_state = CMA_ADDR_RESOLVED;
1556                 work->event.event = RDMA_CM_EVENT_ROUTE_ERROR;
1557                 work->event.status = status;
1558         }
1559
1560         taskqueue_enqueue(cma_wq, &work->task);
1561 }
1562
1563 static int cma_query_ib_route(struct rdma_id_private *id_priv, int timeout_ms,
1564                               struct cma_work *work)
1565 {
1566         struct rdma_dev_addr *addr = &id_priv->id.route.addr.dev_addr;
1567         struct ib_sa_path_rec path_rec;
1568
1569         memset(&path_rec, 0, sizeof path_rec);
1570         ib_addr_get_sgid(addr, &path_rec.sgid);
1571         ib_addr_get_dgid(addr, &path_rec.dgid);
1572         path_rec.pkey = cpu_to_be16(ib_addr_get_pkey(addr));
1573         path_rec.numb_path = 1;
1574         path_rec.reversible = 1;
1575
1576         id_priv->query_id = ib_sa_path_rec_get(&sa_client, id_priv->id.device,
1577                                 id_priv->id.port_num, &path_rec,
1578                                 IB_SA_PATH_REC_DGID | IB_SA_PATH_REC_SGID |
1579                                 IB_SA_PATH_REC_PKEY | IB_SA_PATH_REC_NUMB_PATH |
1580                                 IB_SA_PATH_REC_REVERSIBLE,
1581                                 timeout_ms, M_NOWAIT,
1582                                 cma_query_handler, work, &id_priv->query);
1583
1584         return (id_priv->query_id < 0) ? id_priv->query_id : 0;
1585 }
1586 #endif
1587
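/*
 * cma_work_handler() - taskqueue callback that finishes deferred CMA
 * work.  The cma_work entry carries the expected old state, the new
 * state and the rdma_cm_event to deliver; if the consumer's event
 * handler returns non-zero, the id is marked CMA_DESTROYING and
 * destroyed here.
 */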
1588 static void cma_work_handler(void *context, int pending)
1589 {
1590         struct cma_work *work = context;
1591         struct rdma_id_private *id_priv = work->id;
1592         int destroy = 0;
1593
1594         mtx_lock(&id_priv->lock);
1595         ++id_priv->dev_remove;
1596         mtx_unlock(&id_priv->lock);
1597         if (!cma_comp_exch(id_priv, work->old_state, work->new_state))
1598                 goto out;
1599
1600         if (id_priv->id.event_handler(&id_priv->id, &work->event)) {
1601                 cma_exch(id_priv, CMA_DESTROYING);
1602                 destroy = 1;
1603         }
1604 out:
1605         cma_enable_remove(id_priv);
1606         cma_deref_id(id_priv);
1607         if (destroy)
1608                 rdma_destroy_id(&id_priv->id);
1609         free(work, M_DEVBUF);
1610 }
1611
1612 #ifdef IB_SUPPORTED
1613 static int cma_resolve_ib_route(struct rdma_id_private *id_priv, int timeout_ms)
1614 {
1615         struct rdma_route *route = &id_priv->id.route;
1616         struct cma_work *work;
1617         int ret;
1618
1619         work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
1620         if (!work)
1621                 return (ENOMEM);
1622         bzero(work, sizeof *work);
1623
1624         work->id = id_priv;
1625         TASK_INIT(&work->task, 0, cma_work_handler, work);
1626         work->old_state = CMA_ROUTE_QUERY;
1627         work->new_state = CMA_ROUTE_RESOLVED;
1628         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1629
1630         route->path_rec = malloc(sizeof *route->path_rec, M_DEVBUF, M_NOWAIT);
1631         if (!route->path_rec) {
1632                 ret = ENOMEM;
1633                 goto err1;
1634         }
1635
1636         ret = cma_query_ib_route(id_priv, timeout_ms, work);
1637         if (ret)
1638                 goto err2;
1639
1640         return 0;
1641 err2:
1642         free(route->path_rec, M_DEVBUF);
1643         route->path_rec = NULL;
1644 err1:
1645         free(work, M_DEVBUF);
1646         return ret;
1647 }
1648
1649 int rdma_set_ib_paths(struct rdma_cm_id *id,
1650                       struct ib_sa_path_rec *path_rec, int num_paths)
1651 {
1652         struct rdma_id_private *id_priv;
1653         int ret;
1654
1655         id_priv = container_of(id, struct rdma_id_private, id);
1656         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_RESOLVED))
1657                 return (EINVAL);
1658
1659         id->route.path_rec = malloc(sizeof *path_rec * num_paths, M_DEVBUF, M_NOWAIT);
1660         if (!id->route.path_rec) {
1661                 ret = ENOMEM;
1662                 goto err;
1663         }
1664
1665         memcpy(id->route.path_rec, path_rec, sizeof *path_rec * num_paths);
             id->route.num_paths = num_paths;
1666         return 0;
1667 err:
1668         cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_ADDR_RESOLVED);
1669         return ret;
1670 }
1671 #endif
1672
1673 static int cma_resolve_iw_route(struct rdma_id_private *id_priv, int timeout_ms)
1674 {
1675         struct cma_work *work;
1676
1677         work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
1678         if (!work)
1679                 return (ENOMEM);
1680         bzero(work, sizeof *work);
1681
1682         work->id = id_priv;
1683         TASK_INIT(&work->task, 0, cma_work_handler, work);
1684         work->old_state = CMA_ROUTE_QUERY;
1685         work->new_state = CMA_ROUTE_RESOLVED;
1686         work->event.event = RDMA_CM_EVENT_ROUTE_RESOLVED;
1687         taskqueue_enqueue(cma_wq, &work->task);
1688         return 0;
1689 }
1690
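/*
 * rdma_resolve_route() - resolve the RDMA route to the peer.  The id
 * moves from CMA_ADDR_RESOLVED to CMA_ROUTE_QUERY; IB devices issue an
 * SA path record query, while iWARP devices simply schedule completion
 * since the IP route is already known to the network stack.  The result
 * is delivered as an RDMA_CM_EVENT_ROUTE_RESOLVED or ROUTE_ERROR event.
 */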
1691 int rdma_resolve_route(struct rdma_cm_id *id, int timeout_ms)
1692 {
1693         struct rdma_id_private *id_priv;
1694         int ret;
1695
1696         id_priv = container_of(id, struct rdma_id_private, id);
1697         if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ROUTE_QUERY))
1698                 return (EINVAL);
1699
1700         mtx_lock(&id_priv->lock);
1701         id_priv->refcount++;
1702         mtx_unlock(&id_priv->lock);
1703 #ifdef IB_SUPPORTED
1704         switch (rdma_node_get_transport(id->device->node_type)) {
1705         case RDMA_TRANSPORT_IB:
1706                 ret = cma_resolve_ib_route(id_priv, timeout_ms);
1707                 break;
1708         case RDMA_TRANSPORT_IWARP:
1709 #endif
1710                 ret = cma_resolve_iw_route(id_priv, timeout_ms);
1711 #ifdef IB_SUPPORTED
1712                 break;
1713         default:
1714                 ret = ENOSYS;
1715                 break;
1716         }
1717 #endif
1718         if (ret)
1719                 goto err;
1720
1721         return 0;
1722 err:
1723         cma_comp_exch(id_priv, CMA_ROUTE_QUERY, CMA_ADDR_RESOLVED);
1724         cma_deref_id(id_priv);
1725         return ret;
1726 }
1727
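/*
 * cma_bind_loopback() - bind an unbound id for loopback resolution.
 * The first active port of any registered device is chosen (port 1 of
 * the first device if none is active); its GID and P_Key are copied
 * into the id's device address and the id is attached to that device.
 */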
1728 static int cma_bind_loopback(struct rdma_id_private *id_priv)
1729 {
1730         struct cma_device *cma_dev;
1731         struct ib_port_attr port_attr;
1732         union ib_gid gid;
1733         u16 pkey;
1734         int ret;
1735         u8 p;
1736
1737         mtx_lock(&lock);
1738         if (TAILQ_EMPTY(&dev_list)) {
1739                 ret = ENODEV;
1740                 goto out;
1741         }
1742         TAILQ_FOREACH(cma_dev, &dev_list, list)
1743                 for (p = 1; p <= cma_dev->device->phys_port_cnt; ++p)
1744                         if (!ib_query_port(cma_dev->device, p, &port_attr) &&
1745                             port_attr.state == IB_PORT_ACTIVE)
1746                                 goto port_found;
1747
1748         p = 1;
1749         cma_dev = TAILQ_FIRST(&dev_list);
1750
1751 port_found:
1752         ret = ib_get_cached_gid(cma_dev->device, p, 0, &gid);
1753         if (ret)
1754                 goto out;
1755
1756         ret = ib_get_cached_pkey(cma_dev->device, p, 0, &pkey);
1757         if (ret)
1758                 goto out;
1759
1760         ib_addr_set_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1761         ib_addr_set_pkey(&id_priv->id.route.addr.dev_addr, pkey);
1762         id_priv->id.port_num = p;
1763         cma_attach_to_dev(id_priv, cma_dev);
1764 out:
1765         mtx_unlock(&lock);
1766         return ret;
1767 }
1768
1769 static void addr_handler(int status, struct sockaddr *src_addr,
1770                          struct rdma_dev_addr *dev_addr, void *context)
1771 {
1772         struct rdma_id_private *id_priv = context;
1773         struct rdma_cm_event event;
1774
1775         memset(&event, 0, sizeof event);
1776         mtx_lock(&id_priv->lock);
1777         ++id_priv->dev_remove;
1778         mtx_unlock(&id_priv->lock);
1779
1780         /*
1781          * Grab mutex to block rdma_destroy_id() from removing the device while
1782          * we're trying to acquire it.
1783          */
1784         mtx_lock(&lock);
1785         if (!cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_RESOLVED)) {
1786                 mtx_unlock(&lock);
1787                 goto out;
1788         }
1789
1790         if (!status && !id_priv->cma_dev)
1791                 status = cma_acquire_dev(id_priv);
1792         mtx_unlock(&lock);
1793
1794         if (status) {
1795                 if (!cma_comp_exch(id_priv, CMA_ADDR_RESOLVED, CMA_ADDR_BOUND))
1796                         goto out;
1797                 event.event = RDMA_CM_EVENT_ADDR_ERROR;
1798                 event.status = status;
1799         } else {
1800                 memcpy(&id_priv->id.route.addr.src_addr, src_addr,
1801                        ip_addr_size(src_addr));
1802                 event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1803         }
1804
1805         if (id_priv->id.event_handler(&id_priv->id, &event)) {
1806                 cma_exch(id_priv, CMA_DESTROYING);
1807                 cma_enable_remove(id_priv);
1808                 cma_deref_id(id_priv);
1809                 rdma_destroy_id(&id_priv->id);
1810                 return;
1811         }
1812 out:
1813         cma_enable_remove(id_priv);
1814         cma_deref_id(id_priv);
1815 }
1816
1817 static int cma_resolve_loopback(struct rdma_id_private *id_priv)
1818 {
1819         struct cma_work *work;
1820         struct sockaddr_in *src_in, *dst_in;
1821         union ib_gid gid;
1822         int ret;
1823
1824         work = malloc(sizeof *work, M_DEVBUF, M_NOWAIT);
1825         if (!work)
1826                 return (ENOMEM);
1827         bzero(work, sizeof *work);
1828
1829         if (!id_priv->cma_dev) {
1830                 ret = cma_bind_loopback(id_priv);
1831                 if (ret)
1832                         goto err;
1833         }
1834
1835         ib_addr_get_sgid(&id_priv->id.route.addr.dev_addr, &gid);
1836         ib_addr_set_dgid(&id_priv->id.route.addr.dev_addr, &gid);
1837
1838         if (cma_zero_addr(&id_priv->id.route.addr.src_addr)) {
1839                 src_in = (struct sockaddr_in *)&id_priv->id.route.addr.src_addr;
1840                 dst_in = (struct sockaddr_in *)&id_priv->id.route.addr.dst_addr;
1841                 src_in->sin_family = dst_in->sin_family;
1842                 src_in->sin_addr.s_addr = dst_in->sin_addr.s_addr;
1843         }
1844
1845         work->id = id_priv;
1846         TASK_INIT(&work->task, 0, cma_work_handler, work);
1847         work->old_state = CMA_ADDR_QUERY;
1848         work->new_state = CMA_ADDR_RESOLVED;
1849         work->event.event = RDMA_CM_EVENT_ADDR_RESOLVED;
1850         taskqueue_enqueue(cma_wq, &work->task);
1851         return 0;
1852 err:
1853         free(work, M_DEVBUF);
1854         return ret;
1855 }
1856
1857 static int cma_bind_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1858                          struct sockaddr *dst_addr)
1859 {
1860         if (src_addr && src_addr->sa_family)
1861                 return rdma_bind_addr(id, src_addr);
1862         else
1863                 return cma_bind_any(id, dst_addr->sa_family);
1864 }
1865
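/*
 * rdma_resolve_addr() - map a destination IP address to an RDMA device
 * address.  An idle id is first bound (to src_addr if supplied, else to
 * the wildcard address), then moved from CMA_ADDR_BOUND to
 * CMA_ADDR_QUERY.  A wildcard destination resolves over loopback; any
 * other destination is handed to rdma_resolve_ip(), and addr_handler()
 * reports the outcome as an ADDR_RESOLVED or ADDR_ERROR event.
 */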
1866 int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr,
1867                       struct sockaddr *dst_addr, int timeout_ms)
1868 {
1869         struct rdma_id_private *id_priv;
1870         int ret;
1871
1872         id_priv = container_of(id, struct rdma_id_private, id);
1873         if (id_priv->state == CMA_IDLE) {
1874                 ret = cma_bind_addr(id, src_addr, dst_addr);
1875                 if (ret)
1876                         return ret;
1877         }
1878
1879         if (!cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_ADDR_QUERY))
1880                 return (EINVAL);
1881
1882         mtx_lock(&id_priv->lock);
1883         id_priv->refcount++;
1884         mtx_unlock(&id_priv->lock);
1885         memcpy(&id->route.addr.dst_addr, dst_addr, ip_addr_size(dst_addr));
1886         if (cma_any_addr(dst_addr))
1887                 ret = cma_resolve_loopback(id_priv);
1888         else
1889                 ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr,
1890                                       dst_addr, &id->route.addr.dev_addr,
1891                                       timeout_ms, addr_handler, id_priv);
1892         if (ret)
1893                 goto err;
1894
1895         return 0;
1896 err:
1897         cma_comp_exch(id_priv, CMA_ADDR_QUERY, CMA_ADDR_BOUND);
1898         cma_deref_id(id_priv);
1899         return ret;
1900 }
1901
1902 static void cma_bind_port(struct rdma_bind_list *bind_list,
1903                           struct rdma_id_private *id_priv)
1904 {
1905         struct sockaddr_in *sin;
1906
1907         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1908         sin->sin_port = htons(bind_list->port);
1909         id_priv->bind_list = bind_list;
1910         TAILQ_INSERT_HEAD(&bind_list->owners, id_priv, node);
1911 }
1912
1913 static int cma_alloc_port(struct kvl *ps, struct rdma_id_private *id_priv,
1914                           unsigned short snum)
1915 {
1916         struct rdma_bind_list *bind_list;
1917         int port, ret;
1918
1919         bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT);
1920         if (!bind_list)
1921                 return (ENOMEM);
1922         bzero(bind_list, sizeof *bind_list);
1923
1924         do {
1925                 ret = kvl_alloc_above(ps, bind_list, snum, &port);
1926         } while (ret == EAGAIN);
1927
1928         if (ret)
1929                 goto err1;
1930
1931         if (port != snum) {
1932                 ret = EADDRNOTAVAIL;
1933                 goto err2;
1934         }
1935
1936         bind_list->ps = ps;
1937         bind_list->port = (unsigned short) port;
1938         cma_bind_port(bind_list, id_priv);
1939         return 0;
1940 err2:
1941         kvl_delete(ps, port);
1942 err1:
1943         free(bind_list, M_DEVBUF);
1944         return ret;
1945 }
1946
1947 static int cma_alloc_any_port(struct kvl *ps, struct rdma_id_private *id_priv)
1948 {
1949         struct rdma_bind_list *bind_list;
1950         int port, ret;
1951
1952         bind_list = malloc(sizeof *bind_list, M_DEVBUF, M_NOWAIT);
1953         if (!bind_list)
1954                 return (ENOMEM);
1955         bzero(bind_list, sizeof *bind_list);
1956
1957 retry:
1958         do {
1959                 ret = kvl_alloc_above(ps, bind_list, next_port, &port);
1960         } while (ret == EAGAIN);
1961
1962         if (ret)
1963                 goto err1;
1964
1965         if (port > ipport_lastauto) {
1966                 if (next_port != ipport_firstauto) {
1967                         kvl_delete(ps, port);
1968                         next_port = ipport_firstauto;
1969                         goto retry;
1970                 }
1971                 ret = EADDRNOTAVAIL;
1972                 goto err2;
1973         }
1974
1975         if (port == ipport_lastauto)
1976                 next_port = ipport_firstauto;
1977         else
1978                 next_port = port + 1;
1979
1980         bind_list->ps = ps;
1981         bind_list->port = (unsigned short) port;
1982         cma_bind_port(bind_list, id_priv);
1983         return 0;
1984 err2:
1985         kvl_delete(ps, port);
1986 err1:
1987         free(bind_list, M_DEVBUF);
1988         return ret;
1989 }
1990
1991 static int cma_use_port(struct kvl *ps, struct rdma_id_private *id_priv)
1992 {
1993         struct rdma_id_private *cur_id;
1994         struct sockaddr_in *sin, *cur_sin;
1995         struct rdma_bind_list *bind_list;
1996         unsigned short snum;
1997
1998         sin = (struct sockaddr_in *) &id_priv->id.route.addr.src_addr;
1999         snum = ntohs(sin->sin_port);
2000         if (snum <= ipport_reservedhigh && snum >= ipport_reservedlow &&
2001             priv_check(curthread, PRIV_NETINET_RESERVEDPORT))
2002                 return (EACCES);
2003
2004         bind_list = kvl_lookup(ps, snum);
2005         if (!bind_list)
2006                 return cma_alloc_port(ps, id_priv, snum);
2007
2008         /*
2009          * We don't support binding to any address if anyone is bound to
2010          * a specific address on the same port.
2011          */
2012         if (cma_any_addr(&id_priv->id.route.addr.src_addr))
2013                 return (EADDRNOTAVAIL);
2014
2015         TAILQ_FOREACH(cur_id, &bind_list->owners, node) {
2016                 if (cma_any_addr(&cur_id->id.route.addr.src_addr))
2017                         return (EADDRNOTAVAIL);
2018
2019                 cur_sin = (struct sockaddr_in *)&cur_id->id.route.addr.src_addr;
2020                 if (sin->sin_addr.s_addr == cur_sin->sin_addr.s_addr)
2021                         return (EADDRINUSE);
2022         }
2023
2024         cma_bind_port(bind_list, id_priv);
2025         return 0;
2026 }
2027
2028 static int cma_get_tcp_port(struct rdma_id_private *id_priv)
2029 {
2030         int ret;
2031         struct socket *so;
2032
2033         ret = socreate(AF_INET, &so, SOCK_STREAM, IPPROTO_TCP, 
2034                                 curthread->td_ucred, curthread);
2035         if (ret) {
2036                 printf("%s socreate err %d\n", __FUNCTION__, ret);
2037                 return ret;
2038         }
2039
2040         ret = sobind(so, (struct sockaddr *)&id_priv->id.route.addr.src_addr,
2041                         curthread);
2042         if (ret) {
2043                 soclose(so);
2044                 return ret;
2045         }
2046         id_priv->so = so;
2047         return 0;       
2048 }
2049
2050 static int cma_get_port(struct rdma_id_private *id_priv)
2051 {
2052         struct kvl *ps;
2053         int ret;
2054
2055         switch (id_priv->id.ps) {
2056         case RDMA_PS_SDP:
2057                 ps = &sdp_ps;
2058                 break;
2059         case RDMA_PS_TCP:
2060                 ps = &tcp_ps;
2061                 ret = cma_get_tcp_port(id_priv); /* Synch with native stack */
2062                 if (ret)
2063                         return ret;
2064                 break;
2065         case RDMA_PS_UDP:
2066                 ps = &udp_ps;
2067                 break;
2068         case RDMA_PS_IPOIB:
2069                 ps = &ipoib_ps;
2070                 break;
2071         default:
2072                 return (EPROTONOSUPPORT);
2073         }
2074
2075         mtx_lock(&lock);
2076         if (cma_any_port(&id_priv->id.route.addr.src_addr))
2077                 ret = cma_alloc_any_port(ps, id_priv);
2078         else
2079                 ret = cma_use_port(ps, id_priv);
2080         mtx_unlock(&lock);
2081
2082         return ret;
2083 }
2084
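/*
 * rdma_bind_addr() - bind an id to a local IPv4 address and port.
 * Only AF_INET is supported.  A specific address is translated to the
 * owning RDMA device and the id is attached to it; the port is then
 * reserved in the id's port space, and for RDMA_PS_TCP a native TCP
 * socket is bound as well so the port is also taken in the host stack.
 */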
2085 int rdma_bind_addr(struct rdma_cm_id *id, struct sockaddr *addr)
2086 {
2087         struct rdma_id_private *id_priv;
2088         int ret;
2089
2090         if (addr->sa_family != AF_INET)
2091                 return (EAFNOSUPPORT);
2092
2093         id_priv = container_of(id, struct rdma_id_private, id);
2094         if (!cma_comp_exch(id_priv, CMA_IDLE, CMA_ADDR_BOUND))
2095                 return (EINVAL);
2096
2097         if (!cma_any_addr(addr)) {
2098                 ret = rdma_translate_ip(addr, &id->route.addr.dev_addr);
2099                 if (ret)
2100                         goto err1;
2101
2102                 mtx_lock(&lock);
2103                 ret = cma_acquire_dev(id_priv);
2104                 mtx_unlock(&lock);
2105                 if (ret)
2106                         goto err1;
2107         }
2108
2109         memcpy(&id->route.addr.src_addr, addr, ip_addr_size(addr));
2110         ret = cma_get_port(id_priv);
2111         if (ret)
2112                 goto err2;
2113
2114         return 0;
2115 err2:
2116         if (!cma_any_addr(addr)) {
2117                 mtx_lock(&lock);
2118                 cma_detach_from_dev(id_priv);
2119                 mtx_unlock(&lock);
2120         }
2121 err1:
2122         cma_comp_exch(id_priv, CMA_ADDR_BOUND, CMA_IDLE);
2123         return ret;
2124 }
2125
2126 #ifdef IB_SUPPORTED
2127 static int cma_format_hdr(void *hdr, enum rdma_port_space ps,
2128                           struct rdma_route *route)
2129 {
2130         struct sockaddr_in *src4, *dst4;
2131         struct cma_hdr *cma_hdr;
2132         struct sdp_hh *sdp_hdr;
2133
2134         src4 = (struct sockaddr_in *) &route->addr.src_addr;
2135         dst4 = (struct sockaddr_in *) &route->addr.dst_addr;
2136
2137         switch (ps) {
2138         case RDMA_PS_SDP:
2139                 sdp_hdr = hdr;
2140                 if (sdp_get_majv(sdp_hdr->sdp_version) != SDP_MAJ_VERSION)
2141                         return (EINVAL);
2142                 sdp_set_ip_ver(sdp_hdr, 4);
2143                 sdp_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2144                 sdp_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2145                 sdp_hdr->port = src4->sin_port;
2146                 break;
2147         default:
2148                 cma_hdr = hdr;
2149                 cma_hdr->cma_version = CMA_VERSION;
2150                 cma_set_ip_ver(cma_hdr, 4);
2151                 cma_hdr->src_addr.ip4.addr = src4->sin_addr.s_addr;
2152                 cma_hdr->dst_addr.ip4.addr = dst4->sin_addr.s_addr;
2153                 cma_hdr->port = src4->sin_port;
2154                 break;
2155         }
2156         return 0;
2157 }
2158
2159 static int cma_sidr_rep_handler(struct ib_cm_id *cm_id,
2160                                 struct ib_cm_event *ib_event)
2161 {
2162         struct rdma_id_private *id_priv = cm_id->context;
2163         struct rdma_cm_event event;
2164         struct ib_cm_sidr_rep_event_param *rep = &ib_event->param.sidr_rep_rcvd;
2165         int ret = 0;
2166
2167         if (cma_disable_remove(id_priv, CMA_CONNECT))
2168                 return 0;
2169
2170         memset(&event, 0, sizeof event);
2171         switch (ib_event->event) {
2172         case IB_CM_SIDR_REQ_ERROR:
2173                 event.event = RDMA_CM_EVENT_UNREACHABLE;
2174                 event.status = ETIMEDOUT;
2175                 break;
2176         case IB_CM_SIDR_REP_RECEIVED:
2177                 event.param.ud.private_data = ib_event->private_data;
2178                 event.param.ud.private_data_len = IB_CM_SIDR_REP_PRIVATE_DATA_SIZE;
2179                 if (rep->status != IB_SIDR_SUCCESS) {
2180                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2181                         event.status = ib_event->param.sidr_rep_rcvd.status;
2182                         break;
2183                 }
2184                 if (id_priv->qkey != rep->qkey) {
2185                         event.event = RDMA_CM_EVENT_UNREACHABLE;
2186                         event.status = EINVAL;
2187                         break;
2188                 }
2189                 ib_init_ah_from_path(id_priv->id.device, id_priv->id.port_num,
2190                                      id_priv->id.route.path_rec,
2191                                      &event.param.ud.ah_attr);
2192                 event.param.ud.qp_num = rep->qpn;
2193                 event.param.ud.qkey = rep->qkey;
2194                 event.event = RDMA_CM_EVENT_ESTABLISHED;
2195                 event.status = 0;
2196                 break;
2197         default:
2198                 log(LOG_ERR, "RDMA CMA: unexpected IB CM event: %d",
2199                        ib_event->event);
2200                 goto out;
2201         }
2202
2203         ret = id_priv->id.event_handler(&id_priv->id, &event);
2204         if (ret) {
2205                 /* Destroy the CM ID by returning a non-zero value. */
2206                 id_priv->cm_id.ib = NULL;
2207                 cma_exch(id_priv, CMA_DESTROYING);
2208                 cma_enable_remove(id_priv);
2209                 rdma_destroy_id(&id_priv->id);
2210                 return ret;
2211         }
2212 out:
2213         cma_enable_remove(id_priv);
2214         return ret;
2215 }
2216
2217 static int cma_resolve_ib_udp(struct rdma_id_private *id_priv,
2218                               struct rdma_conn_param *conn_param)
2219 {
2220         struct ib_cm_sidr_req_param req;
2221         struct rdma_route *route;
2222         int ret;
2223
2224         req.private_data_len = sizeof(struct cma_hdr) +
2225                                conn_param->private_data_len;
2226         req.private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT);
2227         if (!req.private_data)
2228                 return (ENOMEM);
2229         bzero((void *)req.private_data, req.private_data_len);
2230
2231         if (conn_param->private_data && conn_param->private_data_len)
2232                 memcpy((caddr_t) req.private_data + sizeof(struct cma_hdr),
2233                        conn_param->private_data, conn_param->private_data_len);
2234
2235         route = &id_priv->id.route;
2236         ret = cma_format_hdr((void *) req.private_data, id_priv->id.ps, route);
2237         if (ret)
2238                 goto out;
2239
2240         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device,
2241                                             cma_sidr_rep_handler, id_priv);
2242         if (IS_ERR(id_priv->cm_id.ib)) {
2243                 ret = PTR_ERR(id_priv->cm_id.ib);
2244                 goto out;
2245         }
2246
2247         req.path = route->path_rec;
2248         req.service_id = cma_get_service_id(id_priv->id.ps,
2249                                             &route->addr.dst_addr);
2250         req.timeout_ms = 1 << (CMA_CM_RESPONSE_TIMEOUT - 8);
2251         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2252
2253         ret = ib_send_cm_sidr_req(id_priv->cm_id.ib, &req);
2254         if (ret) {
2255                 ib_destroy_cm_id(id_priv->cm_id.ib);
2256                 id_priv->cm_id.ib = NULL;
2257         }
2258 out:
2259         free(req.private_data, M_DEVBUF);
2260         return ret;
2261 }
2262
2263 static int cma_connect_ib(struct rdma_id_private *id_priv,
2264                           struct rdma_conn_param *conn_param)
2265 {
2266         struct ib_cm_req_param req;
2267         struct rdma_route *route;
2268         void *private_data;
2269         int offset, ret;
2270
2271         memset(&req, 0, sizeof req);
2272         offset = cma_user_data_offset(id_priv->id.ps);
2273         req.private_data_len = offset + conn_param->private_data_len;
2274         private_data = malloc(req.private_data_len, M_DEVBUF, M_NOWAIT);
2275         if (!private_data)
2276                 return (ENOMEM);
2277         bzero(private_data, req.private_data_len);
2278
2279         if (conn_param->private_data && conn_param->private_data_len)
2280                 memcpy(private_data + offset, conn_param->private_data,
2281                        conn_param->private_data_len);
2282
2283         id_priv->cm_id.ib = ib_create_cm_id(id_priv->id.device, cma_ib_handler,
2284                                             id_priv);
2285         if (IS_ERR(id_priv->cm_id.ib)) {
2286                 ret = PTR_ERR(id_priv->cm_id.ib);
2287                 goto out;
2288         }
2289
2290         route = &id_priv->id.route;
2291         ret = cma_format_hdr(private_data, id_priv->id.ps, route);
2292         if (ret)
2293                 goto out;
2294         req.private_data = private_data;
2295
2296         req.primary_path = &route->path_rec[0];
2297         if (route->num_paths == 2)
2298                 req.alternate_path = &route->path_rec[1];
2299
2300         req.service_id = cma_get_service_id(id_priv->id.ps,
2301                                             &route->addr.dst_addr);
2302         req.qp_num = id_priv->qp_num;
2303         req.qp_type = IB_QPT_RC;
2304         req.starting_psn = id_priv->seq_num;
2305         req.responder_resources = conn_param->responder_resources;
2306         req.initiator_depth = conn_param->initiator_depth;
2307         req.flow_control = conn_param->flow_control;
2308         req.retry_count = conn_param->retry_count;
2309         req.rnr_retry_count = conn_param->rnr_retry_count;
2310         req.remote_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2311         req.local_cm_response_timeout = CMA_CM_RESPONSE_TIMEOUT;
2312         req.max_cm_retries = CMA_MAX_CM_RETRIES;
2313         req.srq = id_priv->srq ? 1 : 0;
2314
2315         ret = ib_send_cm_req(id_priv->cm_id.ib, &req);
2316 out:
2317         if (ret && !IS_ERR(id_priv->cm_id.ib)) {
2318                 ib_destroy_cm_id(id_priv->cm_id.ib);
2319                 id_priv->cm_id.ib = NULL;
2320         }
2321
2322         free(private_data, M_DEVBUF);
2323         return ret;
2324 }
2325 #endif
2326
2327 static int cma_connect_iw(struct rdma_id_private *id_priv,
2328                           struct rdma_conn_param *conn_param)
2329 {
2330         struct iw_cm_id *cm_id;
2331         struct sockaddr_in* sin;
2332         int ret;
2333         struct iw_cm_conn_param iw_param;
2334
2335         cm_id = iw_create_cm_id(id_priv->id.device, id_priv->so,
2336                                  cma_iw_handler, id_priv);
2337         if (IS_ERR(cm_id)) {
2338                 ret = PTR_ERR(cm_id);
2339                 goto out;
2340         }
2341
2342         id_priv->cm_id.iw = cm_id;
2343
2344         sin = (struct sockaddr_in*) &id_priv->id.route.addr.src_addr;
2345         cm_id->local_addr = *sin;
2346
2347         sin = (struct sockaddr_in*) &id_priv->id.route.addr.dst_addr;
2348         cm_id->remote_addr = *sin;
2349
2350         ret = cma_modify_qp_rtr(&id_priv->id);
2351         if (ret)
2352                 goto out;
2353
2354         iw_param.ord = conn_param->initiator_depth;
2355         iw_param.ird = conn_param->responder_resources;
2356         iw_param.private_data = conn_param->private_data;
2357         iw_param.private_data_len = conn_param->private_data_len;
2358         if (id_priv->id.qp)
2359                 iw_param.qpn = id_priv->qp_num;
2360         else
2361                 iw_param.qpn = conn_param->qp_num;
2362         ret = iw_cm_connect(cm_id, &iw_param);
2363 out:
2364         if (ret && !IS_ERR(cm_id)) {
2365                 iw_destroy_cm_id(cm_id);
2366                 id_priv->cm_id.iw = NULL;
2367         }
2368         return ret;
2369 }
2370
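/*
 * rdma_connect() - start an active connection on a route-resolved id.
 * The id moves from CMA_ROUTE_RESOLVED to CMA_CONNECT and a connection
 * request is sent through the IB CM (a SIDR request for UD port spaces,
 * a CM REQ otherwise) or through the iWARP CM, depending on the device
 * transport.  The outcome is reported later via the event handler.
 */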
2371 int rdma_connect(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2372 {
2373         struct rdma_id_private *id_priv;
2374         int ret;
2375
2376         id_priv = container_of(id, struct rdma_id_private, id);
2377         if (!cma_comp_exch(id_priv, CMA_ROUTE_RESOLVED, CMA_CONNECT))
2378                 return (EINVAL);
2379
2380         if (!id->qp) {
2381                 id_priv->qp_num = conn_param->qp_num;
2382                 id_priv->srq = conn_param->srq;
2383         }
2384
2385 #ifdef IB_SUPPORTED
2386         switch (rdma_node_get_transport(id->device->node_type)) {
2387         case RDMA_TRANSPORT_IB:
2388                 if (cma_is_ud_ps(id->ps))
2389                         ret = cma_resolve_ib_udp(id_priv, conn_param);
2390                 else
2391                         ret = cma_connect_ib(id_priv, conn_param);
2392                 break;
2393         case RDMA_TRANSPORT_IWARP:
2394 #endif
2395                 ret = cma_connect_iw(id_priv, conn_param);
2396 #ifdef IB_SUPPORTED
2397                 break;
2398         default:
2399                 ret = ENOSYS;
2400                 break;
2401         }
2402 #endif
2403         if (ret)
2404                 goto err;
2405
2406         return 0;
2407 err:
2408         cma_comp_exch(id_priv, CMA_CONNECT, CMA_ROUTE_RESOLVED);
2409         return ret;
2410 }
2411
2412 #ifdef IB_SUPPORTED
2413 static int cma_accept_ib(struct rdma_id_private *id_priv,
2414                          struct rdma_conn_param *conn_param)
2415 {
2416         struct ib_cm_rep_param rep;
2417         struct ib_qp_attr qp_attr;
2418         int qp_attr_mask, ret;
2419
2420         if (id_priv->id.qp) {
2421                 ret = cma_modify_qp_rtr(&id_priv->id);
2422                 if (ret)
2423                         goto out;
2424
2425                 qp_attr.qp_state = IB_QPS_RTS;
2426                 ret = ib_cm_init_qp_attr(id_priv->cm_id.ib, &qp_attr,
2427                                          &qp_attr_mask);
2428                 if (ret)
2429                         goto out;
2430
2431                 qp_attr.max_rd_atomic = conn_param->initiator_depth;
2432                 ret = ib_modify_qp(id_priv->id.qp, &qp_attr, qp_attr_mask);
2433                 if (ret)
2434                         goto out;
2435         }
2436
2437         memset(&rep, 0, sizeof rep);
2438         rep.qp_num = id_priv->qp_num;
2439         rep.starting_psn = id_priv->seq_num;
2440         rep.private_data = conn_param->private_data;
2441         rep.private_data_len = conn_param->private_data_len;
2442         rep.responder_resources = conn_param->responder_resources;
2443         rep.initiator_depth = conn_param->initiator_depth;
2444         rep.target_ack_delay = CMA_CM_RESPONSE_TIMEOUT;
2445         rep.failover_accepted = 0;
2446         rep.flow_control = conn_param->flow_control;
2447         rep.rnr_retry_count = conn_param->rnr_retry_count;
2448         rep.srq = id_priv->srq ? 1 : 0;
2449
2450         ret = ib_send_cm_rep(id_priv->cm_id.ib, &rep);
2451 out:
2452         return ret;
2453 }
2454 #endif
2455
2456 static int cma_accept_iw(struct rdma_id_private *id_priv,
2457                   struct rdma_conn_param *conn_param)
2458 {
2459         struct iw_cm_conn_param iw_param;
2460         int ret;
2461
2462         ret = cma_modify_qp_rtr(&id_priv->id);
2463         if (ret)
2464                 return ret;
2465
2466         iw_param.ord = conn_param->initiator_depth;
2467         iw_param.ird = conn_param->responder_resources;
2468         iw_param.private_data = conn_param->private_data;
2469         iw_param.private_data_len = conn_param->private_data_len;
2470         if (id_priv->id.qp) {
2471                 iw_param.qpn = id_priv->qp_num;
2472         } else
2473                 iw_param.qpn = conn_param->qp_num;
2474
2475         return iw_cm_accept(id_priv->cm_id.iw, &iw_param);
2476 }
2477
2478 #ifdef IB_SUPPORTED
2479 static int cma_send_sidr_rep(struct rdma_id_private *id_priv,
2480                              enum ib_cm_sidr_status status,
2481                              const void *private_data, int private_data_len)
2482 {
2483         struct ib_cm_sidr_rep_param rep;
2484
2485         memset(&rep, 0, sizeof rep);
2486         rep.status = status;
2487         if (status == IB_SIDR_SUCCESS) {
2488                 rep.qp_num = id_priv->qp_num;
2489                 rep.qkey = id_priv->qkey;
2490         }
2491         rep.private_data = private_data;
2492         rep.private_data_len = private_data_len;
2493
2494         return ib_send_cm_sidr_rep(id_priv->cm_id.ib, &rep);
2495 }
2496 #endif
2497
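/*
 * rdma_accept() - accept an incoming connection on an id in the
 * CMA_CONNECT state, i.e. after a connect request event has been
 * delivered to the listener.  IB devices send a SIDR reply or CM REP;
 * iWARP devices call iw_cm_accept().  If the accept fails, the QP is
 * put into the error state and the request is rejected.
 */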
2498 int rdma_accept(struct rdma_cm_id *id, struct rdma_conn_param *conn_param)
2499 {
2500         struct rdma_id_private *id_priv;
2501         int ret;
2502
2503         id_priv = container_of(id, struct rdma_id_private, id);
2504         if (!cma_comp(id_priv, CMA_CONNECT))
2505                 return (EINVAL);
2506
2507         if (!id->qp && conn_param) {
2508                 id_priv->qp_num = conn_param->qp_num;
2509                 id_priv->srq = conn_param->srq;
2510         }
2511
2512 #ifdef IB_SUPPORTED
2513         switch (rdma_node_get_transport(id->device->node_type)) {
2514         case RDMA_TRANSPORT_IB:
2515                 if (cma_is_ud_ps(id->ps))
2516                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_SUCCESS,
2517                                 conn_param ? conn_param->private_data : NULL,
2518                                 conn_param ? conn_param->private_data_len : 0);
2519                 else if (conn_param)
2520                         ret = cma_accept_ib(id_priv, conn_param);
2521                 else
2522                         ret = cma_rep_recv(id_priv);
2523                 break;
2524         case RDMA_TRANSPORT_IWARP:
2525 #endif
2526                 ret = cma_accept_iw(id_priv, conn_param);
2527 #ifdef IB_SUPPORTED
2528                 break;
2529         default:
2530                 ret = ENOSYS;
2531                 break;
2532         }
2533 #endif
2534
2535         if (ret)
2536                 goto reject;
2537
2538         return 0;
2539 reject:
2540         cma_modify_qp_err(id);
2541         rdma_reject(id, NULL, 0);
2542         return ret;
2543 }
2544
2545 int rdma_notify(struct rdma_cm_id *id, enum ib_event_type event)
2546 {
2547         struct rdma_id_private *id_priv;
2548         int ret;
2549
2550         id_priv = container_of(id, struct rdma_id_private, id);
2551         if (!cma_has_cm_dev(id_priv))
2552                 return (EINVAL);
2553 #ifdef IB_SUPPORTED
2554         switch (id->device->node_type) {
2555         case RDMA_NODE_IB_CA:
2556                 ret = ib_cm_notify(id_priv->cm_id.ib, event);
2557                 break;
2558         default:
2559 #endif
2560                 ret = 0;
2561 #ifdef IB_SUPPORTED
2562                 break;
2563         }
2564 #endif
2565         return ret;
2566 }
2567
2568 int rdma_reject(struct rdma_cm_id *id, const void *private_data,
2569                 u8 private_data_len)
2570 {
2571         struct rdma_id_private *id_priv;
2572         int ret;
2573
2574         id_priv = container_of(id, struct rdma_id_private, id);
2575         if (!cma_has_cm_dev(id_priv))
2576                 return (EINVAL);
2577
2578 #ifdef IB_SUPPORTED
2579         switch (rdma_node_get_transport(id->device->node_type)) {
2580         case RDMA_TRANSPORT_IB:
2581                 if (cma_is_ud_ps(id->ps))
2582                         ret = cma_send_sidr_rep(id_priv, IB_SIDR_REJECT,
2583                                                 private_data, private_data_len);
2584                 else
2585                         ret = ib_send_cm_rej(id_priv->cm_id.ib,
2586                                              IB_CM_REJ_CONSUMER_DEFINED, NULL,
2587                                              0, private_data, private_data_len);
2588                 break;
2589         case RDMA_TRANSPORT_IWARP:
2590 #endif
2591                 ret = iw_cm_reject(id_priv->cm_id.iw,
2592                                    private_data, private_data_len);
2593 #ifdef IB_SUPPORTED
2594                 break;
2595         default:
2596                 ret = ENOSYS;
2597                 break;
2598         }
2599 #endif
2600         return ret;
2601 }
2602
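/*
 * rdma_disconnect() - tear down an established connection.  For IB the
 * QP is moved to the error state and a DREQ is sent (or a DREP, if the
 * peer's DREQ already arrived); for iWARP iw_cm_disconnect() is called.
 */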
2603 int rdma_disconnect(struct rdma_cm_id *id)
2604 {
2605         struct rdma_id_private *id_priv;
2606         int ret;
2607
2608         id_priv = container_of(id, struct rdma_id_private, id);
2609         if (!cma_has_cm_dev(id_priv))
2610                 return (EINVAL);
2611
2612 #ifdef IB_SUPPORTED
2613         switch (rdma_node_get_transport(id->device->node_type)) {
2614         case RDMA_TRANSPORT_IB:
2615                 ret = cma_modify_qp_err(id);
2616                 if (ret)
2617                         goto out;
2618                 /* Initiate or respond to a disconnect. */
2619                 if (ib_send_cm_dreq(id_priv->cm_id.ib, NULL, 0))
2620                         ib_send_cm_drep(id_priv->cm_id.ib, NULL, 0);
2621                 break;
2622         case RDMA_TRANSPORT_IWARP:
2623 #endif
2624                 ret = iw_cm_disconnect(id_priv->cm_id.iw, 0);
2625 #ifdef IB_SUPPORTED
2626                 break;
2627         default:
2628                 ret = EINVAL;
2629                 break;
2630         }
2631 out:
2632 #endif
2633         return ret;
2634 }
2635
2636 #ifdef IB_SUPPORTED
2637 static int cma_ib_mc_handler(int status, struct ib_sa_multicast *multicast)
2638 {
2639         struct rdma_id_private *id_priv;
2640         struct cma_multicast *mc = multicast->context;
2641         struct rdma_cm_event event;
2642         int ret;
2643
2644         id_priv = mc->id_priv;
2645         if (cma_disable_remove(id_priv, CMA_ADDR_BOUND) &&
2646             cma_disable_remove(id_priv, CMA_ADDR_RESOLVED))
2647                 return 0;
2648
2649         if (!status && id_priv->id.qp)
2650                 status = ib_attach_mcast(id_priv->id.qp, &multicast->rec.mgid,
2651                                          multicast->rec.mlid);
2652
2653         memset(&event, 0, sizeof event);
2654         event.status = status;
2655         event.param.ud.private_data = mc->context;
2656         if (!status) {
2657                 event.event = RDMA_CM_EVENT_MULTICAST_JOIN;
2658                 ib_init_ah_from_mcmember(id_priv->id.device,
2659                                          id_priv->id.port_num, &multicast->rec,
2660                                          &event.param.ud.ah_attr);
2661                 event.param.ud.qp_num = 0xFFFFFF;
2662                 event.param.ud.qkey = be32_to_cpu(multicast->rec.qkey);
2663         } else
2664                 event.event = RDMA_CM_EVENT_MULTICAST_ERROR;
2665
2666         ret = id_priv->id.event_handler(&id_priv->id, &event);
2667         if (ret) {
2668                 cma_exch(id_priv, CMA_DESTROYING);
2669                 cma_enable_remove(id_priv);
2670                 rdma_destroy_id(&id_priv->id);
2671                 return 0;
2672         }
2673
2674         cma_enable_remove(id_priv);
2675         return 0;
2676 }
2677
2678 static void cma_set_mgid(struct rdma_id_private *id_priv,
2679                          struct sockaddr *addr, union ib_gid *mgid)
2680 {
2681         unsigned char mc_map[MAX_ADDR_LEN];
2682         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2683         struct sockaddr_in *sin = (struct sockaddr_in *) addr;
2684         struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) addr;
2685
2686         if (cma_any_addr(addr)) {
2687                 memset(mgid, 0, sizeof *mgid);
2688         } else if ((addr->sa_family == AF_INET6) &&
2689                    ((be32_to_cpu(sin6->sin6_addr.s6_addr32[0]) & 0xFF10A01B) ==
2690                                                                  0xFF10A01B)) {
2691                 /* IPv6 address is an SA assigned MGID. */
2692                 memcpy(mgid, &sin6->sin6_addr, sizeof *mgid);
2693         } else {
2694                 ip_ib_mc_map(sin->sin_addr.s_addr, mc_map);
2695                 if (id_priv->id.ps == RDMA_PS_UDP)
2696                         mc_map[7] = 0x01;       /* Use RDMA CM signature */
2697                 mc_map[8] = ib_addr_get_pkey(dev_addr) >> 8;
2698                 mc_map[9] = (unsigned char) ib_addr_get_pkey(dev_addr);
2699                 *mgid = *(union ib_gid *) (mc_map + 4);
2700         }
2701 }
2702
2703 static int cma_join_ib_multicast(struct rdma_id_private *id_priv,
2704                                  struct cma_multicast *mc)
2705 {
2706         struct ib_sa_mcmember_rec rec;
2707         struct rdma_dev_addr *dev_addr = &id_priv->id.route.addr.dev_addr;
2708         ib_sa_comp_mask comp_mask;
2709         int ret;
2710
2711         ib_addr_get_mgid(dev_addr, &rec.mgid);
2712         ret = ib_sa_get_mcmember_rec(id_priv->id.device, id_priv->id.port_num,
2713                                      &rec.mgid, &rec);
2714         if (ret)
2715                 return ret;
2716
2717         cma_set_mgid(id_priv, &mc->addr, &rec.mgid);
2718         if (id_priv->id.ps == RDMA_PS_UDP)
2719                 rec.qkey = cpu_to_be32(RDMA_UDP_QKEY);
2720         ib_addr_get_sgid(dev_addr, &rec.port_gid);
2721         rec.pkey = cpu_to_be16(ib_addr_get_pkey(dev_addr));
2722         rec.join_state = 1;
2723
2724         comp_mask = IB_SA_MCMEMBER_REC_MGID | IB_SA_MCMEMBER_REC_PORT_GID |
2725                     IB_SA_MCMEMBER_REC_PKEY | IB_SA_MCMEMBER_REC_JOIN_STATE |
2726                     IB_SA_MCMEMBER_REC_QKEY | IB_SA_MCMEMBER_REC_SL |
2727                     IB_SA_MCMEMBER_REC_FLOW_LABEL |
2728                     IB_SA_MCMEMBER_REC_TRAFFIC_CLASS;
2729
2730         mc->multicast.ib = ib_sa_join_multicast(&sa_client, id_priv->id.device,
2731                                                 id_priv->id.port_num, &rec,
2732                                                 comp_mask, M_NOWAIT,
2733                                                 cma_ib_mc_handler, mc);
2734         if (IS_ERR(mc->multicast.ib))
2735                 return PTR_ERR(mc->multicast.ib);
2736
2737         return 0;
2738 }
2739
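/*
 * rdma_join_multicast() - join the IB multicast group derived from the
 * given IP address.  The id must be address-bound or address-resolved.
 * A cma_multicast entry is linked onto the id and an SA MCMember join
 * is issued; cma_ib_mc_handler() attaches the id's QP to the group and
 * reports RDMA_CM_EVENT_MULTICAST_JOIN (or MULTICAST_ERROR).
 */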
2740 int rdma_join_multicast(struct rdma_cm_id *id, struct sockaddr *addr,
2741                         void *context)
2742 {
2743         struct rdma_id_private *id_priv;
2744         struct cma_multicast *mc;
2745         int ret;
2746
2747         id_priv = container_of(id, struct rdma_id_private, id);
2748         if (!cma_comp(id_priv, CMA_ADDR_BOUND) &&
2749             !cma_comp(id_priv, CMA_ADDR_RESOLVED))
2750                 return (EINVAL);
2751
2752         mc = malloc(sizeof *mc, M_DEVBUF, M_NOWAIT);
2753         if (!mc)
2754                 return (ENOMEM);
2755
2756         memcpy(&mc->addr, addr, ip_addr_size(addr));
2757         mc->context = context;
2758         mc->id_priv = id_priv;
2759
2760         mtx_lock(&id_priv->lock);
2761         LIST_INSERT_HEAD(&id_priv->mc_list, mc, list);
2762         mtx_unlock(&id_priv->lock);
2763
2764         switch (rdma_node_get_transport(id->device->node_type)) {
2765         case RDMA_TRANSPORT_IB:
2766                 ret = cma_join_ib_multicast(id_priv, mc);
2767                 break;
2768         default:
2769                 ret = ENOSYS;
2770                 break;
2771         }
2772
2773         if (ret) {
2774                 mtx_lock(&id_priv->lock);
2775                 LIST_REMOVE(mc, list);
2776                 mtx_unlock(&id_priv->lock);
2777                 free(mc, M_DEVBUF);
2778         }
2779         return ret;
2780 }
2781
2782 void rdma_leave_multicast(struct rdma_cm_id *id, struct sockaddr *addr)
2783 {
2784         struct rdma_id_private *id_priv;
2785         struct cma_multicast *mc;
2786
2787         id_priv = container_of(id, struct rdma_id_private, id);
2788         mtx_lock(&id_priv->lock);
2789         LIST_FOREACH(mc, &id_priv->mc_list, list) {
2790                 if (!memcmp(&mc->addr, addr, ip_addr_size(addr))) {
2791                         LIST_REMOVE(mc, list);
2792                         mtx_unlock(&id_priv->lock);
2793
2794                         if (id->qp)
2795                                 ib_detach_mcast(id->qp,
2796                                                 &mc->multicast.ib->rec.mgid,
2797                                                 mc->multicast.ib->rec.mlid);
2798                         ib_sa_free_multicast(mc->multicast.ib, M_DEVBUF);
2799                         free(mc, M_DEVBUF);
2800                         return;
2801                 }
2802         }
2803         mtx_unlock(&id_priv->lock);
2804 }
2805 #endif
2806
2807 static void cma_add_one(struct ib_device *device)
2808 {
2809         struct cma_device *cma_dev;
2810         struct rdma_id_private *id_priv;
2811
2812         cma_dev = malloc(sizeof *cma_dev, M_DEVBUF, M_NOWAIT|M_ZERO);
2813         if (!cma_dev)
2814                 return;
2815
2816         cma_dev->device = device;
2817
2818         cv_init(&cma_dev->comp, "cma_device");
2819         mtx_init(&cma_dev->lock, "cma_device", NULL, MTX_DUPOK|MTX_DEF);
2820         cma_dev->refcount = 1;
2821         LIST_INIT(&cma_dev->id_list);
2822         ib_set_client_data(device, &cma_client, cma_dev);
2823
2824         mtx_lock(&lock);
2825         TAILQ_INSERT_TAIL(&dev_list, cma_dev, list);
2826         LIST_FOREACH(id_priv, &listen_any_list, list)
2827                 cma_listen_on_dev(id_priv, cma_dev);
2828         mtx_unlock(&lock);
2829 }
2830
2831 static int cma_remove_id_dev(struct rdma_id_private *id_priv)
2832 {
2833         struct rdma_cm_event event;
2834         enum cma_state state;
2835
2836         /* Record that we want to remove the device */
2837         state = cma_exch(id_priv, CMA_DEVICE_REMOVAL);
2838         if (state == CMA_DESTROYING)
2839                 return 0;
2840
2841         cma_cancel_operation(id_priv, state);
2842         mtx_lock(&id_priv->lock);
2843         PANIC_IF(id_priv->dev_remove < 0);
2844         if (id_priv->dev_remove)
2845                 cv_wait(&id_priv->wait_remove, &id_priv->lock);
2846         mtx_unlock(&id_priv->lock);
2847
2848         /* Check for destruction from another callback. */
2849         if (!cma_comp(id_priv, CMA_DEVICE_REMOVAL))
2850                 return 0;
2851
2852         memset(&event, 0, sizeof event);
2853         event.event = RDMA_CM_EVENT_DEVICE_REMOVAL;
2854         return id_priv->id.event_handler(&id_priv->id, &event);
2855 }
2856
2857 static void cma_process_remove(struct cma_device *cma_dev)
2858 {
2859         struct rdma_id_private *id_priv;
2860         int ret;
2861
2862         mtx_lock(&lock);
2863         while (!LIST_EMPTY(&cma_dev->id_list)) {
2864                 id_priv = LIST_FIRST(&cma_dev->id_list);
2865
2866                 if (cma_internal_listen(id_priv)) {
2867                         cma_destroy_listen(id_priv);
2868                         continue;
2869                 }
2870
2871                 LIST_REMOVE(id_priv, list);
2872                 mtx_lock(&id_priv->lock);
2873                 id_priv->refcount++;
2874                 mtx_unlock(&id_priv->lock);
2875                 mtx_unlock(&lock);
2876
2877                 ret = cma_remove_id_dev(id_priv);
2878                 cma_deref_id(id_priv);
2879                 if (ret)
2880                         rdma_destroy_id(&id_priv->id);
2881
2882                 mtx_lock(&lock);
2883         }
2884         mtx_unlock(&lock);
2885
2886         cma_deref_dev(cma_dev);
2887         mtx_lock(&cma_dev->lock);
2888         PANIC_IF(cma_dev->refcount < 0);
2889         if (cma_dev->refcount)
2890                 cv_wait(&cma_dev->comp, &cma_dev->lock);
2891         mtx_unlock(&cma_dev->lock);
2892 }
2893
2894 static void cma_remove_one(struct ib_device *device)
2895 {
2896         struct cma_device *cma_dev;
2897
2898         cma_dev = ib_get_client_data(device, &cma_client);
2899         if (!cma_dev)
2900                 return;
2901
2902         mtx_lock(&lock);
2903         TAILQ_REMOVE(&dev_list, cma_dev, list);
2904         mtx_unlock(&lock);
2905
2906         cma_process_remove(cma_dev);
2907         free(cma_dev, M_DEVBUF);
2908 }
2909
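/*
 * cma_init() - module initialization.  Sets up the device list, picks a
 * random starting ephemeral port, creates the "rdma_cm" taskqueue and
 * registers the address-resolution and IB clients (plus the SA client
 * when IB is supported), so that cma_add_one()/cma_remove_one() run for
 * each RDMA device.
 */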
2910 static int cma_init(void)
2911 {
2912         int ret;
2913
2914         LIST_INIT(&listen_any_list);
2915         TAILQ_INIT(&dev_list);
2916         mtx_init(&lock, "cma_device list", NULL, MTX_DEF);
2917
2918         arc4rand(&next_port, sizeof next_port, 0);
2919         next_port = ((unsigned int) next_port %
2920                     (ipport_lastauto - ipport_firstauto)) +
2921                     ipport_firstauto;
2922         cma_wq = taskqueue_create("rdma_cm", M_NOWAIT, taskqueue_thread_enqueue,
2923                 &cma_wq);
2924
2925         if (!cma_wq)
2926                 return (ENOMEM);
2927         
2928         taskqueue_start_threads(&cma_wq, 1, PI_NET, "cma_wq thread"); 
2929 #ifdef IB_SUPPORTED
2930         ib_sa_register_client(&sa_client);
2931 #endif
2932         rdma_addr_register_client(&addr_client);
2933
2934         ret = ib_register_client(&cma_client);
2935         if (ret)
2936                 goto err;
2937         return 0;
2938
2939 err:
2940         rdma_addr_unregister_client(&addr_client);
2941 #ifdef IB_SUPPORTED
2942         ib_sa_unregister_client(&sa_client);
2943 #endif
2944         taskqueue_free(cma_wq);
2945         return ret;
2946 }
2947
2948 static void cma_cleanup(void)
2949 {
2950         ib_unregister_client(&cma_client);
2951         rdma_addr_unregister_client(&addr_client);
2952 #ifdef IB_SUPPORTED
2953         ib_sa_unregister_client(&sa_client);
2954 #endif
2955         taskqueue_free(cma_wq);
2956         kvl_free(&sdp_ps);
2957         kvl_free(&tcp_ps);
2958         kvl_free(&udp_ps);
2959         kvl_free(&ipoib_ps);
2960 }
2961
2962 static int 
2963 cma_load(module_t mod, int cmd, void *arg)
2964 {
2965         int err = 0;
2966
2967         switch (cmd) {
2968         case MOD_LOAD:
2969                 printf("Loading rdma_cma.\n");
2970                 err = cma_init();
2971                 break;
2972         case MOD_QUIESCE:
2973                 break;
2974         case MOD_UNLOAD:
2975                 printf("Unloading rdma_cma.\n");
2976                 cma_cleanup();
2977                 break;
2978         case MOD_SHUTDOWN:
2979                 break;
2980         default:
2981                 err = EOPNOTSUPP;
2982                 break;
2983         }
2984
2985         return (err);
2986 }
2987
2988 static moduledata_t mod_data = {
2989         "rdma_cma",
2990         cma_load,
2991         0
2992 };
2993
2994 MODULE_VERSION(rdma_cma, 1);
2995 MODULE_DEPEND(rdma_cma, rdma_core, 1, 1, 1);
2996 MODULE_DEPEND(rdma_cma, rdma_addr, 1, 1, 1);
2997 MODULE_DEPEND(rdma_cma, rdma_iwcm, 1, 1, 1);
2998 DECLARE_MODULE(rdma_cma, mod_data, SI_SUB_EXEC, SI_ORDER_ANY);