/*-
 * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
 *
 * Copyright (c) 2004-2007 Intel Corporation.  All rights reserved.
 * Copyright (c) 2004 Topspin Corporation.  All rights reserved.
 * Copyright (c) 2004, 2005 Voltaire Corporation.  All rights reserved.
 * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/interrupt.h>
#include <linux/random.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
#include <linux/slab.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include <linux/kdev_t.h>
#include <linux/etherdevice.h>

#include <asm/atomic-long.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_cm.h>
#include "cm_msgs.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand CM");
MODULE_LICENSE("Dual BSD/GPL");

static void cm_add_one(struct ib_device *device);
static void cm_remove_one(struct ib_device *device, void *client_data);

static struct ib_client cm_client = {
	.name   = "cm",
	.add    = cm_add_one,
	.remove = cm_remove_one
};

static struct ib_cm {
	spinlock_t lock;
	struct list_head device_list;
	rwlock_t device_lock;
	struct rb_root listen_service_table;
	u64 listen_service_id;
	/* struct rb_root peer_service_table; todo: fix peer to peer */
	struct rb_root remote_qp_table;
	struct rb_root remote_id_table;
	struct rb_root remote_sidr_table;
	struct idr local_id_table;
	__be32 random_id_operand;
	struct list_head timewait_list;
	struct workqueue_struct *wq;
	/* Synchronizes CM port state changes */
	spinlock_t state_lock;
} cm;

/* Counter indexes ordered by attribute ID */
enum {
	CM_REQ_COUNTER,
	CM_MRA_COUNTER,
	CM_REJ_COUNTER,
	CM_REP_COUNTER,
	CM_RTU_COUNTER,
	CM_DREQ_COUNTER,
	CM_DREP_COUNTER,
	CM_SIDR_REQ_COUNTER,
	CM_SIDR_REP_COUNTER,
	CM_LAP_COUNTER,
	CM_APR_COUNTER,
	CM_ATTR_COUNT,
	CM_ATTR_ID_OFFSET = 0x0010,
};

enum {
	CM_XMIT,
	CM_XMIT_RETRIES,
	CM_RECV,
	CM_RECV_DUPLICATES,
	CM_COUNTER_GROUPS
};

static char const counter_group_names[CM_COUNTER_GROUPS]
				     [sizeof("cm_rx_duplicates")] = {
	"cm_tx_msgs", "cm_tx_retries",
	"cm_rx_msgs", "cm_rx_duplicates"
};

struct cm_counter_group {
	struct kobject obj;
	atomic_long_t counter[CM_ATTR_COUNT];
};

struct cm_counter_attribute {
	struct attribute attr;
	int index;
};

#define CM_COUNTER_ATTR(_name, _index) \
struct cm_counter_attribute cm_##_name##_counter_attr = { \
	.attr = { .name = __stringify(_name), .mode = 0444 }, \
	.index = _index \
}
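
/*
 * For illustration, CM_COUNTER_ATTR(req, CM_REQ_COUNTER) below expands to:
 *
 *	struct cm_counter_attribute cm_req_counter_attr = {
 *		.attr = { .name = "req", .mode = 0444 },
 *		.index = CM_REQ_COUNTER
 *	};
 */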

static CM_COUNTER_ATTR(req, CM_REQ_COUNTER);
static CM_COUNTER_ATTR(mra, CM_MRA_COUNTER);
static CM_COUNTER_ATTR(rej, CM_REJ_COUNTER);
static CM_COUNTER_ATTR(rep, CM_REP_COUNTER);
static CM_COUNTER_ATTR(rtu, CM_RTU_COUNTER);
static CM_COUNTER_ATTR(dreq, CM_DREQ_COUNTER);
static CM_COUNTER_ATTR(drep, CM_DREP_COUNTER);
static CM_COUNTER_ATTR(sidr_req, CM_SIDR_REQ_COUNTER);
static CM_COUNTER_ATTR(sidr_rep, CM_SIDR_REP_COUNTER);
static CM_COUNTER_ATTR(lap, CM_LAP_COUNTER);
static CM_COUNTER_ATTR(apr, CM_APR_COUNTER);

static struct attribute *cm_counter_default_attrs[] = {
	&cm_req_counter_attr.attr,
	&cm_mra_counter_attr.attr,
	&cm_rej_counter_attr.attr,
	&cm_rep_counter_attr.attr,
	&cm_rtu_counter_attr.attr,
	&cm_dreq_counter_attr.attr,
	&cm_drep_counter_attr.attr,
	&cm_sidr_req_counter_attr.attr,
	&cm_sidr_rep_counter_attr.attr,
	&cm_lap_counter_attr.attr,
	&cm_apr_counter_attr.attr,
	NULL
};

struct cm_port {
	struct cm_device *cm_dev;
	struct ib_mad_agent *mad_agent;
	struct kobject port_obj;
	u8 port_num;
	struct list_head cm_priv_prim_list;
	struct list_head cm_priv_altr_list;
	struct cm_counter_group counter_group[CM_COUNTER_GROUPS];
};

struct cm_device {
	struct list_head list;
	struct ib_device *ib_device;
	struct device *device;
	u8 ack_delay;
	int going_down;
	struct cm_port *port[0];
};

struct cm_av {
	struct cm_port *port;
	union ib_gid dgid;
	struct ib_ah_attr ah_attr;
	u16 pkey_index;
	u8 timeout;
};

struct cm_work {
	struct delayed_work work;
	struct list_head list;
	struct cm_port *port;
	struct ib_mad_recv_wc *mad_recv_wc;	/* Received MADs */
	__be32 local_id;			/* Established / timewait */
	__be32 remote_id;
	struct ib_cm_event cm_event;
	struct ib_sa_path_rec path[0];
};

struct cm_timewait_info {
	struct cm_work work;			/* Must be first. */
	struct list_head list;
	struct rb_node remote_qp_node;
	struct rb_node remote_id_node;
	__be64 remote_ca_guid;
	__be32 remote_qpn;
	u8 inserted_remote_qp;
	u8 inserted_remote_id;
};

struct cm_id_private {
	struct ib_cm_id id;

	struct rb_node service_node;
	struct rb_node sidr_id_node;
	spinlock_t lock;	/* Do not acquire inside cm.lock */
	struct completion comp;
	atomic_t refcount;
	/* Number of clients sharing this ib_cm_id. Only valid for listeners.
	 * Protected by the cm.lock spinlock. */
	int listen_sharecount;

	struct ib_mad_send_buf *msg;
	struct cm_timewait_info *timewait_info;
	/* todo: use alternate port on send failure */
	struct cm_av av;
	struct cm_av alt_av;

	void *private_data;
	__be64 tid;
	__be32 local_qpn;
	__be32 remote_qpn;
	enum ib_qp_type qp_type;
	__be32 sq_psn;
	__be32 rq_psn;
	int timeout_ms;
	enum ib_mtu path_mtu;
	__be16 pkey;
	u8 private_data_len;
	u8 max_cm_retries;
	u8 peer_to_peer;
	u8 responder_resources;
	u8 initiator_depth;
	u8 retry_count;
	u8 rnr_retry_count;
	u8 service_timeout;
	u8 target_ack_delay;

	struct list_head prim_list;
	struct list_head altr_list;
	/* Nonzero while the send port's MAD agent is not registered
	 * or the av is not set */
	int prim_send_port_not_ready;
	int altr_send_port_not_ready;

	struct list_head work_list;
	atomic_t work_count;
};

static void cm_work_handler(struct work_struct *work);

static inline void cm_deref_id(struct cm_id_private *cm_id_priv)
{
	if (atomic_dec_and_test(&cm_id_priv->refcount))
		complete(&cm_id_priv->comp);
}

static int cm_alloc_msg(struct cm_id_private *cm_id_priv,
			struct ib_mad_send_buf **msg)
{
	struct ib_mad_agent *mad_agent;
	struct ib_mad_send_buf *m;
	struct ib_ah *ah;
	struct cm_av *av;
	unsigned long flags, flags2;
	int ret = 0;

	/* Don't let the port be released until the agent is done */
	spin_lock_irqsave(&cm.state_lock, flags2);
	spin_lock_irqsave(&cm.lock, flags);
	if (!cm_id_priv->prim_send_port_not_ready)
		av = &cm_id_priv->av;
	else if (!cm_id_priv->altr_send_port_not_ready &&
		 (cm_id_priv->alt_av.port))
		av = &cm_id_priv->alt_av;
	else {
		pr_info("%s: not a valid CM id\n", __func__);
		ret = -ENODEV;
		spin_unlock_irqrestore(&cm.lock, flags);
		goto out;
	}
	spin_unlock_irqrestore(&cm.lock, flags);
	/* Make sure the port hasn't released the MAD agent yet */
	mad_agent = av->port->mad_agent;
	if (!mad_agent) {
		pr_info("%s: not a valid MAD agent\n", __func__);
		ret = -ENODEV;
		goto out;
	}
	ah = ib_create_ah(mad_agent->qp->pd, &av->ah_attr);
	if (IS_ERR(ah)) {
		ret = PTR_ERR(ah);
		goto out;
	}

	m = ib_create_send_mad(mad_agent, cm_id_priv->id.remote_cm_qpn,
			       av->pkey_index,
			       0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
			       GFP_ATOMIC,
			       IB_MGMT_BASE_VERSION);
	if (IS_ERR(m)) {
		ib_destroy_ah(ah);
		ret = PTR_ERR(m);
		goto out;
	}

	/* Timeout set by caller if response is expected. */
	m->ah = ah;
	m->retries = cm_id_priv->max_cm_retries;

	atomic_inc(&cm_id_priv->refcount);
	m->context[0] = cm_id_priv;
	*msg = m;

out:
	spin_unlock_irqrestore(&cm.state_lock, flags2);
	return ret;
}

static struct ib_mad_send_buf *cm_alloc_response_msg_no_ah(struct cm_port *port,
							   struct ib_mad_recv_wc *mad_recv_wc)
{
	return ib_create_send_mad(port->mad_agent, 1, mad_recv_wc->wc->pkey_index,
				  0, IB_MGMT_MAD_HDR, IB_MGMT_MAD_DATA,
				  GFP_ATOMIC,
				  IB_MGMT_BASE_VERSION);
}

static int cm_create_response_msg_ah(struct cm_port *port,
				     struct ib_mad_recv_wc *mad_recv_wc,
				     struct ib_mad_send_buf *msg)
{
	struct ib_ah *ah;

	ah = ib_create_ah_from_wc(port->mad_agent->qp->pd, mad_recv_wc->wc,
				  mad_recv_wc->recv_buf.grh, port->port_num);
	if (IS_ERR(ah))
		return PTR_ERR(ah);

	msg->ah = ah;
	return 0;
}

static void cm_free_msg(struct ib_mad_send_buf *msg)
{
	if (msg->ah)
		ib_destroy_ah(msg->ah);
	if (msg->context[0])
		cm_deref_id(msg->context[0]);
	ib_free_send_mad(msg);
}

static int cm_alloc_response_msg(struct cm_port *port,
				 struct ib_mad_recv_wc *mad_recv_wc,
				 struct ib_mad_send_buf **msg)
{
	struct ib_mad_send_buf *m;
	int ret;

	m = cm_alloc_response_msg_no_ah(port, mad_recv_wc);
	if (IS_ERR(m))
		return PTR_ERR(m);

	ret = cm_create_response_msg_ah(port, mad_recv_wc, m);
	if (ret) {
		cm_free_msg(m);
		return ret;
	}

	*msg = m;
	return 0;
}

static void * cm_copy_private_data(const void *private_data,
				   u8 private_data_len)
{
	void *data;

	if (!private_data || !private_data_len)
		return NULL;

	data = kmemdup(private_data, private_data_len, GFP_KERNEL);
	if (!data)
		return ERR_PTR(-ENOMEM);

	return data;
}

static void cm_set_private_data(struct cm_id_private *cm_id_priv,
				void *private_data, u8 private_data_len)
{
	if (cm_id_priv->private_data && cm_id_priv->private_data_len)
		kfree(cm_id_priv->private_data);

	cm_id_priv->private_data = private_data;
	cm_id_priv->private_data_len = private_data_len;
}

static int cm_init_av_for_response(struct cm_port *port, struct ib_wc *wc,
				   struct ib_grh *grh, struct cm_av *av)
{
	av->port = port;
	av->pkey_index = wc->pkey_index;
	return ib_init_ah_from_wc(port->cm_dev->ib_device, port->port_num, wc,
				  grh, &av->ah_attr);
}

static int cm_init_av_by_path(struct ib_sa_path_rec *path, struct cm_av *av,
			      struct cm_id_private *cm_id_priv)
{
	struct cm_device *cm_dev;
	struct cm_port *port = NULL;
	unsigned long flags;
	int ret;
	u8 p;
	struct net_device *ndev = ib_get_ndev_from_path(path);

	read_lock_irqsave(&cm.device_lock, flags);
	list_for_each_entry(cm_dev, &cm.device_list, list) {
		if (!ib_find_cached_gid(cm_dev->ib_device, &path->sgid,
					path->gid_type, ndev, &p, NULL)) {
			port = cm_dev->port[p-1];
			break;
		}
	}
	read_unlock_irqrestore(&cm.device_lock, flags);

	if (ndev)
		dev_put(ndev);

	if (!port)
		return -EINVAL;

	ret = ib_find_cached_pkey(cm_dev->ib_device, port->port_num,
				  be16_to_cpu(path->pkey), &av->pkey_index);
	if (ret)
		return ret;

	av->port = port;
	ret = ib_init_ah_from_path(cm_dev->ib_device, port->port_num,
				   path, &av->ah_attr);
	if (ret)
		return ret;

	av->timeout = path->packet_life_time + 1;

	spin_lock_irqsave(&cm.lock, flags);
	if (&cm_id_priv->av == av)
		list_add_tail(&cm_id_priv->prim_list, &port->cm_priv_prim_list);
	else if (&cm_id_priv->alt_av == av)
		list_add_tail(&cm_id_priv->altr_list, &port->cm_priv_altr_list);
	else
		ret = -EINVAL;

	spin_unlock_irqrestore(&cm.lock, flags);

	return ret;
}

static int cm_alloc_id(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;
	int id;

	idr_preload(GFP_KERNEL);
	spin_lock_irqsave(&cm.lock, flags);

	id = idr_alloc_cyclic(&cm.local_id_table, cm_id_priv, 0, 0, GFP_NOWAIT);

	spin_unlock_irqrestore(&cm.lock, flags);
	idr_preload_end();

	cm_id_priv->id.local_id = (__force __be32)id ^ cm.random_id_operand;
	return id < 0 ? id : 0;
}
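
/*
 * Note: the IDR stores entries under the raw cyclic id, while the
 * externally visible local_id is that id XOR'ed with the random
 * cm.random_id_operand, which makes on-the-wire communication IDs
 * harder to guess.  cm_free_id() and cm_get_id() below undo the XOR
 * before looking up the IDR.
 */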

static void cm_free_id(__be32 local_id)
{
	spin_lock_irq(&cm.lock);
	idr_remove(&cm.local_id_table,
		   (__force int) (local_id ^ cm.random_id_operand));
	spin_unlock_irq(&cm.lock);
}

static struct cm_id_private * cm_get_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	cm_id_priv = idr_find(&cm.local_id_table,
			      (__force int) (local_id ^ cm.random_id_operand));
	if (cm_id_priv) {
		if (cm_id_priv->id.remote_id == remote_id)
			atomic_inc(&cm_id_priv->refcount);
		else
			cm_id_priv = NULL;
	}

	return cm_id_priv;
}

static struct cm_id_private * cm_acquire_id(__be32 local_id, __be32 remote_id)
{
	struct cm_id_private *cm_id_priv;

	spin_lock_irq(&cm.lock);
	cm_id_priv = cm_get_id(local_id, remote_id);
	spin_unlock_irq(&cm.lock);

	return cm_id_priv;
}

/*
 * Trivial helpers to strip endian annotation and compare; the
 * endianness doesn't actually matter since we just need a stable
 * order for the RB tree.
 */
static int be32_lt(__be32 a, __be32 b)
{
	return (__force u32) a < (__force u32) b;
}

static int be32_gt(__be32 a, __be32 b)
{
	return (__force u32) a > (__force u32) b;
}

static int be64_lt(__be64 a, __be64 b)
{
	return (__force u64) a < (__force u64) b;
}

static int be64_gt(__be64 a, __be64 b)
{
	return (__force u64) a > (__force u64) b;
}

static struct cm_id_private * cm_insert_listen(struct cm_id_private *cm_id_priv)
{
	struct rb_node **link = &cm.listen_service_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	__be64 service_id = cm_id_priv->id.service_id;
	__be64 service_mask = cm_id_priv->id.service_mask;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  service_node);
		if ((cur_cm_id_priv->id.service_mask & service_id) ==
		    (service_mask & cur_cm_id_priv->id.service_id) &&
		    (cm_id_priv->id.device == cur_cm_id_priv->id.device))
			return cur_cm_id_priv;

		if (cm_id_priv->id.device < cur_cm_id_priv->id.device)
			link = &(*link)->rb_left;
		else if (cm_id_priv->id.device > cur_cm_id_priv->id.device)
			link = &(*link)->rb_right;
		else if (be64_lt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_left;
		else if (be64_gt(service_id, cur_cm_id_priv->id.service_id))
			link = &(*link)->rb_right;
		else
			link = &(*link)->rb_right;
	}
	rb_link_node(&cm_id_priv->service_node, parent, link);
	rb_insert_color(&cm_id_priv->service_node, &cm.listen_service_table);
	return NULL;
}

static struct cm_id_private * cm_find_listen(struct ib_device *device,
					     __be64 service_id)
{
	struct rb_node *node = cm.listen_service_table.rb_node;
	struct cm_id_private *cm_id_priv;

	while (node) {
		cm_id_priv = rb_entry(node, struct cm_id_private, service_node);
		if ((cm_id_priv->id.service_mask & service_id) ==
		     cm_id_priv->id.service_id &&
		    (cm_id_priv->id.device == device))
			return cm_id_priv;

		if (device < cm_id_priv->id.device)
			node = node->rb_left;
		else if (device > cm_id_priv->id.device)
			node = node->rb_right;
		else if (be64_lt(service_id, cm_id_priv->id.service_id))
			node = node->rb_left;
		else if (be64_gt(service_id, cm_id_priv->id.service_id))
			node = node->rb_right;
		else
			node = node->rb_right;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_id(struct cm_timewait_info
						     *timewait_info)
{
	struct rb_node **link = &cm.remote_id_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_id = timewait_info->work.remote_id;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_id_node);
		if (be32_lt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_timewait_info->work.remote_id))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_id = 1;
	rb_link_node(&timewait_info->remote_id_node, parent, link);
	rb_insert_color(&timewait_info->remote_id_node, &cm.remote_id_table);
	return NULL;
}

static struct cm_timewait_info * cm_find_remote_id(__be64 remote_ca_guid,
						   __be32 remote_id)
{
	struct rb_node *node = cm.remote_id_table.rb_node;
	struct cm_timewait_info *timewait_info;

	while (node) {
		timewait_info = rb_entry(node, struct cm_timewait_info,
					 remote_id_node);
		if (be32_lt(remote_id, timewait_info->work.remote_id))
			node = node->rb_left;
		else if (be32_gt(remote_id, timewait_info->work.remote_id))
			node = node->rb_right;
		else if (be64_lt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_left;
		else if (be64_gt(remote_ca_guid, timewait_info->remote_ca_guid))
			node = node->rb_right;
		else
			return timewait_info;
	}
	return NULL;
}

static struct cm_timewait_info * cm_insert_remote_qpn(struct cm_timewait_info
						      *timewait_info)
{
	struct rb_node **link = &cm.remote_qp_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_timewait_info *cur_timewait_info;
	__be64 remote_ca_guid = timewait_info->remote_ca_guid;
	__be32 remote_qpn = timewait_info->remote_qpn;

	while (*link) {
		parent = *link;
		cur_timewait_info = rb_entry(parent, struct cm_timewait_info,
					     remote_qp_node);
		if (be32_lt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_qpn, cur_timewait_info->remote_qpn))
			link = &(*link)->rb_right;
		else if (be64_lt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_left;
		else if (be64_gt(remote_ca_guid, cur_timewait_info->remote_ca_guid))
			link = &(*link)->rb_right;
		else
			return cur_timewait_info;
	}
	timewait_info->inserted_remote_qp = 1;
	rb_link_node(&timewait_info->remote_qp_node, parent, link);
	rb_insert_color(&timewait_info->remote_qp_node, &cm.remote_qp_table);
	return NULL;
}

static struct cm_id_private * cm_insert_remote_sidr(struct cm_id_private
						    *cm_id_priv)
{
	struct rb_node **link = &cm.remote_sidr_table.rb_node;
	struct rb_node *parent = NULL;
	struct cm_id_private *cur_cm_id_priv;
	union ib_gid *port_gid = &cm_id_priv->av.dgid;
	__be32 remote_id = cm_id_priv->id.remote_id;

	while (*link) {
		parent = *link;
		cur_cm_id_priv = rb_entry(parent, struct cm_id_private,
					  sidr_id_node);
		if (be32_lt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_left;
		else if (be32_gt(remote_id, cur_cm_id_priv->id.remote_id))
			link = &(*link)->rb_right;
		else {
			int cmp;
			cmp = memcmp(port_gid, &cur_cm_id_priv->av.dgid,
				     sizeof *port_gid);
			if (cmp < 0)
				link = &(*link)->rb_left;
			else if (cmp > 0)
				link = &(*link)->rb_right;
			else
				return cur_cm_id_priv;
		}
	}
	rb_link_node(&cm_id_priv->sidr_id_node, parent, link);
	rb_insert_color(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
	return NULL;
}

static void cm_reject_sidr_req(struct cm_id_private *cm_id_priv,
			       enum ib_cm_sidr_status status)
{
	struct ib_cm_sidr_rep_param param;

	memset(&param, 0, sizeof param);
	param.status = status;
	ib_send_cm_sidr_rep(&cm_id_priv->id, &param);
}

struct ib_cm_id *ib_create_cm_id(struct ib_device *device,
				 ib_cm_handler cm_handler,
				 void *context)
{
	struct cm_id_private *cm_id_priv;
	int ret;

	cm_id_priv = kzalloc(sizeof *cm_id_priv, GFP_KERNEL);
	if (!cm_id_priv)
		return ERR_PTR(-ENOMEM);

	cm_id_priv->id.state = IB_CM_IDLE;
	cm_id_priv->id.device = device;
	cm_id_priv->id.cm_handler = cm_handler;
	cm_id_priv->id.context = context;
	cm_id_priv->id.remote_cm_qpn = 1;
	ret = cm_alloc_id(cm_id_priv);
	if (ret)
		goto error;

	spin_lock_init(&cm_id_priv->lock);
	init_completion(&cm_id_priv->comp);
	INIT_LIST_HEAD(&cm_id_priv->work_list);
	INIT_LIST_HEAD(&cm_id_priv->prim_list);
	INIT_LIST_HEAD(&cm_id_priv->altr_list);
	atomic_set(&cm_id_priv->work_count, -1);
	atomic_set(&cm_id_priv->refcount, 1);
	return &cm_id_priv->id;

error:
	kfree(cm_id_priv);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_create_cm_id);
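
/*
 * Typical usage (illustrative only; my_handler and my_context are
 * placeholders for caller-supplied values):
 *
 *	struct ib_cm_id *cm_id;
 *
 *	cm_id = ib_create_cm_id(device, my_handler, my_context);
 *	if (IS_ERR(cm_id))
 *		return PTR_ERR(cm_id);
 *	...
 *	ib_destroy_cm_id(cm_id);
 */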

static struct cm_work * cm_dequeue_work(struct cm_id_private *cm_id_priv)
{
	struct cm_work *work;

	if (list_empty(&cm_id_priv->work_list))
		return NULL;

	work = list_entry(cm_id_priv->work_list.next, struct cm_work, list);
	list_del(&work->list);
	return work;
}

static void cm_free_work(struct cm_work *work)
{
	if (work->mad_recv_wc)
		ib_free_recv_mad(work->mad_recv_wc);
	kfree(work);
}

static inline int cm_convert_to_ms(int iba_time)
{
	/* approximate conversion to ms from 4.096us x 2^iba_time */
	return 1 << max(iba_time - 8, 0);
}
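
/*
 * Worked example: an IBA time of 20 encodes 4.096us x 2^20 ~= 4295 ms;
 * the approximation above yields 1 << (20 - 8) = 4096 ms.  It works
 * because 4.096us x 2^t = 2^t / 244 ms ~= 2^(t - 8) ms.
 */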

/*
 * calculate: 4.096x2^ack_timeout = 4.096x2^ack_delay + 2x4.096x2^life_time
 * Because of how ack_timeout is stored, adding one doubles the timeout.
 * To avoid large timeouts, select the max(ack_delay, life_time + 1), and
 * increment it (round up) only if the other is within 50%.
 */
static u8 cm_ack_timeout(u8 ca_ack_delay, u8 packet_life_time)
{
	int ack_timeout = packet_life_time + 1;

	if (ack_timeout >= ca_ack_delay)
		ack_timeout += (ca_ack_delay >= (ack_timeout - 1));
	else
		ack_timeout = ca_ack_delay +
			      (ack_timeout >= (ca_ack_delay - 1));

	return min(31, ack_timeout);
}
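
/*
 * For example, with ca_ack_delay = 15 and packet_life_time = 14:
 * ack_timeout starts at 15, and since ca_ack_delay (15) >= 14 it is
 * bumped to 16, i.e. the encoded timeout is doubled because the two
 * contributions are of similar magnitude.
 */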

static void cm_cleanup_timewait(struct cm_timewait_info *timewait_info)
{
	if (timewait_info->inserted_remote_id) {
		rb_erase(&timewait_info->remote_id_node, &cm.remote_id_table);
		timewait_info->inserted_remote_id = 0;
	}

	if (timewait_info->inserted_remote_qp) {
		rb_erase(&timewait_info->remote_qp_node, &cm.remote_qp_table);
		timewait_info->inserted_remote_qp = 0;
	}
}

static struct cm_timewait_info * cm_create_timewait_info(__be32 local_id)
{
	struct cm_timewait_info *timewait_info;

	timewait_info = kzalloc(sizeof *timewait_info, GFP_KERNEL);
	if (!timewait_info)
		return ERR_PTR(-ENOMEM);

	timewait_info->work.local_id = local_id;
	INIT_DELAYED_WORK(&timewait_info->work.work, cm_work_handler);
	timewait_info->work.cm_event.event = IB_CM_TIMEWAIT_EXIT;
	return timewait_info;
}

static void cm_enter_timewait(struct cm_id_private *cm_id_priv)
{
	int wait_time;
	unsigned long flags;
	struct cm_device *cm_dev;

	cm_dev = ib_get_client_data(cm_id_priv->id.device, &cm_client);
	if (!cm_dev)
		return;

	spin_lock_irqsave(&cm.lock, flags);
	cm_cleanup_timewait(cm_id_priv->timewait_info);
	list_add_tail(&cm_id_priv->timewait_info->list, &cm.timewait_list);
	spin_unlock_irqrestore(&cm.lock, flags);

	/*
	 * The cm_id could be destroyed by the user before we exit timewait.
	 * To protect against this, we search for the cm_id after exiting
	 * timewait before notifying the user that we've exited timewait.
	 */
	cm_id_priv->id.state = IB_CM_TIMEWAIT;
	wait_time = cm_convert_to_ms(cm_id_priv->av.timeout);

	/* Check if the device started its remove_one */
	spin_lock_irqsave(&cm.lock, flags);
	if (!cm_dev->going_down)
		queue_delayed_work(cm.wq, &cm_id_priv->timewait_info->work.work,
				   msecs_to_jiffies(wait_time));
	spin_unlock_irqrestore(&cm.lock, flags);

	cm_id_priv->timewait_info = NULL;
}

static void cm_reset_to_idle(struct cm_id_private *cm_id_priv)
{
	unsigned long flags;

	cm_id_priv->id.state = IB_CM_IDLE;
	if (cm_id_priv->timewait_info) {
		spin_lock_irqsave(&cm.lock, flags);
		cm_cleanup_timewait(cm_id_priv->timewait_info);
		spin_unlock_irqrestore(&cm.lock, flags);
		kfree(cm_id_priv->timewait_info);
		cm_id_priv->timewait_info = NULL;
	}
}

static void cm_destroy_id(struct ib_cm_id *cm_id, int err)
{
	struct cm_id_private *cm_id_priv;
	struct cm_work *work;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
retest:
	spin_lock_irq(&cm_id_priv->lock);
	switch (cm_id->state) {
	case IB_CM_LISTEN:
		spin_unlock_irq(&cm_id_priv->lock);

		spin_lock_irq(&cm.lock);
		if (--cm_id_priv->listen_sharecount > 0) {
			/* The id is still shared. */
			cm_deref_id(cm_id_priv);
			spin_unlock_irq(&cm.lock);
			return;
		}
		rb_erase(&cm_id_priv->service_node, &cm.listen_service_table);
		spin_unlock_irq(&cm.lock);
		break;
	case IB_CM_SIDR_REQ_SENT:
		cm_id->state = IB_CM_IDLE;
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_SIDR_REQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		cm_reject_sidr_req(cm_id_priv, IB_SIDR_REJECT);
		spin_lock_irq(&cm.lock);
		if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node))
			rb_erase(&cm_id_priv->sidr_id_node,
				 &cm.remote_sidr_table);
		spin_unlock_irq(&cm.lock);
		break;
	case IB_CM_REQ_SENT:
	case IB_CM_MRA_REQ_RCVD:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_TIMEOUT,
			       &cm_id_priv->id.device->node_guid,
			       sizeof cm_id_priv->id.device->node_guid,
			       NULL, 0);
		break;
	case IB_CM_REQ_RCVD:
		if (err == -ENOMEM) {
			/* Do not reject; this allows future retries. */
			cm_reset_to_idle(cm_id_priv);
			spin_unlock_irq(&cm_id_priv->lock);
		} else {
			spin_unlock_irq(&cm_id_priv->lock);
			ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
				       NULL, 0, NULL, 0);
		}
		break;
	case IB_CM_REP_SENT:
	case IB_CM_MRA_REP_RCVD:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		/* Fall through */
	case IB_CM_MRA_REQ_SENT:
	case IB_CM_REP_RCVD:
	case IB_CM_MRA_REP_SENT:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
			       NULL, 0, NULL, 0);
		break;
	case IB_CM_ESTABLISHED:
		spin_unlock_irq(&cm_id_priv->lock);
		if (cm_id_priv->qp_type == IB_QPT_XRC_TGT)
			break;
		ib_send_cm_dreq(cm_id, NULL, 0);
		goto retest;
	case IB_CM_DREQ_SENT:
		ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
		cm_enter_timewait(cm_id_priv);
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	case IB_CM_DREQ_RCVD:
		spin_unlock_irq(&cm_id_priv->lock);
		ib_send_cm_drep(cm_id, NULL, 0);
		break;
	default:
		spin_unlock_irq(&cm_id_priv->lock);
		break;
	}

	spin_lock_irq(&cm.lock);
	if (!list_empty(&cm_id_priv->altr_list) &&
	    (!cm_id_priv->altr_send_port_not_ready))
		list_del(&cm_id_priv->altr_list);
	if (!list_empty(&cm_id_priv->prim_list) &&
	    (!cm_id_priv->prim_send_port_not_ready))
		list_del(&cm_id_priv->prim_list);
	spin_unlock_irq(&cm.lock);

	cm_free_id(cm_id->local_id);
	cm_deref_id(cm_id_priv);
	wait_for_completion(&cm_id_priv->comp);
	while ((work = cm_dequeue_work(cm_id_priv)) != NULL)
		cm_free_work(work);
	kfree(cm_id_priv->private_data);
	kfree(cm_id_priv);
}

void ib_destroy_cm_id(struct ib_cm_id *cm_id)
{
	cm_destroy_id(cm_id, 0);
}
EXPORT_SYMBOL(ib_destroy_cm_id);

/**
 * __ib_cm_listen - Initiates listening on the specified service ID for
 *   connection and service ID resolution requests.
 * @cm_id: Connection identifier associated with the listen request.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 * @service_mask: Mask applied to the service ID used to listen across a
 *   range of service IDs.  If set to 0, the service ID is matched
 *   exactly.  This parameter is ignored if %service_id is set to
 *   IB_CM_ASSIGN_SERVICE_ID.
 */
static int __ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id,
			  __be64 service_mask)
{
	struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
	int ret = 0;

	service_mask = service_mask ? service_mask : ~cpu_to_be64(0);
	service_id &= service_mask;
	if ((service_id & IB_SERVICE_ID_AGN_MASK) == IB_CM_ASSIGN_SERVICE_ID &&
	    (service_id != IB_CM_ASSIGN_SERVICE_ID))
		return -EINVAL;

	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	if (cm_id->state != IB_CM_IDLE)
		return -EINVAL;

	cm_id->state = IB_CM_LISTEN;
	++cm_id_priv->listen_sharecount;

	if (service_id == IB_CM_ASSIGN_SERVICE_ID) {
		cm_id->service_id = cpu_to_be64(cm.listen_service_id++);
		cm_id->service_mask = ~cpu_to_be64(0);
	} else {
		cm_id->service_id = service_id;
		cm_id->service_mask = service_mask;
	}
	cur_cm_id_priv = cm_insert_listen(cm_id_priv);

	if (cur_cm_id_priv) {
		cm_id->state = IB_CM_IDLE;
		--cm_id_priv->listen_sharecount;
		ret = -EBUSY;
	}
	return ret;
}

int ib_cm_listen(struct ib_cm_id *cm_id, __be64 service_id, __be64 service_mask)
{
	unsigned long flags;
	int ret;

	spin_lock_irqsave(&cm.lock, flags);
	ret = __ib_cm_listen(cm_id, service_id, service_mask);
	spin_unlock_irqrestore(&cm.lock, flags);

	return ret;
}
EXPORT_SYMBOL(ib_cm_listen);
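
/*
 * For example (values illustrative), to listen on the single service ID
 * 0x1000:
 *
 *	ret = ib_cm_listen(cm_id, cpu_to_be64(0x1000ULL), 0);
 *
 * or on the 256-ID range 0x1000-0x10ff by masking off the low byte:
 *
 *	ret = ib_cm_listen(cm_id, cpu_to_be64(0x1000ULL),
 *			   cpu_to_be64(~0xffULL));
 */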

/**
 * ib_cm_insert_listen - Create a new listening ib_cm_id and listen on
 *   the given service ID.
 *
 * If there's an existing ID listening on that same device and service ID,
 * return it.
 *
 * @device: Device associated with the cm_id.  All related communication will
 *   be associated with the specified device.
 * @cm_handler: Callback invoked to notify the user of CM events.
 * @service_id: Service identifier matched against incoming connection
 *   and service ID resolution requests.  The service ID should be specified
 *   in network-byte order.  If set to IB_CM_ASSIGN_SERVICE_ID, the CM will
 *   assign a service ID to the caller.
 *
 * Callers should call ib_destroy_cm_id when done with the listener ID.
 */
struct ib_cm_id *ib_cm_insert_listen(struct ib_device *device,
				     ib_cm_handler cm_handler,
				     __be64 service_id)
{
	struct cm_id_private *cm_id_priv;
	struct ib_cm_id *cm_id;
	unsigned long flags;
	int err = 0;

	/* Create an ID in advance, since the creation may sleep */
	cm_id = ib_create_cm_id(device, cm_handler, NULL);
	if (IS_ERR(cm_id))
		return cm_id;

	spin_lock_irqsave(&cm.lock, flags);

	if (service_id == IB_CM_ASSIGN_SERVICE_ID)
		goto new_id;

	/* Find an existing ID */
	cm_id_priv = cm_find_listen(device, service_id);
	if (cm_id_priv) {
		if (cm_id_priv->id.cm_handler != cm_handler ||
		    cm_id_priv->id.context) {
			/* Sharing an ib_cm_id with different handlers is not
			 * supported */
			spin_unlock_irqrestore(&cm.lock, flags);
			ib_destroy_cm_id(cm_id);
			return ERR_PTR(-EINVAL);
		}
		atomic_inc(&cm_id_priv->refcount);
		++cm_id_priv->listen_sharecount;
		spin_unlock_irqrestore(&cm.lock, flags);

		ib_destroy_cm_id(cm_id);
		cm_id = &cm_id_priv->id;
		return cm_id;
	}

new_id:
	/* Use newly created ID */
	err = __ib_cm_listen(cm_id, service_id, 0);

	spin_unlock_irqrestore(&cm.lock, flags);

	if (err) {
		ib_destroy_cm_id(cm_id);
		return ERR_PTR(err);
	}
	return cm_id;
}
EXPORT_SYMBOL(ib_cm_insert_listen);

static __be64 cm_form_tid(struct cm_id_private *cm_id_priv,
			  enum cm_msg_sequence msg_seq)
{
	u64 hi_tid, low_tid;

	hi_tid   = ((u64) cm_id_priv->av.port->mad_agent->hi_tid) << 32;
	low_tid  = (u64) ((__force u32)cm_id_priv->id.local_id |
			  (msg_seq << 30));
	return cpu_to_be64(hi_tid | low_tid);
}
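
/*
 * The resulting TID is laid out roughly as follows (note that msg_seq is
 * OR'd over the top two bits of the 32-bit local communication ID):
 *
 *	bits 63..32	mad_agent->hi_tid
 *	bits 31..30	msg_seq (CM_MSG_SEQUENCE_*)
 *	bits 29..0	low bits of the local communication ID
 */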

static void cm_format_mad_hdr(struct ib_mad_hdr *hdr,
			      __be16 attr_id, __be64 tid)
{
	hdr->base_version  = IB_MGMT_BASE_VERSION;
	hdr->mgmt_class    = IB_MGMT_CLASS_CM;
	hdr->class_version = IB_CM_CLASS_VERSION;
	hdr->method        = IB_MGMT_METHOD_SEND;
	hdr->attr_id       = attr_id;
	hdr->tid           = tid;
}

static void cm_format_req(struct cm_req_msg *req_msg,
			  struct cm_id_private *cm_id_priv,
			  struct ib_cm_req_param *param)
{
	struct ib_sa_path_rec *pri_path = param->primary_path;
	struct ib_sa_path_rec *alt_path = param->alternate_path;

	cm_format_mad_hdr(&req_msg->hdr, CM_REQ_ATTR_ID,
			  cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_REQ));

	req_msg->local_comm_id = cm_id_priv->id.local_id;
	req_msg->service_id = param->service_id;
	req_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
	cm_req_set_local_qpn(req_msg, cpu_to_be32(param->qp_num));
	cm_req_set_init_depth(req_msg, param->initiator_depth);
	cm_req_set_remote_resp_timeout(req_msg,
				       param->remote_cm_response_timeout);
	cm_req_set_qp_type(req_msg, param->qp_type);
	cm_req_set_flow_ctrl(req_msg, param->flow_control);
	cm_req_set_starting_psn(req_msg, cpu_to_be32(param->starting_psn));
	cm_req_set_local_resp_timeout(req_msg,
				      param->local_cm_response_timeout);
	req_msg->pkey = param->primary_path->pkey;
	cm_req_set_path_mtu(req_msg, param->primary_path->mtu);
	cm_req_set_max_cm_retries(req_msg, param->max_cm_retries);

	if (param->qp_type != IB_QPT_XRC_INI) {
		cm_req_set_resp_res(req_msg, param->responder_resources);
		cm_req_set_retry_count(req_msg, param->retry_count);
		cm_req_set_rnr_retry_count(req_msg, param->rnr_retry_count);
		cm_req_set_srq(req_msg, param->srq);
	}

	if (pri_path->hop_limit <= 1) {
		req_msg->primary_local_lid = pri_path->slid;
		req_msg->primary_remote_lid = pri_path->dlid;
	} else {
		/* Work-around until there's a way to obtain remote LID info */
		req_msg->primary_local_lid = IB_LID_PERMISSIVE;
		req_msg->primary_remote_lid = IB_LID_PERMISSIVE;
	}
	req_msg->primary_local_gid = pri_path->sgid;
	req_msg->primary_remote_gid = pri_path->dgid;
	cm_req_set_primary_flow_label(req_msg, pri_path->flow_label);
	cm_req_set_primary_packet_rate(req_msg, pri_path->rate);
	req_msg->primary_traffic_class = pri_path->traffic_class;
	req_msg->primary_hop_limit = pri_path->hop_limit;
	cm_req_set_primary_sl(req_msg, pri_path->sl);
	cm_req_set_primary_subnet_local(req_msg, (pri_path->hop_limit <= 1));
	cm_req_set_primary_local_ack_timeout(req_msg,
		cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
			       pri_path->packet_life_time));

	if (alt_path) {
		if (alt_path->hop_limit <= 1) {
			req_msg->alt_local_lid = alt_path->slid;
			req_msg->alt_remote_lid = alt_path->dlid;
		} else {
			req_msg->alt_local_lid = IB_LID_PERMISSIVE;
			req_msg->alt_remote_lid = IB_LID_PERMISSIVE;
		}
		req_msg->alt_local_gid = alt_path->sgid;
		req_msg->alt_remote_gid = alt_path->dgid;
		cm_req_set_alt_flow_label(req_msg,
					  alt_path->flow_label);
		cm_req_set_alt_packet_rate(req_msg, alt_path->rate);
		req_msg->alt_traffic_class = alt_path->traffic_class;
		req_msg->alt_hop_limit = alt_path->hop_limit;
		cm_req_set_alt_sl(req_msg, alt_path->sl);
		cm_req_set_alt_subnet_local(req_msg, (alt_path->hop_limit <= 1));
		cm_req_set_alt_local_ack_timeout(req_msg,
			cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
				       alt_path->packet_life_time));
	}

	if (param->private_data && param->private_data_len)
		memcpy(req_msg->private_data, param->private_data,
		       param->private_data_len);
}

static int cm_validate_req_param(struct ib_cm_req_param *param)
{
	/* peer-to-peer not supported */
	if (param->peer_to_peer)
		return -EINVAL;

	if (!param->primary_path)
		return -EINVAL;

	if (param->qp_type != IB_QPT_RC && param->qp_type != IB_QPT_UC &&
	    param->qp_type != IB_QPT_XRC_INI)
		return -EINVAL;

	if (param->private_data &&
	    param->private_data_len > IB_CM_REQ_PRIVATE_DATA_SIZE)
		return -EINVAL;

	if (param->alternate_path &&
	    (param->alternate_path->pkey != param->primary_path->pkey ||
	     param->alternate_path->mtu != param->primary_path->mtu))
		return -EINVAL;

	return 0;
}

int ib_send_cm_req(struct ib_cm_id *cm_id,
		   struct ib_cm_req_param *param)
{
	struct cm_id_private *cm_id_priv;
	struct cm_req_msg *req_msg;
	unsigned long flags;
	int ret;

	ret = cm_validate_req_param(param);
	if (ret)
		return ret;

	/* Verify that we're not in timewait. */
	cm_id_priv = container_of(cm_id, struct cm_id_private, id);
	spin_lock_irqsave(&cm_id_priv->lock, flags);
	if (cm_id->state != IB_CM_IDLE) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		ret = -EINVAL;
		goto out;
	}
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);

	cm_id_priv->timewait_info = cm_create_timewait_info(cm_id_priv->
							    id.local_id);
	if (IS_ERR(cm_id_priv->timewait_info)) {
		ret = PTR_ERR(cm_id_priv->timewait_info);
		goto out;
	}

	ret = cm_init_av_by_path(param->primary_path, &cm_id_priv->av,
				 cm_id_priv);
	if (ret)
		goto error1;
	if (param->alternate_path) {
		ret = cm_init_av_by_path(param->alternate_path,
					 &cm_id_priv->alt_av, cm_id_priv);
		if (ret)
			goto error1;
	}
	cm_id->service_id = param->service_id;
	cm_id->service_mask = ~cpu_to_be64(0);
	cm_id_priv->timeout_ms = cm_convert_to_ms(
				    param->primary_path->packet_life_time) * 2 +
				 cm_convert_to_ms(
				    param->remote_cm_response_timeout);
	cm_id_priv->max_cm_retries = param->max_cm_retries;
	cm_id_priv->initiator_depth = param->initiator_depth;
	cm_id_priv->responder_resources = param->responder_resources;
	cm_id_priv->retry_count = param->retry_count;
	cm_id_priv->path_mtu = param->primary_path->mtu;
	cm_id_priv->pkey = param->primary_path->pkey;
	cm_id_priv->qp_type = param->qp_type;

	ret = cm_alloc_msg(cm_id_priv, &cm_id_priv->msg);
	if (ret)
		goto error1;

	req_msg = (struct cm_req_msg *) cm_id_priv->msg->mad;
	cm_format_req(req_msg, cm_id_priv, param);
	cm_id_priv->tid = req_msg->hdr.tid;
	cm_id_priv->msg->timeout_ms = cm_id_priv->timeout_ms;
	cm_id_priv->msg->context[1] = (void *) (unsigned long) IB_CM_REQ_SENT;

	cm_id_priv->local_qpn = cm_req_get_local_qpn(req_msg);
	cm_id_priv->rq_psn = cm_req_get_starting_psn(req_msg);

	spin_lock_irqsave(&cm_id_priv->lock, flags);
	ret = ib_post_send_mad(cm_id_priv->msg, NULL);
	if (ret) {
		spin_unlock_irqrestore(&cm_id_priv->lock, flags);
		goto error2;
	}
	BUG_ON(cm_id->state != IB_CM_IDLE);
	cm_id->state = IB_CM_REQ_SENT;
	spin_unlock_irqrestore(&cm_id_priv->lock, flags);
	return 0;

error2:	cm_free_msg(cm_id_priv->msg);
error1:	kfree(cm_id_priv->timewait_info);
out:	return ret;
}
EXPORT_SYMBOL(ib_send_cm_req);

static int cm_issue_rej(struct cm_port *port,
			struct ib_mad_recv_wc *mad_recv_wc,
			enum ib_cm_rej_reason reason,
			enum cm_msg_response msg_rejected,
			void *ari, u8 ari_length)
{
	struct ib_mad_send_buf *msg = NULL;
	struct cm_rej_msg *rej_msg, *rcv_msg;
	int ret;

	ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
	if (ret)
		return ret;

	/* We just need common CM header information.  Cast to any message. */
	rcv_msg = (struct cm_rej_msg *) mad_recv_wc->recv_buf.mad;
	rej_msg = (struct cm_rej_msg *) msg->mad;

	cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, rcv_msg->hdr.tid);
	rej_msg->remote_comm_id = rcv_msg->local_comm_id;
	rej_msg->local_comm_id = rcv_msg->remote_comm_id;
	cm_rej_set_msg_rejected(rej_msg, msg_rejected);
	rej_msg->reason = cpu_to_be16(reason);

	if (ari && ari_length) {
		cm_rej_set_reject_info_len(rej_msg, ari_length);
		memcpy(rej_msg->ari, ari, ari_length);
	}

	ret = ib_post_send_mad(msg, NULL);
	if (ret)
		cm_free_msg(msg);

	return ret;
}
1377
1378 static void cm_format_paths_from_req(struct cm_req_msg *req_msg,
1379                                             struct ib_sa_path_rec *primary_path,
1380                                             struct ib_sa_path_rec *alt_path)
1381 {
1382         memset(primary_path, 0, sizeof *primary_path);
1383         primary_path->dgid = req_msg->primary_local_gid;
1384         primary_path->sgid = req_msg->primary_remote_gid;
1385         primary_path->dlid = req_msg->primary_local_lid;
1386         primary_path->slid = req_msg->primary_remote_lid;
1387         primary_path->flow_label = cm_req_get_primary_flow_label(req_msg);
1388         primary_path->hop_limit = req_msg->primary_hop_limit;
1389         primary_path->traffic_class = req_msg->primary_traffic_class;
1390         primary_path->reversible = 1;
1391         primary_path->pkey = req_msg->pkey;
1392         primary_path->sl = cm_req_get_primary_sl(req_msg);
1393         primary_path->mtu_selector = IB_SA_EQ;
1394         primary_path->mtu = cm_req_get_path_mtu(req_msg);
1395         primary_path->rate_selector = IB_SA_EQ;
1396         primary_path->rate = cm_req_get_primary_packet_rate(req_msg);
1397         primary_path->packet_life_time_selector = IB_SA_EQ;
1398         primary_path->packet_life_time =
1399                 cm_req_get_primary_local_ack_timeout(req_msg);
1400         primary_path->packet_life_time -= (primary_path->packet_life_time > 0);
1401         primary_path->service_id = req_msg->service_id;
1402
1403         if (req_msg->alt_local_lid) {
1404                 memset(alt_path, 0, sizeof *alt_path);
1405                 alt_path->dgid = req_msg->alt_local_gid;
1406                 alt_path->sgid = req_msg->alt_remote_gid;
1407                 alt_path->dlid = req_msg->alt_local_lid;
1408                 alt_path->slid = req_msg->alt_remote_lid;
1409                 alt_path->flow_label = cm_req_get_alt_flow_label(req_msg);
1410                 alt_path->hop_limit = req_msg->alt_hop_limit;
1411                 alt_path->traffic_class = req_msg->alt_traffic_class;
1412                 alt_path->reversible = 1;
1413                 alt_path->pkey = req_msg->pkey;
1414                 alt_path->sl = cm_req_get_alt_sl(req_msg);
1415                 alt_path->mtu_selector = IB_SA_EQ;
1416                 alt_path->mtu = cm_req_get_path_mtu(req_msg);
1417                 alt_path->rate_selector = IB_SA_EQ;
1418                 alt_path->rate = cm_req_get_alt_packet_rate(req_msg);
1419                 alt_path->packet_life_time_selector = IB_SA_EQ;
1420                 alt_path->packet_life_time =
1421                         cm_req_get_alt_local_ack_timeout(req_msg);
1422                 alt_path->packet_life_time -= (alt_path->packet_life_time > 0);
1423                 alt_path->service_id = req_msg->service_id;
1424         }
1425 }
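
/*
 * Editorial note: the REQ's "local" fields describe the sender (the
 * active side), so when the passive side rebuilds a path record the
 * sender's local GID/LID become the destination (dgid/dlid) and the
 * sender's remote GID/LID become the source (sgid/slid).  The REQ
 * carries an ACK timeout of at least the path's packet life time plus
 * one, which is why the conversion above subtracts one again, clamped
 * at zero.
 */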
1426
1427 static u16 cm_get_bth_pkey(struct cm_work *work)
1428 {
1429         struct ib_device *ib_dev = work->port->cm_dev->ib_device;
1430         u8 port_num = work->port->port_num;
1431         u16 pkey_index = work->mad_recv_wc->wc->pkey_index;
1432         u16 pkey;
1433         int ret;
1434
1435         ret = ib_get_cached_pkey(ib_dev, port_num, pkey_index, &pkey);
1436         if (ret) {
1437                 dev_warn_ratelimited(&ib_dev->dev, "ib_cm: Couldn't retrieve pkey for incoming request (port %d, pkey index %d). %d\n",
1438                                      port_num, pkey_index, ret);
1439                 return 0;
1440         }
1441
1442         return pkey;
1443 }
1444
1445 static void cm_format_req_event(struct cm_work *work,
1446                                 struct cm_id_private *cm_id_priv,
1447                                 struct ib_cm_id *listen_id)
1448 {
1449         struct cm_req_msg *req_msg;
1450         struct ib_cm_req_event_param *param;
1451
1452         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1453         param = &work->cm_event.param.req_rcvd;
1454         param->listen_id = listen_id;
1455         param->bth_pkey = cm_get_bth_pkey(work);
1456         param->port = cm_id_priv->av.port->port_num;
1457         param->primary_path = &work->path[0];
1458         if (req_msg->alt_local_lid)
1459                 param->alternate_path = &work->path[1];
1460         else
1461                 param->alternate_path = NULL;
1462         param->remote_ca_guid = req_msg->local_ca_guid;
1463         param->remote_qkey = be32_to_cpu(req_msg->local_qkey);
1464         param->remote_qpn = be32_to_cpu(cm_req_get_local_qpn(req_msg));
1465         param->qp_type = cm_req_get_qp_type(req_msg);
1466         param->starting_psn = be32_to_cpu(cm_req_get_starting_psn(req_msg));
1467         param->responder_resources = cm_req_get_init_depth(req_msg);
1468         param->initiator_depth = cm_req_get_resp_res(req_msg);
1469         param->local_cm_response_timeout =
1470                                         cm_req_get_remote_resp_timeout(req_msg);
1471         param->flow_control = cm_req_get_flow_ctrl(req_msg);
1472         param->remote_cm_response_timeout =
1473                                         cm_req_get_local_resp_timeout(req_msg);
1474         param->retry_count = cm_req_get_retry_count(req_msg);
1475         param->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1476         param->srq = cm_req_get_srq(req_msg);
1477         param->ppath_sgid_index = cm_id_priv->av.ah_attr.grh.sgid_index;
1478         work->cm_event.private_data = &req_msg->private_data;
1479 }
1480
1481 static void cm_process_work(struct cm_id_private *cm_id_priv,
1482                             struct cm_work *work)
1483 {
1484         int ret;
1485
1486         /* We will typically only have the current event to report. */
1487         ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &work->cm_event);
1488         cm_free_work(work);
1489
1490         while (!ret && !atomic_add_negative(-1, &cm_id_priv->work_count)) {
1491                 spin_lock_irq(&cm_id_priv->lock);
1492                 work = cm_dequeue_work(cm_id_priv);
1493                 spin_unlock_irq(&cm_id_priv->lock);
1494                 BUG_ON(!work);
1495                 ret = cm_id_priv->id.cm_handler(&cm_id_priv->id,
1496                                                 &work->cm_event);
1497                 cm_free_work(work);
1498         }
1499         cm_deref_id(cm_id_priv);
1500         if (ret)
1501                 cm_destroy_id(&cm_id_priv->id, ret);
1502 }
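
/*
 * Editorial sketch of the event-delivery protocol used by the handlers
 * below: work_count starts at -1.  A handler with an event ready
 * performs, under cm_id_priv->lock,
 *
 *      ret = atomic_inc_and_test(&cm_id_priv->work_count);
 *      if (!ret)
 *              list_add_tail(&work->list, &cm_id_priv->work_list);
 *
 * A "true" result (the count reached zero) means no other thread is
 * delivering events for this cm_id, so the handler calls
 * cm_process_work() directly; otherwise the work is queued for the
 * current owner.  The drain loop above is the mirror image:
 * atomic_add_negative(-1, ...) returns true once the count drops back
 * below zero, i.e. the queue is empty.
 */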
1503
1504 static void cm_format_mra(struct cm_mra_msg *mra_msg,
1505                           struct cm_id_private *cm_id_priv,
1506                           enum cm_msg_response msg_mraed, u8 service_timeout,
1507                           const void *private_data, u8 private_data_len)
1508 {
1509         cm_format_mad_hdr(&mra_msg->hdr, CM_MRA_ATTR_ID, cm_id_priv->tid);
1510         cm_mra_set_msg_mraed(mra_msg, msg_mraed);
1511         mra_msg->local_comm_id = cm_id_priv->id.local_id;
1512         mra_msg->remote_comm_id = cm_id_priv->id.remote_id;
1513         cm_mra_set_service_timeout(mra_msg, service_timeout);
1514
1515         if (private_data && private_data_len)
1516                 memcpy(mra_msg->private_data, private_data, private_data_len);
1517 }
1518
1519 static void cm_format_rej(struct cm_rej_msg *rej_msg,
1520                           struct cm_id_private *cm_id_priv,
1521                           enum ib_cm_rej_reason reason,
1522                           void *ari,
1523                           u8 ari_length,
1524                           const void *private_data,
1525                           u8 private_data_len)
1526 {
1527         cm_format_mad_hdr(&rej_msg->hdr, CM_REJ_ATTR_ID, cm_id_priv->tid);
1528         rej_msg->remote_comm_id = cm_id_priv->id.remote_id;
1529
1530         switch (cm_id_priv->id.state) {
1531         case IB_CM_REQ_RCVD:
1532                 rej_msg->local_comm_id = 0;
1533                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1534                 break;
1535         case IB_CM_MRA_REQ_SENT:
1536                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1537                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REQ);
1538                 break;
1539         case IB_CM_REP_RCVD:
1540         case IB_CM_MRA_REP_SENT:
1541                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1542                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_REP);
1543                 break;
1544         default:
1545                 rej_msg->local_comm_id = cm_id_priv->id.local_id;
1546                 cm_rej_set_msg_rejected(rej_msg, CM_MSG_RESPONSE_OTHER);
1547                 break;
1548         }
1549
1550         rej_msg->reason = cpu_to_be16(reason);
1551         if (ari && ari_length) {
1552                 cm_rej_set_reject_info_len(rej_msg, ari_length);
1553                 memcpy(rej_msg->ari, ari, ari_length);
1554         }
1555
1556         if (private_data && private_data_len)
1557                 memcpy(rej_msg->private_data, private_data, private_data_len);
1558 }
1559
1560 static void cm_dup_req_handler(struct cm_work *work,
1561                                struct cm_id_private *cm_id_priv)
1562 {
1563         struct ib_mad_send_buf *msg = NULL;
1564         int ret;
1565
1566         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1567                         counter[CM_REQ_COUNTER]);
1568
1569         /* Quick state check to discard duplicate REQs. */
1570         if (cm_id_priv->id.state == IB_CM_REQ_RCVD)
1571                 return;
1572
1573         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1574         if (ret)
1575                 return;
1576
1577         spin_lock_irq(&cm_id_priv->lock);
1578         switch (cm_id_priv->id.state) {
1579         case IB_CM_MRA_REQ_SENT:
1580                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1581                               CM_MSG_RESPONSE_REQ, cm_id_priv->service_timeout,
1582                               cm_id_priv->private_data,
1583                               cm_id_priv->private_data_len);
1584                 break;
1585         case IB_CM_TIMEWAIT:
1586                 cm_format_rej((struct cm_rej_msg *) msg->mad, cm_id_priv,
1587                               IB_CM_REJ_STALE_CONN, NULL, 0, NULL, 0);
1588                 break;
1589         default:
1590                 goto unlock;
1591         }
1592         spin_unlock_irq(&cm_id_priv->lock);
1593
1594         ret = ib_post_send_mad(msg, NULL);
1595         if (ret)
1596                 goto free;
1597         return;
1598
1599 unlock: spin_unlock_irq(&cm_id_priv->lock);
1600 free:   cm_free_msg(msg);
1601 }
1602
1603 static struct cm_id_private * cm_match_req(struct cm_work *work,
1604                                            struct cm_id_private *cm_id_priv)
1605 {
1606         struct cm_id_private *listen_cm_id_priv, *cur_cm_id_priv;
1607         struct cm_timewait_info *timewait_info;
1608         struct cm_req_msg *req_msg;
1609
1610         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1611
1612         /* Check for possible duplicate REQ. */
1613         spin_lock_irq(&cm.lock);
1614         timewait_info = cm_insert_remote_id(cm_id_priv->timewait_info);
1615         if (timewait_info) {
1616                 cur_cm_id_priv = cm_get_id(timewait_info->work.local_id,
1617                                            timewait_info->work.remote_id);
1618                 spin_unlock_irq(&cm.lock);
1619                 if (cur_cm_id_priv) {
1620                         cm_dup_req_handler(work, cur_cm_id_priv);
1621                         cm_deref_id(cur_cm_id_priv);
1622                 }
1623                 return NULL;
1624         }
1625
1626         /* Check for stale connections. */
1627         timewait_info = cm_insert_remote_qpn(cm_id_priv->timewait_info);
1628         if (timewait_info) {
1629                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1630                 spin_unlock_irq(&cm.lock);
1631                 cm_issue_rej(work->port, work->mad_recv_wc,
1632                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REQ,
1633                              NULL, 0);
1634                 return NULL;
1635         }
1636
1637         /* Find matching listen request. */
1638         listen_cm_id_priv = cm_find_listen(cm_id_priv->id.device,
1639                                            req_msg->service_id);
1640         if (!listen_cm_id_priv) {
1641                 cm_cleanup_timewait(cm_id_priv->timewait_info);
1642                 spin_unlock_irq(&cm.lock);
1643                 cm_issue_rej(work->port, work->mad_recv_wc,
1644                              IB_CM_REJ_INVALID_SERVICE_ID, CM_MSG_RESPONSE_REQ,
1645                              NULL, 0);
1646                 goto out;
1647         }
1648         atomic_inc(&listen_cm_id_priv->refcount);
1649         atomic_inc(&cm_id_priv->refcount);
1650         cm_id_priv->id.state = IB_CM_REQ_RCVD;
1651         atomic_inc(&cm_id_priv->work_count);
1652         spin_unlock_irq(&cm.lock);
1653 out:
1654         return listen_cm_id_priv;
1655 }
1656
1657 /*
1658  * Work-around for inter-subnet connections.  If the LIDs are permissive,
1659  * we need to override the LID/SL data in the REQ with the LID information
1660  * in the work completion.
1661  */
1662 static void cm_process_routed_req(struct cm_req_msg *req_msg, struct ib_wc *wc)
1663 {
1664         if (!cm_req_get_primary_subnet_local(req_msg)) {
1665                 if (req_msg->primary_local_lid == IB_LID_PERMISSIVE) {
1666                         req_msg->primary_local_lid = cpu_to_be16(wc->slid);
1667                         cm_req_set_primary_sl(req_msg, wc->sl);
1668                 }
1669
1670                 if (req_msg->primary_remote_lid == IB_LID_PERMISSIVE)
1671                         req_msg->primary_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1672         }
1673
1674         if (!cm_req_get_alt_subnet_local(req_msg)) {
1675                 if (req_msg->alt_local_lid == IB_LID_PERMISSIVE) {
1676                         req_msg->alt_local_lid = cpu_to_be16(wc->slid);
1677                         cm_req_set_alt_sl(req_msg, wc->sl);
1678                 }
1679
1680                 if (req_msg->alt_remote_lid == IB_LID_PERMISSIVE)
1681                         req_msg->alt_remote_lid = cpu_to_be16(wc->dlid_path_bits);
1682         }
1683 }
1684
1685 static int cm_req_handler(struct cm_work *work)
1686 {
1687         struct ib_cm_id *cm_id;
1688         struct cm_id_private *cm_id_priv, *listen_cm_id_priv;
1689         struct cm_req_msg *req_msg;
1690         union ib_gid gid;
1691         struct ib_gid_attr gid_attr;
1692         int ret;
1693
1694         req_msg = (struct cm_req_msg *)work->mad_recv_wc->recv_buf.mad;
1695
1696         cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
1697         if (IS_ERR(cm_id))
1698                 return PTR_ERR(cm_id);
1699
1700         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1701         cm_id_priv->id.remote_id = req_msg->local_comm_id;
1702         ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
1703                                       work->mad_recv_wc->recv_buf.grh,
1704                                       &cm_id_priv->av);
1705         if (ret)
1706                 goto destroy;
1707         cm_id_priv->timewait_info =
1708                         cm_create_timewait_info(cm_id_priv->id.local_id);
1709         if (IS_ERR(cm_id_priv->timewait_info)) {
1710                 ret = PTR_ERR(cm_id_priv->timewait_info);
1711                 goto destroy;
1712         }
1713         cm_id_priv->timewait_info->work.remote_id = req_msg->local_comm_id;
1714         cm_id_priv->timewait_info->remote_ca_guid = req_msg->local_ca_guid;
1715         cm_id_priv->timewait_info->remote_qpn = cm_req_get_local_qpn(req_msg);
1716
1717         listen_cm_id_priv = cm_match_req(work, cm_id_priv);
1718         if (!listen_cm_id_priv) {
1719                 ret = -EINVAL;
1720                 kfree(cm_id_priv->timewait_info);
1721                 goto destroy;
1722         }
1723
1724         cm_id_priv->id.cm_handler = listen_cm_id_priv->id.cm_handler;
1725         cm_id_priv->id.context = listen_cm_id_priv->id.context;
1726         cm_id_priv->id.service_id = req_msg->service_id;
1727         cm_id_priv->id.service_mask = ~cpu_to_be64(0);
1728
1729         cm_process_routed_req(req_msg, work->mad_recv_wc->wc);
1730         cm_format_paths_from_req(req_msg, &work->path[0], &work->path[1]);
1731
1732         memcpy(work->path[0].dmac, cm_id_priv->av.ah_attr.dmac, ETH_ALEN);
1733         work->path[0].hop_limit = cm_id_priv->av.ah_attr.grh.hop_limit;
1734         ret = ib_get_cached_gid(work->port->cm_dev->ib_device,
1735                                 work->port->port_num,
1736                                 cm_id_priv->av.ah_attr.grh.sgid_index,
1737                                 &gid, &gid_attr);
1738         if (!ret) {
1739                 if (gid_attr.ndev) {
1740                         work->path[0].ifindex = gid_attr.ndev->if_index;
1741                         work->path[0].net = dev_net(gid_attr.ndev);
1742                         dev_put(gid_attr.ndev);
1743                 }
1744                 work->path[0].gid_type = gid_attr.gid_type;
1745                 ret = cm_init_av_by_path(&work->path[0], &cm_id_priv->av,
1746                                          cm_id_priv);
1747         }
1748         if (ret) {
1749                 int err = ib_get_cached_gid(work->port->cm_dev->ib_device,
1750                                             work->port->port_num, 0,
1751                                             &work->path[0].sgid,
1752                                             &gid_attr);
1753                 if (!err && gid_attr.ndev) {
1754                         work->path[0].ifindex = gid_attr.ndev->if_index;
1755                         work->path[0].net = dev_net(gid_attr.ndev);
1756                         dev_put(gid_attr.ndev);
1757                 }
1758                 work->path[0].gid_type = gid_attr.gid_type;
1759                 ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_GID,
1760                                &work->path[0].sgid, sizeof work->path[0].sgid,
1761                                NULL, 0);
1762                 goto rejected;
1763         }
1764         if (req_msg->alt_local_lid) {
1765                 ret = cm_init_av_by_path(&work->path[1], &cm_id_priv->alt_av,
1766                                          cm_id_priv);
1767                 if (ret) {
1768                         ib_send_cm_rej(cm_id, IB_CM_REJ_INVALID_ALT_GID,
1769                                        &work->path[0].sgid,
1770                                        sizeof work->path[0].sgid, NULL, 0);
1771                         goto rejected;
1772                 }
1773         }
1774         cm_id_priv->tid = req_msg->hdr.tid;
1775         cm_id_priv->timeout_ms = cm_convert_to_ms(
1776                                         cm_req_get_local_resp_timeout(req_msg));
1777         cm_id_priv->max_cm_retries = cm_req_get_max_cm_retries(req_msg);
1778         cm_id_priv->remote_qpn = cm_req_get_local_qpn(req_msg);
1779         cm_id_priv->initiator_depth = cm_req_get_resp_res(req_msg);
1780         cm_id_priv->responder_resources = cm_req_get_init_depth(req_msg);
1781         cm_id_priv->path_mtu = cm_req_get_path_mtu(req_msg);
1782         cm_id_priv->pkey = req_msg->pkey;
1783         cm_id_priv->sq_psn = cm_req_get_starting_psn(req_msg);
1784         cm_id_priv->retry_count = cm_req_get_retry_count(req_msg);
1785         cm_id_priv->rnr_retry_count = cm_req_get_rnr_retry_count(req_msg);
1786         cm_id_priv->qp_type = cm_req_get_qp_type(req_msg);
1787
1788         cm_format_req_event(work, cm_id_priv, &listen_cm_id_priv->id);
1789         cm_process_work(cm_id_priv, work);
1790         cm_deref_id(listen_cm_id_priv);
1791         return 0;
1792
1793 rejected:
1794         atomic_dec(&cm_id_priv->refcount);
1795         cm_deref_id(listen_cm_id_priv);
1796 destroy:
1797         ib_destroy_cm_id(cm_id);
1798         return ret;
1799 }
1800
1801 static void cm_format_rep(struct cm_rep_msg *rep_msg,
1802                           struct cm_id_private *cm_id_priv,
1803                           struct ib_cm_rep_param *param)
1804 {
1805         cm_format_mad_hdr(&rep_msg->hdr, CM_REP_ATTR_ID, cm_id_priv->tid);
1806         rep_msg->local_comm_id = cm_id_priv->id.local_id;
1807         rep_msg->remote_comm_id = cm_id_priv->id.remote_id;
1808         cm_rep_set_starting_psn(rep_msg, cpu_to_be32(param->starting_psn));
1809         rep_msg->resp_resources = param->responder_resources;
1810         cm_rep_set_target_ack_delay(rep_msg,
1811                                     cm_id_priv->av.port->cm_dev->ack_delay);
1812         cm_rep_set_failover(rep_msg, param->failover_accepted);
1813         cm_rep_set_rnr_retry_count(rep_msg, param->rnr_retry_count);
1814         rep_msg->local_ca_guid = cm_id_priv->id.device->node_guid;
1815
1816         if (cm_id_priv->qp_type != IB_QPT_XRC_TGT) {
1817                 rep_msg->initiator_depth = param->initiator_depth;
1818                 cm_rep_set_flow_ctrl(rep_msg, param->flow_control);
1819                 cm_rep_set_srq(rep_msg, param->srq);
1820                 cm_rep_set_local_qpn(rep_msg, cpu_to_be32(param->qp_num));
1821         } else {
1822                 cm_rep_set_srq(rep_msg, 1);
1823                 cm_rep_set_local_eecn(rep_msg, cpu_to_be32(param->qp_num));
1824         }
1825
1826         if (param->private_data && param->private_data_len)
1827                 memcpy(rep_msg->private_data, param->private_data,
1828                        param->private_data_len);
1829 }
1830
1831 int ib_send_cm_rep(struct ib_cm_id *cm_id,
1832                    struct ib_cm_rep_param *param)
1833 {
1834         struct cm_id_private *cm_id_priv;
1835         struct ib_mad_send_buf *msg;
1836         struct cm_rep_msg *rep_msg;
1837         unsigned long flags;
1838         int ret;
1839
1840         if (param->private_data &&
1841             param->private_data_len > IB_CM_REP_PRIVATE_DATA_SIZE)
1842                 return -EINVAL;
1843
1844         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1845         spin_lock_irqsave(&cm_id_priv->lock, flags);
1846         if (cm_id->state != IB_CM_REQ_RCVD &&
1847             cm_id->state != IB_CM_MRA_REQ_SENT) {
1848                 ret = -EINVAL;
1849                 goto out;
1850         }
1851
1852         ret = cm_alloc_msg(cm_id_priv, &msg);
1853         if (ret)
1854                 goto out;
1855
1856         rep_msg = (struct cm_rep_msg *) msg->mad;
1857         cm_format_rep(rep_msg, cm_id_priv, param);
1858         msg->timeout_ms = cm_id_priv->timeout_ms;
1859         msg->context[1] = (void *) (unsigned long) IB_CM_REP_SENT;
1860
1861         ret = ib_post_send_mad(msg, NULL);
1862         if (ret) {
1863                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1864                 cm_free_msg(msg);
1865                 return ret;
1866         }
1867
1868         cm_id->state = IB_CM_REP_SENT;
1869         cm_id_priv->msg = msg;
1870         cm_id_priv->initiator_depth = param->initiator_depth;
1871         cm_id_priv->responder_resources = param->responder_resources;
1872         cm_id_priv->rq_psn = cm_rep_get_starting_psn(rep_msg);
1873         cm_id_priv->local_qpn = cpu_to_be32(param->qp_num & 0xFFFFFF);
1874
1875 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1876         return ret;
1877 }
1878 EXPORT_SYMBOL(ib_send_cm_rep);
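
/*
 * Illustrative sketch (editorial): a passive-side cm_handler might accept
 * IB_CM_REQ_RECEIVED by sending a REP once its QP is ready.  "my_qp" and
 * the numeric values are assumed for the example.
 *
 *      struct ib_cm_rep_param rep = {};
 *
 *      rep.qp_num = my_qp->qp_num;
 *      rep.starting_psn = 0x100;
 *      rep.responder_resources = 4;
 *      rep.initiator_depth = 4;
 *      rep.rnr_retry_count = 7;
 *      rep.flow_control = 1;
 *
 *      return ib_send_cm_rep(cm_id, &rep);
 */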
1879
1880 static void cm_format_rtu(struct cm_rtu_msg *rtu_msg,
1881                           struct cm_id_private *cm_id_priv,
1882                           const void *private_data,
1883                           u8 private_data_len)
1884 {
1885         cm_format_mad_hdr(&rtu_msg->hdr, CM_RTU_ATTR_ID, cm_id_priv->tid);
1886         rtu_msg->local_comm_id = cm_id_priv->id.local_id;
1887         rtu_msg->remote_comm_id = cm_id_priv->id.remote_id;
1888
1889         if (private_data && private_data_len)
1890                 memcpy(rtu_msg->private_data, private_data, private_data_len);
1891 }
1892
1893 int ib_send_cm_rtu(struct ib_cm_id *cm_id,
1894                    const void *private_data,
1895                    u8 private_data_len)
1896 {
1897         struct cm_id_private *cm_id_priv;
1898         struct ib_mad_send_buf *msg;
1899         unsigned long flags;
1900         void *data;
1901         int ret;
1902
1903         if (private_data && private_data_len > IB_CM_RTU_PRIVATE_DATA_SIZE)
1904                 return -EINVAL;
1905
1906         data = cm_copy_private_data(private_data, private_data_len);
1907         if (IS_ERR(data))
1908                 return PTR_ERR(data);
1909
1910         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
1911         spin_lock_irqsave(&cm_id_priv->lock, flags);
1912         if (cm_id->state != IB_CM_REP_RCVD &&
1913             cm_id->state != IB_CM_MRA_REP_SENT) {
1914                 ret = -EINVAL;
1915                 goto error;
1916         }
1917
1918         ret = cm_alloc_msg(cm_id_priv, &msg);
1919         if (ret)
1920                 goto error;
1921
1922         cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1923                       private_data, private_data_len);
1924
1925         ret = ib_post_send_mad(msg, NULL);
1926         if (ret) {
1927                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1928                 cm_free_msg(msg);
1929                 kfree(data);
1930                 return ret;
1931         }
1932
1933         cm_id->state = IB_CM_ESTABLISHED;
1934         cm_set_private_data(cm_id_priv, data, private_data_len);
1935         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1936         return 0;
1937
1938 error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
1939         kfree(data);
1940         return ret;
1941 }
1942 EXPORT_SYMBOL(ib_send_cm_rtu);
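
/*
 * Illustrative sketch (editorial): the active side typically answers
 * IB_CM_REP_RECEIVED by moving its QP to RTR/RTS and then sending an
 * RTU, which drives the connection to IB_CM_ESTABLISHED:
 *
 *      case IB_CM_REP_RECEIVED:
 *              ... transition the QP to RTR/RTS (not shown) ...
 *              ret = ib_send_cm_rtu(cm_id, NULL, 0);
 *              break;
 */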
1943
1944 static void cm_format_rep_event(struct cm_work *work, enum ib_qp_type qp_type)
1945 {
1946         struct cm_rep_msg *rep_msg;
1947         struct ib_cm_rep_event_param *param;
1948
1949         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
1950         param = &work->cm_event.param.rep_rcvd;
1951         param->remote_ca_guid = rep_msg->local_ca_guid;
1952         param->remote_qkey = be32_to_cpu(rep_msg->local_qkey);
1953         param->remote_qpn = be32_to_cpu(cm_rep_get_qpn(rep_msg, qp_type));
1954         param->starting_psn = be32_to_cpu(cm_rep_get_starting_psn(rep_msg));
1955         param->responder_resources = rep_msg->initiator_depth;
1956         param->initiator_depth = rep_msg->resp_resources;
1957         param->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
1958         param->failover_accepted = cm_rep_get_failover(rep_msg);
1959         param->flow_control = cm_rep_get_flow_ctrl(rep_msg);
1960         param->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
1961         param->srq = cm_rep_get_srq(rep_msg);
1962         work->cm_event.private_data = &rep_msg->private_data;
1963 }
1964
1965 static void cm_dup_rep_handler(struct cm_work *work)
1966 {
1967         struct cm_id_private *cm_id_priv;
1968         struct cm_rep_msg *rep_msg;
1969         struct ib_mad_send_buf *msg = NULL;
1970         int ret;
1971
1972         rep_msg = (struct cm_rep_msg *) work->mad_recv_wc->recv_buf.mad;
1973         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id,
1974                                    rep_msg->local_comm_id);
1975         if (!cm_id_priv)
1976                 return;
1977
1978         atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
1979                         counter[CM_REP_COUNTER]);
1980         ret = cm_alloc_response_msg(work->port, work->mad_recv_wc, &msg);
1981         if (ret)
1982                 goto deref;
1983
1984         spin_lock_irq(&cm_id_priv->lock);
1985         if (cm_id_priv->id.state == IB_CM_ESTABLISHED)
1986                 cm_format_rtu((struct cm_rtu_msg *) msg->mad, cm_id_priv,
1987                               cm_id_priv->private_data,
1988                               cm_id_priv->private_data_len);
1989         else if (cm_id_priv->id.state == IB_CM_MRA_REP_SENT)
1990                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
1991                               CM_MSG_RESPONSE_REP, cm_id_priv->service_timeout,
1992                               cm_id_priv->private_data,
1993                               cm_id_priv->private_data_len);
1994         else
1995                 goto unlock;
1996         spin_unlock_irq(&cm_id_priv->lock);
1997
1998         ret = ib_post_send_mad(msg, NULL);
1999         if (ret)
2000                 goto free;
2001         goto deref;
2002
2003 unlock: spin_unlock_irq(&cm_id_priv->lock);
2004 free:   cm_free_msg(msg);
2005 deref:  cm_deref_id(cm_id_priv);
2006 }
2007
2008 static int cm_rep_handler(struct cm_work *work)
2009 {
2010         struct cm_id_private *cm_id_priv;
2011         struct cm_rep_msg *rep_msg;
2012         int ret;
2013
2014         rep_msg = (struct cm_rep_msg *)work->mad_recv_wc->recv_buf.mad;
2015         cm_id_priv = cm_acquire_id(rep_msg->remote_comm_id, 0);
2016         if (!cm_id_priv) {
2017                 cm_dup_rep_handler(work);
2018                 return -EINVAL;
2019         }
2020
2021         cm_format_rep_event(work, cm_id_priv->qp_type);
2022
2023         spin_lock_irq(&cm_id_priv->lock);
2024         switch (cm_id_priv->id.state) {
2025         case IB_CM_REQ_SENT:
2026         case IB_CM_MRA_REQ_RCVD:
2027                 break;
2028         default:
2029                 spin_unlock_irq(&cm_id_priv->lock);
2030                 ret = -EINVAL;
2031                 goto error;
2032         }
2033
2034         cm_id_priv->timewait_info->work.remote_id = rep_msg->local_comm_id;
2035         cm_id_priv->timewait_info->remote_ca_guid = rep_msg->local_ca_guid;
2036         cm_id_priv->timewait_info->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2037
2038         spin_lock(&cm.lock);
2039         /* Check for duplicate REP. */
2040         if (cm_insert_remote_id(cm_id_priv->timewait_info)) {
2041                 spin_unlock(&cm.lock);
2042                 spin_unlock_irq(&cm_id_priv->lock);
2043                 ret = -EINVAL;
2044                 goto error;
2045         }
2046         /* Check for a stale connection. */
2047         if (cm_insert_remote_qpn(cm_id_priv->timewait_info)) {
2048                 rb_erase(&cm_id_priv->timewait_info->remote_id_node,
2049                          &cm.remote_id_table);
2050                 cm_id_priv->timewait_info->inserted_remote_id = 0;
2051                 spin_unlock(&cm.lock);
2052                 spin_unlock_irq(&cm_id_priv->lock);
2053                 cm_issue_rej(work->port, work->mad_recv_wc,
2054                              IB_CM_REJ_STALE_CONN, CM_MSG_RESPONSE_REP,
2055                              NULL, 0);
2056                 ret = -EINVAL;
2057                 goto error;
2058         }
2059         spin_unlock(&cm.lock);
2060
2061         cm_id_priv->id.state = IB_CM_REP_RCVD;
2062         cm_id_priv->id.remote_id = rep_msg->local_comm_id;
2063         cm_id_priv->remote_qpn = cm_rep_get_qpn(rep_msg, cm_id_priv->qp_type);
2064         cm_id_priv->initiator_depth = rep_msg->resp_resources;
2065         cm_id_priv->responder_resources = rep_msg->initiator_depth;
2066         cm_id_priv->sq_psn = cm_rep_get_starting_psn(rep_msg);
2067         cm_id_priv->rnr_retry_count = cm_rep_get_rnr_retry_count(rep_msg);
2068         cm_id_priv->target_ack_delay = cm_rep_get_target_ack_delay(rep_msg);
2069         cm_id_priv->av.timeout =
2070                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2071                                        cm_id_priv->av.timeout - 1);
2072         cm_id_priv->alt_av.timeout =
2073                         cm_ack_timeout(cm_id_priv->target_ack_delay,
2074                                        cm_id_priv->alt_av.timeout - 1);
2075
2076         /* todo: handle peer_to_peer */
2077
2078         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2079         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2080         if (!ret)
2081                 list_add_tail(&work->list, &cm_id_priv->work_list);
2082         spin_unlock_irq(&cm_id_priv->lock);
2083
2084         if (ret)
2085                 cm_process_work(cm_id_priv, work);
2086         else
2087                 cm_deref_id(cm_id_priv);
2088         return 0;
2089
2090 error:
2091         cm_deref_id(cm_id_priv);
2092         return ret;
2093 }
2094
2095 static int cm_establish_handler(struct cm_work *work)
2096 {
2097         struct cm_id_private *cm_id_priv;
2098         int ret;
2099
2100         /* See comment in cm_establish about lookup. */
2101         cm_id_priv = cm_acquire_id(work->local_id, work->remote_id);
2102         if (!cm_id_priv)
2103                 return -EINVAL;
2104
2105         spin_lock_irq(&cm_id_priv->lock);
2106         if (cm_id_priv->id.state != IB_CM_ESTABLISHED) {
2107                 spin_unlock_irq(&cm_id_priv->lock);
2108                 goto out;
2109         }
2110
2111         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2112         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2113         if (!ret)
2114                 list_add_tail(&work->list, &cm_id_priv->work_list);
2115         spin_unlock_irq(&cm_id_priv->lock);
2116
2117         if (ret)
2118                 cm_process_work(cm_id_priv, work);
2119         else
2120                 cm_deref_id(cm_id_priv);
2121         return 0;
2122 out:
2123         cm_deref_id(cm_id_priv);
2124         return -EINVAL;
2125 }
2126
2127 static int cm_rtu_handler(struct cm_work *work)
2128 {
2129         struct cm_id_private *cm_id_priv;
2130         struct cm_rtu_msg *rtu_msg;
2131         int ret;
2132
2133         rtu_msg = (struct cm_rtu_msg *)work->mad_recv_wc->recv_buf.mad;
2134         cm_id_priv = cm_acquire_id(rtu_msg->remote_comm_id,
2135                                    rtu_msg->local_comm_id);
2136         if (!cm_id_priv)
2137                 return -EINVAL;
2138
2139         work->cm_event.private_data = &rtu_msg->private_data;
2140
2141         spin_lock_irq(&cm_id_priv->lock);
2142         if (cm_id_priv->id.state != IB_CM_REP_SENT &&
2143             cm_id_priv->id.state != IB_CM_MRA_REP_RCVD) {
2144                 spin_unlock_irq(&cm_id_priv->lock);
2145                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2146                                 counter[CM_RTU_COUNTER]);
2147                 goto out;
2148         }
2149         cm_id_priv->id.state = IB_CM_ESTABLISHED;
2150
2151         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2152         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2153         if (!ret)
2154                 list_add_tail(&work->list, &cm_id_priv->work_list);
2155         spin_unlock_irq(&cm_id_priv->lock);
2156
2157         if (ret)
2158                 cm_process_work(cm_id_priv, work);
2159         else
2160                 cm_deref_id(cm_id_priv);
2161         return 0;
2162 out:
2163         cm_deref_id(cm_id_priv);
2164         return -EINVAL;
2165 }
2166
2167 static void cm_format_dreq(struct cm_dreq_msg *dreq_msg,
2168                           struct cm_id_private *cm_id_priv,
2169                           const void *private_data,
2170                           u8 private_data_len)
2171 {
2172         cm_format_mad_hdr(&dreq_msg->hdr, CM_DREQ_ATTR_ID,
2173                           cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_DREQ));
2174         dreq_msg->local_comm_id = cm_id_priv->id.local_id;
2175         dreq_msg->remote_comm_id = cm_id_priv->id.remote_id;
2176         cm_dreq_set_remote_qpn(dreq_msg, cm_id_priv->remote_qpn);
2177
2178         if (private_data && private_data_len)
2179                 memcpy(dreq_msg->private_data, private_data, private_data_len);
2180 }
2181
2182 int ib_send_cm_dreq(struct ib_cm_id *cm_id,
2183                     const void *private_data,
2184                     u8 private_data_len)
2185 {
2186         struct cm_id_private *cm_id_priv;
2187         struct ib_mad_send_buf *msg;
2188         unsigned long flags;
2189         int ret;
2190
2191         if (private_data && private_data_len > IB_CM_DREQ_PRIVATE_DATA_SIZE)
2192                 return -EINVAL;
2193
2194         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2195         spin_lock_irqsave(&cm_id_priv->lock, flags);
2196         if (cm_id->state != IB_CM_ESTABLISHED) {
2197                 ret = -EINVAL;
2198                 goto out;
2199         }
2200
2201         if (cm_id->lap_state == IB_CM_LAP_SENT ||
2202             cm_id->lap_state == IB_CM_MRA_LAP_RCVD)
2203                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2204
2205         ret = cm_alloc_msg(cm_id_priv, &msg);
2206         if (ret) {
2207                 cm_enter_timewait(cm_id_priv);
2208                 goto out;
2209         }
2210
2211         cm_format_dreq((struct cm_dreq_msg *) msg->mad, cm_id_priv,
2212                        private_data, private_data_len);
2213         msg->timeout_ms = cm_id_priv->timeout_ms;
2214         msg->context[1] = (void *) (unsigned long) IB_CM_DREQ_SENT;
2215
2216         ret = ib_post_send_mad(msg, NULL);
2217         if (ret) {
2218                 cm_enter_timewait(cm_id_priv);
2219                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2220                 cm_free_msg(msg);
2221                 return ret;
2222         }
2223
2224         cm_id->state = IB_CM_DREQ_SENT;
2225         cm_id_priv->msg = msg;
2226 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2227         return ret;
2228 }
2229 EXPORT_SYMBOL(ib_send_cm_dreq);
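
/*
 * Illustrative sketch (editorial): either side of an established
 * connection may start teardown with a DREQ; private data is optional
 * and limited to IB_CM_DREQ_PRIVATE_DATA_SIZE bytes.
 *
 *      ret = ib_send_cm_dreq(cm_id, NULL, 0);
 */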
2230
2231 static void cm_format_drep(struct cm_drep_msg *drep_msg,
2232                           struct cm_id_private *cm_id_priv,
2233                           const void *private_data,
2234                           u8 private_data_len)
2235 {
2236         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, cm_id_priv->tid);
2237         drep_msg->local_comm_id = cm_id_priv->id.local_id;
2238         drep_msg->remote_comm_id = cm_id_priv->id.remote_id;
2239
2240         if (private_data && private_data_len)
2241                 memcpy(drep_msg->private_data, private_data, private_data_len);
2242 }
2243
2244 int ib_send_cm_drep(struct ib_cm_id *cm_id,
2245                     const void *private_data,
2246                     u8 private_data_len)
2247 {
2248         struct cm_id_private *cm_id_priv;
2249         struct ib_mad_send_buf *msg;
2250         unsigned long flags;
2251         void *data;
2252         int ret;
2253
2254         if (private_data && private_data_len > IB_CM_DREP_PRIVATE_DATA_SIZE)
2255                 return -EINVAL;
2256
2257         data = cm_copy_private_data(private_data, private_data_len);
2258         if (IS_ERR(data))
2259                 return PTR_ERR(data);
2260
2261         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2262         spin_lock_irqsave(&cm_id_priv->lock, flags);
2263         if (cm_id->state != IB_CM_DREQ_RCVD) {
2264                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2265                 kfree(data);
2266                 return -EINVAL;
2267         }
2268
2269         cm_set_private_data(cm_id_priv, data, private_data_len);
2270         cm_enter_timewait(cm_id_priv);
2271
2272         ret = cm_alloc_msg(cm_id_priv, &msg);
2273         if (ret)
2274                 goto out;
2275
2276         cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2277                        private_data, private_data_len);
2278
2279         ret = ib_post_send_mad(msg, NULL);
2280         if (ret) {
2281                 spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2282                 cm_free_msg(msg);
2283                 return ret;
2284         }
2285
2286 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2287         return ret;
2288 }
2289 EXPORT_SYMBOL(ib_send_cm_drep);
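
/*
 * Illustrative sketch (editorial): the peer answers IB_CM_DREQ_RECEIVED
 * with a DREP, which also moves the local cm_id into timewait:
 *
 *      case IB_CM_DREQ_RECEIVED:
 *              ret = ib_send_cm_drep(cm_id, NULL, 0);
 *              break;
 */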
2290
2291 static int cm_issue_drep(struct cm_port *port,
2292                          struct ib_mad_recv_wc *mad_recv_wc)
2293 {
2294         struct ib_mad_send_buf *msg = NULL;
2295         struct cm_dreq_msg *dreq_msg;
2296         struct cm_drep_msg *drep_msg;
2297         int ret;
2298
2299         ret = cm_alloc_response_msg(port, mad_recv_wc, &msg);
2300         if (ret)
2301                 return ret;
2302
2303         dreq_msg = (struct cm_dreq_msg *) mad_recv_wc->recv_buf.mad;
2304         drep_msg = (struct cm_drep_msg *) msg->mad;
2305
2306         cm_format_mad_hdr(&drep_msg->hdr, CM_DREP_ATTR_ID, dreq_msg->hdr.tid);
2307         drep_msg->remote_comm_id = dreq_msg->local_comm_id;
2308         drep_msg->local_comm_id = dreq_msg->remote_comm_id;
2309
2310         ret = ib_post_send_mad(msg, NULL);
2311         if (ret)
2312                 cm_free_msg(msg);
2313
2314         return ret;
2315 }
2316
2317 static int cm_dreq_handler(struct cm_work *work)
2318 {
2319         struct cm_id_private *cm_id_priv;
2320         struct cm_dreq_msg *dreq_msg;
2321         struct ib_mad_send_buf *msg = NULL;
2322         int ret;
2323
2324         dreq_msg = (struct cm_dreq_msg *)work->mad_recv_wc->recv_buf.mad;
2325         cm_id_priv = cm_acquire_id(dreq_msg->remote_comm_id,
2326                                    dreq_msg->local_comm_id);
2327         if (!cm_id_priv) {
2328                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2329                                 counter[CM_DREQ_COUNTER]);
2330                 cm_issue_drep(work->port, work->mad_recv_wc);
2331                 return -EINVAL;
2332         }
2333
2334         work->cm_event.private_data = &dreq_msg->private_data;
2335
2336         spin_lock_irq(&cm_id_priv->lock);
2337         if (cm_id_priv->local_qpn != cm_dreq_get_remote_qpn(dreq_msg))
2338                 goto unlock;
2339
2340         switch (cm_id_priv->id.state) {
2341         case IB_CM_REP_SENT:
2342         case IB_CM_DREQ_SENT:
2343                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2344                 break;
2345         case IB_CM_ESTABLISHED:
2346                 if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT ||
2347                     cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
2348                         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2349                 break;
2350         case IB_CM_MRA_REP_RCVD:
2351                 break;
2352         case IB_CM_TIMEWAIT:
2353                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2354                                 counter[CM_DREQ_COUNTER]);
2355                 msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
2356                 if (IS_ERR(msg))
2357                         goto unlock;
2358
2359                 cm_format_drep((struct cm_drep_msg *) msg->mad, cm_id_priv,
2360                                cm_id_priv->private_data,
2361                                cm_id_priv->private_data_len);
2362                 spin_unlock_irq(&cm_id_priv->lock);
2363
2364                 if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
2365                     ib_post_send_mad(msg, NULL))
2366                         cm_free_msg(msg);
2367                 goto deref;
2368         case IB_CM_DREQ_RCVD:
2369                 atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
2370                                 counter[CM_DREQ_COUNTER]);
2371                 goto unlock;
2372         default:
2373                 goto unlock;
2374         }
2375         cm_id_priv->id.state = IB_CM_DREQ_RCVD;
2376         cm_id_priv->tid = dreq_msg->hdr.tid;
2377         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2378         if (!ret)
2379                 list_add_tail(&work->list, &cm_id_priv->work_list);
2380         spin_unlock_irq(&cm_id_priv->lock);
2381
2382         if (ret)
2383                 cm_process_work(cm_id_priv, work);
2384         else
2385                 cm_deref_id(cm_id_priv);
2386         return 0;
2387
2388 unlock: spin_unlock_irq(&cm_id_priv->lock);
2389 deref:  cm_deref_id(cm_id_priv);
2390         return -EINVAL;
2391 }
2392
2393 static int cm_drep_handler(struct cm_work *work)
2394 {
2395         struct cm_id_private *cm_id_priv;
2396         struct cm_drep_msg *drep_msg;
2397         int ret;
2398
2399         drep_msg = (struct cm_drep_msg *)work->mad_recv_wc->recv_buf.mad;
2400         cm_id_priv = cm_acquire_id(drep_msg->remote_comm_id,
2401                                    drep_msg->local_comm_id);
2402         if (!cm_id_priv)
2403                 return -EINVAL;
2404
2405         work->cm_event.private_data = &drep_msg->private_data;
2406
2407         spin_lock_irq(&cm_id_priv->lock);
2408         if (cm_id_priv->id.state != IB_CM_DREQ_SENT &&
2409             cm_id_priv->id.state != IB_CM_DREQ_RCVD) {
2410                 spin_unlock_irq(&cm_id_priv->lock);
2411                 goto out;
2412         }
2413         cm_enter_timewait(cm_id_priv);
2414
2415         ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2416         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2417         if (!ret)
2418                 list_add_tail(&work->list, &cm_id_priv->work_list);
2419         spin_unlock_irq(&cm_id_priv->lock);
2420
2421         if (ret)
2422                 cm_process_work(cm_id_priv, work);
2423         else
2424                 cm_deref_id(cm_id_priv);
2425         return 0;
2426 out:
2427         cm_deref_id(cm_id_priv);
2428         return -EINVAL;
2429 }
2430
2431 int ib_send_cm_rej(struct ib_cm_id *cm_id,
2432                    enum ib_cm_rej_reason reason,
2433                    void *ari,
2434                    u8 ari_length,
2435                    const void *private_data,
2436                    u8 private_data_len)
2437 {
2438         struct cm_id_private *cm_id_priv;
2439         struct ib_mad_send_buf *msg;
2440         unsigned long flags;
2441         int ret;
2442
2443         if ((private_data && private_data_len > IB_CM_REJ_PRIVATE_DATA_SIZE) ||
2444             (ari && ari_length > IB_CM_REJ_ARI_LENGTH))
2445                 return -EINVAL;
2446
2447         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2448
2449         spin_lock_irqsave(&cm_id_priv->lock, flags);
2450         switch (cm_id->state) {
2451         case IB_CM_REQ_SENT:
2452         case IB_CM_MRA_REQ_RCVD:
2453         case IB_CM_REQ_RCVD:
2454         case IB_CM_MRA_REQ_SENT:
2455         case IB_CM_REP_RCVD:
2456         case IB_CM_MRA_REP_SENT:
2457                 ret = cm_alloc_msg(cm_id_priv, &msg);
2458                 if (!ret)
2459                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2460                                       cm_id_priv, reason, ari, ari_length,
2461                                       private_data, private_data_len);
2462
2463                 cm_reset_to_idle(cm_id_priv);
2464                 break;
2465         case IB_CM_REP_SENT:
2466         case IB_CM_MRA_REP_RCVD:
2467                 ret = cm_alloc_msg(cm_id_priv, &msg);
2468                 if (!ret)
2469                         cm_format_rej((struct cm_rej_msg *) msg->mad,
2470                                       cm_id_priv, reason, ari, ari_length,
2471                                       private_data, private_data_len);
2472
2473                 cm_enter_timewait(cm_id_priv);
2474                 break;
2475         default:
2476                 ret = -EINVAL;
2477                 goto out;
2478         }
2479
2480         if (ret)
2481                 goto out;
2482
2483         ret = ib_post_send_mad(msg, NULL);
2484         if (ret)
2485                 cm_free_msg(msg);
2486
2487 out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2488         return ret;
2489 }
2490 EXPORT_SYMBOL(ib_send_cm_rej);
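
/*
 * Illustrative sketch (editorial): a listener that cannot accept a REQ
 * may reject it from its cm_handler.  Reason codes come from
 * enum ib_cm_rej_reason, and the optional ARI buffer is capped at
 * IB_CM_REJ_ARI_LENGTH bytes:
 *
 *      case IB_CM_REQ_RECEIVED:
 *              ret = ib_send_cm_rej(cm_id, IB_CM_REJ_CONSUMER_DEFINED,
 *                                   NULL, 0, NULL, 0);
 *              break;
 */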
2491
2492 static void cm_format_rej_event(struct cm_work *work)
2493 {
2494         struct cm_rej_msg *rej_msg;
2495         struct ib_cm_rej_event_param *param;
2496
2497         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2498         param = &work->cm_event.param.rej_rcvd;
2499         param->ari = rej_msg->ari;
2500         param->ari_length = cm_rej_get_reject_info_len(rej_msg);
2501         param->reason = __be16_to_cpu(rej_msg->reason);
2502         work->cm_event.private_data = &rej_msg->private_data;
2503 }
2504
2505 static struct cm_id_private * cm_acquire_rejected_id(struct cm_rej_msg *rej_msg)
2506 {
2507         struct cm_timewait_info *timewait_info;
2508         struct cm_id_private *cm_id_priv;
2509         __be32 remote_id;
2510
2511         remote_id = rej_msg->local_comm_id;
2512
2513         if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_TIMEOUT) {
2514                 spin_lock_irq(&cm.lock);
2515                 timewait_info = cm_find_remote_id(*((__be64 *) rej_msg->ari),
2516                                                   remote_id);
2517                 if (!timewait_info) {
2518                         spin_unlock_irq(&cm.lock);
2519                         return NULL;
2520                 }
2521                 cm_id_priv = idr_find(&cm.local_id_table, (__force int)
2522                                       (timewait_info->work.local_id ^
2523                                        cm.random_id_operand));
2524                 if (cm_id_priv) {
2525                         if (cm_id_priv->id.remote_id == remote_id)
2526                                 atomic_inc(&cm_id_priv->refcount);
2527                         else
2528                                 cm_id_priv = NULL;
2529                 }
2530                 spin_unlock_irq(&cm.lock);
2531         } else if (cm_rej_get_msg_rejected(rej_msg) == CM_MSG_RESPONSE_REQ)
2532                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, 0);
2533         else
2534                 cm_id_priv = cm_acquire_id(rej_msg->remote_comm_id, remote_id);
2535
2536         return cm_id_priv;
2537 }
2538
2539 static int cm_rej_handler(struct cm_work *work)
2540 {
2541         struct cm_id_private *cm_id_priv;
2542         struct cm_rej_msg *rej_msg;
2543         int ret;
2544
2545         rej_msg = (struct cm_rej_msg *)work->mad_recv_wc->recv_buf.mad;
2546         cm_id_priv = cm_acquire_rejected_id(rej_msg);
2547         if (!cm_id_priv)
2548                 return -EINVAL;
2549
2550         cm_format_rej_event(work);
2551
2552         spin_lock_irq(&cm_id_priv->lock);
2553         switch (cm_id_priv->id.state) {
2554         case IB_CM_REQ_SENT:
2555         case IB_CM_MRA_REQ_RCVD:
2556         case IB_CM_REP_SENT:
2557         case IB_CM_MRA_REP_RCVD:
2558                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2559                 /* fall through */
2560         case IB_CM_REQ_RCVD:
2561         case IB_CM_MRA_REQ_SENT:
2562                 if (__be16_to_cpu(rej_msg->reason) == IB_CM_REJ_STALE_CONN)
2563                         cm_enter_timewait(cm_id_priv);
2564                 else
2565                         cm_reset_to_idle(cm_id_priv);
2566                 break;
2567         case IB_CM_DREQ_SENT:
2568                 ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
2569                 /* fall through */
2570         case IB_CM_REP_RCVD:
2571         case IB_CM_MRA_REP_SENT:
2572                 cm_enter_timewait(cm_id_priv);
2573                 break;
2574         case IB_CM_ESTABLISHED:
2575                 if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT ||
2576                     cm_id_priv->id.lap_state == IB_CM_LAP_SENT) {
2577                         if (cm_id_priv->id.lap_state == IB_CM_LAP_SENT)
2578                                 ib_cancel_mad(cm_id_priv->av.port->mad_agent,
2579                                               cm_id_priv->msg);
2580                         cm_enter_timewait(cm_id_priv);
2581                         break;
2582                 }
2583                 /* fall through */
2584         default:
2585                 spin_unlock_irq(&cm_id_priv->lock);
2586                 ret = -EINVAL;
2587                 goto out;
2588         }
2589
2590         ret = atomic_inc_and_test(&cm_id_priv->work_count);
2591         if (!ret)
2592                 list_add_tail(&work->list, &cm_id_priv->work_list);
2593         spin_unlock_irq(&cm_id_priv->lock);
2594
2595         if (ret)
2596                 cm_process_work(cm_id_priv, work);
2597         else
2598                 cm_deref_id(cm_id_priv);
2599         return 0;
2600 out:
2601         cm_deref_id(cm_id_priv);
2602         return -EINVAL;
2603 }
2604
2605 int ib_send_cm_mra(struct ib_cm_id *cm_id,
2606                    u8 service_timeout,
2607                    const void *private_data,
2608                    u8 private_data_len)
2609 {
2610         struct cm_id_private *cm_id_priv;
2611         struct ib_mad_send_buf *msg;
2612         enum ib_cm_state cm_state;
2613         enum ib_cm_lap_state lap_state;
2614         enum cm_msg_response msg_response;
2615         void *data;
2616         unsigned long flags;
2617         int ret;
2618
2619         if (private_data && private_data_len > IB_CM_MRA_PRIVATE_DATA_SIZE)
2620                 return -EINVAL;
2621
2622         data = cm_copy_private_data(private_data, private_data_len);
2623         if (IS_ERR(data))
2624                 return PTR_ERR(data);
2625
2626         cm_id_priv = container_of(cm_id, struct cm_id_private, id);
2627
2628         spin_lock_irqsave(&cm_id_priv->lock, flags);
2629         switch (cm_id_priv->id.state) {
2630         case IB_CM_REQ_RCVD:
2631                 cm_state = IB_CM_MRA_REQ_SENT;
2632                 lap_state = cm_id->lap_state;
2633                 msg_response = CM_MSG_RESPONSE_REQ;
2634                 break;
2635         case IB_CM_REP_RCVD:
2636                 cm_state = IB_CM_MRA_REP_SENT;
2637                 lap_state = cm_id->lap_state;
2638                 msg_response = CM_MSG_RESPONSE_REP;
2639                 break;
2640         case IB_CM_ESTABLISHED:
2641                 if (cm_id->lap_state == IB_CM_LAP_RCVD) {
2642                         cm_state = cm_id->state;
2643                         lap_state = IB_CM_MRA_LAP_SENT;
2644                         msg_response = CM_MSG_RESPONSE_OTHER;
2645                         break;
2646                 }
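                /* fall through */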
2647         default:
2648                 ret = -EINVAL;
2649                 goto error1;
2650         }
2651
2652         if (!(service_timeout & IB_CM_MRA_FLAG_DELAY)) {
2653                 ret = cm_alloc_msg(cm_id_priv, &msg);
2654                 if (ret)
2655                         goto error1;
2656
2657                 cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
2658                               msg_response, service_timeout,
2659                               private_data, private_data_len);
2660                 ret = ib_post_send_mad(msg, NULL);
2661                 if (ret)
2662                         goto error2;
2663         }
2664
2665         cm_id->state = cm_state;
2666         cm_id->lap_state = lap_state;
2667         cm_id_priv->service_timeout = service_timeout;
2668         cm_set_private_data(cm_id_priv, data, private_data_len);
2669         spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2670         return 0;
2671
2672 error1: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2673         kfree(data);
2674         return ret;
2675
2676 error2: spin_unlock_irqrestore(&cm_id_priv->lock, flags);
2677         kfree(data);
2678         cm_free_msg(msg);
2679         return ret;
2680 }
2681 EXPORT_SYMBOL(ib_send_cm_mra);
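
/*
 * Illustrative sketch (editorial): a consumer that needs more time before
 * replying to a REQ can send an MRA.  Passing IB_CM_MRA_FLAG_DELAY in
 * service_timeout records the new timeout without putting an MRA on the
 * wire; one is emitted later only if the peer retries the REQ (see
 * cm_dup_req_handler above).  The 5-bit timeout value is an assumed
 * example:
 *
 *      case IB_CM_REQ_RECEIVED:
 *              ret = ib_send_cm_mra(cm_id, 16 | IB_CM_MRA_FLAG_DELAY,
 *                                   NULL, 0);
 *              break;
 */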
2682
2683 static struct cm_id_private * cm_acquire_mraed_id(struct cm_mra_msg *mra_msg)
2684 {
2685         switch (cm_mra_get_msg_mraed(mra_msg)) {
2686         case CM_MSG_RESPONSE_REQ:
2687                 return cm_acquire_id(mra_msg->remote_comm_id, 0);
2688         case CM_MSG_RESPONSE_REP:
2689         case CM_MSG_RESPONSE_OTHER:
2690                 return cm_acquire_id(mra_msg->remote_comm_id,
2691                                      mra_msg->local_comm_id);
2692         default:
2693                 return NULL;
2694         }
2695 }
2696
static int cm_mra_handler(struct cm_work *work)
{
        struct cm_id_private *cm_id_priv;
        struct cm_mra_msg *mra_msg;
        int timeout, ret;

        mra_msg = (struct cm_mra_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_mraed_id(mra_msg);
        if (!cm_id_priv)
                return -EINVAL;

        work->cm_event.private_data = &mra_msg->private_data;
        work->cm_event.param.mra_rcvd.service_timeout =
                                        cm_mra_get_service_timeout(mra_msg);
        timeout = cm_convert_to_ms(cm_mra_get_service_timeout(mra_msg)) +
                  cm_convert_to_ms(cm_id_priv->av.timeout);

        spin_lock_irq(&cm_id_priv->lock);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
                if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REQ ||
                    ib_modify_mad(cm_id_priv->av.port->mad_agent,
                                  cm_id_priv->msg, timeout))
                        goto out;
                cm_id_priv->id.state = IB_CM_MRA_REQ_RCVD;
                break;
        case IB_CM_REP_SENT:
                if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_REP ||
                    ib_modify_mad(cm_id_priv->av.port->mad_agent,
                                  cm_id_priv->msg, timeout))
                        goto out;
                cm_id_priv->id.state = IB_CM_MRA_REP_RCVD;
                break;
        case IB_CM_ESTABLISHED:
                if (cm_mra_get_msg_mraed(mra_msg) != CM_MSG_RESPONSE_OTHER ||
                    cm_id_priv->id.lap_state != IB_CM_LAP_SENT ||
                    ib_modify_mad(cm_id_priv->av.port->mad_agent,
                                  cm_id_priv->msg, timeout)) {
                        if (cm_id_priv->id.lap_state == IB_CM_MRA_LAP_RCVD)
                                atomic_long_inc(&work->port->
                                                counter_group[CM_RECV_DUPLICATES].
                                                counter[CM_MRA_COUNTER]);
                        goto out;
                }
                cm_id_priv->id.lap_state = IB_CM_MRA_LAP_RCVD;
                break;
        case IB_CM_MRA_REQ_RCVD:
        case IB_CM_MRA_REP_RCVD:
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_MRA_COUNTER]);
                /* fall through */
        default:
                goto out;
        }

        cm_id_priv->msg->context[1] = (void *) (unsigned long)
                                      cm_id_priv->id.state;
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);

        if (ret)
                cm_process_work(cm_id_priv, work);
        else
                cm_deref_id(cm_id_priv);
        return 0;
out:
        spin_unlock_irq(&cm_id_priv->lock);
        cm_deref_id(cm_id_priv);
        return -EINVAL;
}

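/* Build a LAP message proposing the given alternate path to the peer. */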
static void cm_format_lap(struct cm_lap_msg *lap_msg,
                          struct cm_id_private *cm_id_priv,
                          struct ib_sa_path_rec *alternate_path,
                          const void *private_data,
                          u8 private_data_len)
{
        cm_format_mad_hdr(&lap_msg->hdr, CM_LAP_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_LAP));
        lap_msg->local_comm_id = cm_id_priv->id.local_id;
        lap_msg->remote_comm_id = cm_id_priv->id.remote_id;
        cm_lap_set_remote_qpn(lap_msg, cm_id_priv->remote_qpn);
        /* todo: need remote CM response timeout */
        cm_lap_set_remote_resp_timeout(lap_msg, 0x1F);
        lap_msg->alt_local_lid = alternate_path->slid;
        lap_msg->alt_remote_lid = alternate_path->dlid;
        lap_msg->alt_local_gid = alternate_path->sgid;
        lap_msg->alt_remote_gid = alternate_path->dgid;
        cm_lap_set_flow_label(lap_msg, alternate_path->flow_label);
        cm_lap_set_traffic_class(lap_msg, alternate_path->traffic_class);
        lap_msg->alt_hop_limit = alternate_path->hop_limit;
        cm_lap_set_packet_rate(lap_msg, alternate_path->rate);
        cm_lap_set_sl(lap_msg, alternate_path->sl);
        cm_lap_set_subnet_local(lap_msg, 1); /* local only... */
        cm_lap_set_local_ack_timeout(lap_msg,
                cm_ack_timeout(cm_id_priv->av.port->cm_dev->ack_delay,
                               alternate_path->packet_life_time));

        if (private_data && private_data_len)
                memcpy(lap_msg->private_data, private_data, private_data_len);
}

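/*
 * Send a LAP to load an alternate path.  This is only permitted on an
 * established connection whose LAP exchange is uninitialized or idle.
 */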
int ib_send_cm_lap(struct ib_cm_id *cm_id,
                   struct ib_sa_path_rec *alternate_path,
                   const void *private_data,
                   u8 private_data_len)
{
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
        unsigned long flags;
        int ret;

        if (private_data && private_data_len > IB_CM_LAP_PRIVATE_DATA_SIZE)
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_ESTABLISHED ||
            (cm_id->lap_state != IB_CM_LAP_UNINIT &&
             cm_id->lap_state != IB_CM_LAP_IDLE)) {
                ret = -EINVAL;
                goto out;
        }

        ret = cm_init_av_by_path(alternate_path, &cm_id_priv->alt_av,
                                 cm_id_priv);
        if (ret)
                goto out;
        cm_id_priv->alt_av.timeout =
                        cm_ack_timeout(cm_id_priv->target_ack_delay,
                                       cm_id_priv->alt_av.timeout - 1);

        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
                goto out;

        cm_format_lap((struct cm_lap_msg *) msg->mad, cm_id_priv,
                      alternate_path, private_data, private_data_len);
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_ESTABLISHED;

        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }

        cm_id->lap_state = IB_CM_LAP_SENT;
        cm_id_priv->msg = msg;

out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}
EXPORT_SYMBOL(ib_send_cm_lap);

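/*
 * Recover the alternate path record from a received LAP.  Source and
 * destination fields are swapped, since the message describes the path
 * from the sender's point of view.
 */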
static void cm_format_path_from_lap(struct cm_id_private *cm_id_priv,
                                    struct ib_sa_path_rec *path,
                                    struct cm_lap_msg *lap_msg)
{
        memset(path, 0, sizeof *path);
        path->dgid = lap_msg->alt_local_gid;
        path->sgid = lap_msg->alt_remote_gid;
        path->dlid = lap_msg->alt_local_lid;
        path->slid = lap_msg->alt_remote_lid;
        path->flow_label = cm_lap_get_flow_label(lap_msg);
        path->hop_limit = lap_msg->alt_hop_limit;
        path->traffic_class = cm_lap_get_traffic_class(lap_msg);
        path->reversible = 1;
        path->pkey = cm_id_priv->pkey;
        path->sl = cm_lap_get_sl(lap_msg);
        path->mtu_selector = IB_SA_EQ;
        path->mtu = cm_id_priv->path_mtu;
        path->rate_selector = IB_SA_EQ;
        path->rate = cm_lap_get_packet_rate(lap_msg);
        path->packet_life_time_selector = IB_SA_EQ;
        path->packet_life_time = cm_lap_get_local_ack_timeout(lap_msg);
        path->packet_life_time -= (path->packet_life_time > 0);
}

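/*
 * Handle a received LAP.  A duplicate of a LAP we already answered with
 * an MRA is re-MRAed; any other out-of-sequence LAP is counted as a
 * duplicate (where applicable) and dropped.
 */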
static int cm_lap_handler(struct cm_work *work)
{
        struct cm_id_private *cm_id_priv;
        struct cm_lap_msg *lap_msg;
        struct ib_cm_lap_event_param *param;
        struct ib_mad_send_buf *msg = NULL;
        int ret;

        /* todo: verify LAP request and send reject APR if invalid. */
        lap_msg = (struct cm_lap_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(lap_msg->remote_comm_id,
                                   lap_msg->local_comm_id);
        if (!cm_id_priv)
                return -EINVAL;

        param = &work->cm_event.param.lap_rcvd;
        param->alternate_path = &work->path[0];
        cm_format_path_from_lap(cm_id_priv, param->alternate_path, lap_msg);
        work->cm_event.private_data = &lap_msg->private_data;

        spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED)
                goto unlock;

        switch (cm_id_priv->id.lap_state) {
        case IB_CM_LAP_UNINIT:
        case IB_CM_LAP_IDLE:
                break;
        case IB_CM_MRA_LAP_SENT:
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_LAP_COUNTER]);
                msg = cm_alloc_response_msg_no_ah(work->port, work->mad_recv_wc);
                if (IS_ERR(msg))
                        goto unlock;

                cm_format_mra((struct cm_mra_msg *) msg->mad, cm_id_priv,
                              CM_MSG_RESPONSE_OTHER,
                              cm_id_priv->service_timeout,
                              cm_id_priv->private_data,
                              cm_id_priv->private_data_len);
                spin_unlock_irq(&cm_id_priv->lock);

                if (cm_create_response_msg_ah(work->port, work->mad_recv_wc, msg) ||
                    ib_post_send_mad(msg, NULL))
                        cm_free_msg(msg);
                goto deref;
        case IB_CM_LAP_RCVD:
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_LAP_COUNTER]);
                goto unlock;
        default:
                goto unlock;
        }

        cm_id_priv->id.lap_state = IB_CM_LAP_RCVD;
        cm_id_priv->tid = lap_msg->hdr.tid;
        ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                      work->mad_recv_wc->recv_buf.grh,
                                      &cm_id_priv->av);
        if (ret)
                goto unlock;
        ret = cm_init_av_by_path(param->alternate_path, &cm_id_priv->alt_av,
                                 cm_id_priv);
        if (ret)
                goto unlock;
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);

        if (ret)
                cm_process_work(cm_id_priv, work);
        else
                cm_deref_id(cm_id_priv);
        return 0;

unlock: spin_unlock_irq(&cm_id_priv->lock);
deref:  cm_deref_id(cm_id_priv);
        return -EINVAL;
}

static void cm_format_apr(struct cm_apr_msg *apr_msg,
                          struct cm_id_private *cm_id_priv,
                          enum ib_cm_apr_status status,
                          void *info,
                          u8 info_length,
                          const void *private_data,
                          u8 private_data_len)
{
        cm_format_mad_hdr(&apr_msg->hdr, CM_APR_ATTR_ID, cm_id_priv->tid);
        apr_msg->local_comm_id = cm_id_priv->id.local_id;
        apr_msg->remote_comm_id = cm_id_priv->id.remote_id;
        apr_msg->ap_status = (u8) status;

        if (info && info_length) {
                apr_msg->info_length = info_length;
                memcpy(apr_msg->info, info, info_length);
        }

        if (private_data && private_data_len)
                memcpy(apr_msg->private_data, private_data, private_data_len);
}

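/*
 * Send an APR answering a received LAP; the status reports whether the
 * alternate path was accepted.  On success the LAP exchange returns to
 * the idle state.
 */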
int ib_send_cm_apr(struct ib_cm_id *cm_id,
                   enum ib_cm_apr_status status,
                   void *info,
                   u8 info_length,
                   const void *private_data,
                   u8 private_data_len)
{
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
        unsigned long flags;
        int ret;

        if ((private_data && private_data_len > IB_CM_APR_PRIVATE_DATA_SIZE) ||
            (info && info_length > IB_CM_APR_INFO_LENGTH))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_ESTABLISHED ||
            (cm_id->lap_state != IB_CM_LAP_RCVD &&
             cm_id->lap_state != IB_CM_MRA_LAP_SENT)) {
                ret = -EINVAL;
                goto out;
        }

        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
                goto out;

        cm_format_apr((struct cm_apr_msg *) msg->mad, cm_id_priv, status,
                      info, info_length, private_data, private_data_len);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }

        cm_id->lap_state = IB_CM_LAP_IDLE;
out:    spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}
EXPORT_SYMBOL(ib_send_cm_apr);

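/*
 * Handle a received APR: cancel the retransmission of our LAP and
 * report the alternate path status to the consumer.
 */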
static int cm_apr_handler(struct cm_work *work)
{
        struct cm_id_private *cm_id_priv;
        struct cm_apr_msg *apr_msg;
        int ret;

        apr_msg = (struct cm_apr_msg *)work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(apr_msg->remote_comm_id,
                                   apr_msg->local_comm_id);
        if (!cm_id_priv)
                return -EINVAL; /* Unmatched reply. */

        work->cm_event.param.apr_rcvd.ap_status = apr_msg->ap_status;
        work->cm_event.param.apr_rcvd.apr_info = &apr_msg->info;
        work->cm_event.param.apr_rcvd.info_len = apr_msg->info_length;
        work->cm_event.private_data = &apr_msg->private_data;

        spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_ESTABLISHED ||
            (cm_id_priv->id.lap_state != IB_CM_LAP_SENT &&
             cm_id_priv->id.lap_state != IB_CM_MRA_LAP_RCVD)) {
                spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.lap_state = IB_CM_LAP_IDLE;
        ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
        cm_id_priv->msg = NULL;

        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);

        if (ret)
                cm_process_work(cm_id_priv, work);
        else
                cm_deref_id(cm_id_priv);
        return 0;
out:
        cm_deref_id(cm_id_priv);
        return -EINVAL;
}

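/*
 * The timewait period for a connection has expired; return the cm_id to
 * IDLE unless it has since been reused for a different remote QP.
 */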
static int cm_timewait_handler(struct cm_work *work)
{
        struct cm_timewait_info *timewait_info;
        struct cm_id_private *cm_id_priv;
        int ret;

        timewait_info = (struct cm_timewait_info *)work;
        spin_lock_irq(&cm.lock);
        list_del(&timewait_info->list);
        spin_unlock_irq(&cm.lock);

        cm_id_priv = cm_acquire_id(timewait_info->work.local_id,
                                   timewait_info->work.remote_id);
        if (!cm_id_priv)
                return -EINVAL;

        spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_TIMEWAIT ||
            cm_id_priv->remote_qpn != timewait_info->remote_qpn) {
                spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.state = IB_CM_IDLE;
        ret = atomic_inc_and_test(&cm_id_priv->work_count);
        if (!ret)
                list_add_tail(&work->list, &cm_id_priv->work_list);
        spin_unlock_irq(&cm_id_priv->lock);

        if (ret)
                cm_process_work(cm_id_priv, work);
        else
                cm_deref_id(cm_id_priv);
        return 0;
out:
        cm_deref_id(cm_id_priv);
        return -EINVAL;
}

static void cm_format_sidr_req(struct cm_sidr_req_msg *sidr_req_msg,
                               struct cm_id_private *cm_id_priv,
                               struct ib_cm_sidr_req_param *param)
{
        cm_format_mad_hdr(&sidr_req_msg->hdr, CM_SIDR_REQ_ATTR_ID,
                          cm_form_tid(cm_id_priv, CM_MSG_SEQUENCE_SIDR));
        sidr_req_msg->request_id = cm_id_priv->id.local_id;
        sidr_req_msg->pkey = param->path->pkey;
        sidr_req_msg->service_id = param->service_id;

        if (param->private_data && param->private_data_len)
                memcpy(sidr_req_msg->private_data, param->private_data,
                       param->private_data_len);
}

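/*
 * Send a SIDR REQ to resolve a service ID to a QP.  SIDR runs outside
 * the normal connection establishment sequence and is only valid on an
 * idle cm_id.
 */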
int ib_send_cm_sidr_req(struct ib_cm_id *cm_id,
                        struct ib_cm_sidr_req_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
        unsigned long flags;
        int ret;

        if (!param->path || (param->private_data &&
             param->private_data_len > IB_CM_SIDR_REQ_PRIVATE_DATA_SIZE))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        ret = cm_init_av_by_path(param->path, &cm_id_priv->av, cm_id_priv);
        if (ret)
                goto out;

        cm_id->service_id = param->service_id;
        cm_id->service_mask = ~cpu_to_be64(0);
        cm_id_priv->timeout_ms = param->timeout_ms;
        cm_id_priv->max_cm_retries = param->max_cm_retries;
        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
                goto out;

        cm_format_sidr_req((struct cm_sidr_req_msg *) msg->mad, cm_id_priv,
                           param);
        msg->timeout_ms = cm_id_priv->timeout_ms;
        msg->context[1] = (void *) (unsigned long) IB_CM_SIDR_REQ_SENT;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state == IB_CM_IDLE)
                ret = ib_post_send_mad(msg, NULL);
        else
                ret = -EINVAL;

        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                goto out;
        }
        cm_id->state = IB_CM_SIDR_REQ_SENT;
        cm_id_priv->msg = msg;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
out:
        return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_req);

static void cm_format_sidr_req_event(struct cm_work *work,
                                     const struct cm_id_private *rx_cm_id,
                                     struct ib_cm_id *listen_id)
{
        struct cm_sidr_req_msg *sidr_req_msg;
        struct ib_cm_sidr_req_event_param *param;

        sidr_req_msg = (struct cm_sidr_req_msg *)
                                work->mad_recv_wc->recv_buf.mad;
        param = &work->cm_event.param.sidr_req_rcvd;
        param->pkey = __be16_to_cpu(sidr_req_msg->pkey);
        param->listen_id = listen_id;
        param->service_id = sidr_req_msg->service_id;
        param->bth_pkey = cm_get_bth_pkey(work);
        param->port = work->port->port_num;
        param->sgid_index = rx_cm_id->av.ah_attr.grh.sgid_index;
        work->cm_event.private_data = &sidr_req_msg->private_data;
}

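/*
 * Handle a received SIDR REQ: create a cm_id for the request, discard
 * duplicates via the remote SIDR table, and dispatch the request to the
 * matching listener, rejecting it if no service is registered.
 */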
static int cm_sidr_req_handler(struct cm_work *work)
{
        struct ib_cm_id *cm_id;
        struct cm_id_private *cm_id_priv, *cur_cm_id_priv;
        struct cm_sidr_req_msg *sidr_req_msg;
        struct ib_wc *wc;
        int ret;

        cm_id = ib_create_cm_id(work->port->cm_dev->ib_device, NULL, NULL);
        if (IS_ERR(cm_id))
                return PTR_ERR(cm_id);
        cm_id_priv = container_of(cm_id, struct cm_id_private, id);

        /* Record SGID/SLID and request ID for lookup. */
        sidr_req_msg = (struct cm_sidr_req_msg *)
                                work->mad_recv_wc->recv_buf.mad;
        wc = work->mad_recv_wc->wc;
        cm_id_priv->av.dgid.global.subnet_prefix = cpu_to_be64(wc->slid);
        cm_id_priv->av.dgid.global.interface_id = 0;
        ret = cm_init_av_for_response(work->port, work->mad_recv_wc->wc,
                                      work->mad_recv_wc->recv_buf.grh,
                                      &cm_id_priv->av);
        if (ret)
                goto out;
        cm_id_priv->id.remote_id = sidr_req_msg->request_id;
        cm_id_priv->tid = sidr_req_msg->hdr.tid;
        atomic_inc(&cm_id_priv->work_count);

        spin_lock_irq(&cm.lock);
        cur_cm_id_priv = cm_insert_remote_sidr(cm_id_priv);
        if (cur_cm_id_priv) {
                spin_unlock_irq(&cm.lock);
                atomic_long_inc(&work->port->counter_group[CM_RECV_DUPLICATES].
                                counter[CM_SIDR_REQ_COUNTER]);
                goto out; /* Duplicate message. */
        }
        cm_id_priv->id.state = IB_CM_SIDR_REQ_RCVD;
        cur_cm_id_priv = cm_find_listen(cm_id->device,
                                        sidr_req_msg->service_id);
        if (!cur_cm_id_priv) {
                spin_unlock_irq(&cm.lock);
                cm_reject_sidr_req(cm_id_priv, IB_SIDR_UNSUPPORTED);
                goto out; /* No match. */
        }
        atomic_inc(&cur_cm_id_priv->refcount);
        atomic_inc(&cm_id_priv->refcount);
        spin_unlock_irq(&cm.lock);

        cm_id_priv->id.cm_handler = cur_cm_id_priv->id.cm_handler;
        cm_id_priv->id.context = cur_cm_id_priv->id.context;
        cm_id_priv->id.service_id = sidr_req_msg->service_id;
        cm_id_priv->id.service_mask = ~cpu_to_be64(0);

        cm_format_sidr_req_event(work, cm_id_priv, &cur_cm_id_priv->id);
        cm_process_work(cm_id_priv, work);
        cm_deref_id(cur_cm_id_priv);
        return 0;
out:
        ib_destroy_cm_id(&cm_id_priv->id);
        return -EINVAL;
}

static void cm_format_sidr_rep(struct cm_sidr_rep_msg *sidr_rep_msg,
                               struct cm_id_private *cm_id_priv,
                               struct ib_cm_sidr_rep_param *param)
{
        cm_format_mad_hdr(&sidr_rep_msg->hdr, CM_SIDR_REP_ATTR_ID,
                          cm_id_priv->tid);
        sidr_rep_msg->request_id = cm_id_priv->id.remote_id;
        sidr_rep_msg->status = param->status;
        cm_sidr_rep_set_qpn(sidr_rep_msg, cpu_to_be32(param->qp_num));
        sidr_rep_msg->service_id = cm_id_priv->id.service_id;
        sidr_rep_msg->qkey = cpu_to_be32(param->qkey);

        if (param->info && param->info_length)
                memcpy(sidr_rep_msg->info, param->info, param->info_length);

        if (param->private_data && param->private_data_len)
                memcpy(sidr_rep_msg->private_data, param->private_data,
                       param->private_data_len);
}

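/*
 * Send a SIDR REP.  Once the reply is posted the cm_id goes back to
 * IDLE and its entry is dropped from the remote SIDR table.
 */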
int ib_send_cm_sidr_rep(struct ib_cm_id *cm_id,
                        struct ib_cm_sidr_rep_param *param)
{
        struct cm_id_private *cm_id_priv;
        struct ib_mad_send_buf *msg;
        unsigned long flags;
        int ret;

        if ((param->info && param->info_length > IB_CM_SIDR_REP_INFO_LENGTH) ||
            (param->private_data &&
             param->private_data_len > IB_CM_SIDR_REP_PRIVATE_DATA_SIZE))
                return -EINVAL;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state != IB_CM_SIDR_REQ_RCVD) {
                ret = -EINVAL;
                goto error;
        }

        ret = cm_alloc_msg(cm_id_priv, &msg);
        if (ret)
                goto error;

        cm_format_sidr_rep((struct cm_sidr_rep_msg *) msg->mad, cm_id_priv,
                           param);
        ret = ib_post_send_mad(msg, NULL);
        if (ret) {
                spin_unlock_irqrestore(&cm_id_priv->lock, flags);
                cm_free_msg(msg);
                return ret;
        }
        cm_id->state = IB_CM_IDLE;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        spin_lock_irqsave(&cm.lock, flags);
        if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
                rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
                RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
        }
        spin_unlock_irqrestore(&cm.lock, flags);
        return 0;

error:  spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}
EXPORT_SYMBOL(ib_send_cm_sidr_rep);

static void cm_format_sidr_rep_event(struct cm_work *work)
{
        struct cm_sidr_rep_msg *sidr_rep_msg;
        struct ib_cm_sidr_rep_event_param *param;

        sidr_rep_msg = (struct cm_sidr_rep_msg *)
                                work->mad_recv_wc->recv_buf.mad;
        param = &work->cm_event.param.sidr_rep_rcvd;
        param->status = sidr_rep_msg->status;
        param->qkey = be32_to_cpu(sidr_rep_msg->qkey);
        param->qpn = be32_to_cpu(cm_sidr_rep_get_qpn(sidr_rep_msg));
        param->info = &sidr_rep_msg->info;
        param->info_len = sidr_rep_msg->info_length;
        work->cm_event.private_data = &sidr_rep_msg->private_data;
}

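/*
 * Handle a received SIDR REP: cancel the SIDR REQ retransmissions and
 * deliver the resolved QP number and Q_Key to the consumer.
 */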
static int cm_sidr_rep_handler(struct cm_work *work)
{
        struct cm_sidr_rep_msg *sidr_rep_msg;
        struct cm_id_private *cm_id_priv;

        sidr_rep_msg = (struct cm_sidr_rep_msg *)
                                work->mad_recv_wc->recv_buf.mad;
        cm_id_priv = cm_acquire_id(sidr_rep_msg->request_id, 0);
        if (!cm_id_priv)
                return -EINVAL; /* Unmatched reply. */

        spin_lock_irq(&cm_id_priv->lock);
        if (cm_id_priv->id.state != IB_CM_SIDR_REQ_SENT) {
                spin_unlock_irq(&cm_id_priv->lock);
                goto out;
        }
        cm_id_priv->id.state = IB_CM_IDLE;
        ib_cancel_mad(cm_id_priv->av.port->mad_agent, cm_id_priv->msg);
        spin_unlock_irq(&cm_id_priv->lock);

        cm_format_sidr_rep_event(work);
        cm_process_work(cm_id_priv, work);
        return 0;
out:
        cm_deref_id(cm_id_priv);
        return -EINVAL;
}

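/*
 * A send completed in error.  If the message is still the active one
 * for its cm_id and the state has not moved on, translate the failure
 * into the matching *_ERROR event; otherwise discard it quietly.
 */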
static void cm_process_send_error(struct ib_mad_send_buf *msg,
                                  enum ib_wc_status wc_status)
{
        struct cm_id_private *cm_id_priv;
        struct ib_cm_event cm_event;
        enum ib_cm_state state;
        int ret;

        memset(&cm_event, 0, sizeof cm_event);
        cm_id_priv = msg->context[0];

        /* Discard old sends or ones without a response. */
        spin_lock_irq(&cm_id_priv->lock);
        state = (enum ib_cm_state) (unsigned long) msg->context[1];
        if (msg != cm_id_priv->msg || state != cm_id_priv->id.state)
                goto discard;

        switch (state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
                cm_reset_to_idle(cm_id_priv);
                cm_event.event = IB_CM_REQ_ERROR;
                break;
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                cm_reset_to_idle(cm_id_priv);
                cm_event.event = IB_CM_REP_ERROR;
                break;
        case IB_CM_DREQ_SENT:
                cm_enter_timewait(cm_id_priv);
                cm_event.event = IB_CM_DREQ_ERROR;
                break;
        case IB_CM_SIDR_REQ_SENT:
                cm_id_priv->id.state = IB_CM_IDLE;
                cm_event.event = IB_CM_SIDR_REQ_ERROR;
                break;
        default:
                goto discard;
        }
        spin_unlock_irq(&cm_id_priv->lock);
        cm_event.param.send_status = wc_status;

        /* No other events can occur on the cm_id at this point. */
        ret = cm_id_priv->id.cm_handler(&cm_id_priv->id, &cm_event);
        cm_free_msg(msg);
        if (ret)
                ib_destroy_cm_id(&cm_id_priv->id);
        return;
discard:
        spin_unlock_irq(&cm_id_priv->lock);
        cm_free_msg(msg);
}

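/*
 * Send completion handler: account transmit and retry counters, free
 * completed messages, and hand real failures that can still affect
 * connection state to cm_process_send_error().
 */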
static void cm_send_handler(struct ib_mad_agent *mad_agent,
                            struct ib_mad_send_wc *mad_send_wc)
{
        struct ib_mad_send_buf *msg = mad_send_wc->send_buf;
        struct cm_port *port;
        u16 attr_index;

        port = mad_agent->context;
        attr_index = be16_to_cpu(((struct ib_mad_hdr *)
                                  msg->mad)->attr_id) - CM_ATTR_ID_OFFSET;

        /*
         * If the send was in response to a received message (context[0] is not
         * set to a cm_id), and is not a REJ, then it is a send that was
         * manually retried.
         */
        if (!msg->context[0] && (attr_index != CM_REJ_COUNTER))
                msg->retries = 1;

        atomic_long_add(1 + msg->retries,
                        &port->counter_group[CM_XMIT].counter[attr_index]);
        if (msg->retries)
                atomic_long_add(msg->retries,
                                &port->counter_group[CM_XMIT_RETRIES].
                                counter[attr_index]);

        switch (mad_send_wc->status) {
        case IB_WC_SUCCESS:
        case IB_WC_WR_FLUSH_ERR:
                cm_free_msg(msg);
                break;
        default:
                if (msg->context[0] && msg->context[1])
                        cm_process_send_error(msg, mad_send_wc->status);
                else
                        cm_free_msg(msg);
                break;
        }
}

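/* Workqueue entry point: dispatch a queued CM event to its handler. */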
static void cm_work_handler(struct work_struct *_work)
{
        struct cm_work *work = container_of(_work, struct cm_work, work.work);
        int ret;

        switch (work->cm_event.event) {
        case IB_CM_REQ_RECEIVED:
                ret = cm_req_handler(work);
                break;
        case IB_CM_MRA_RECEIVED:
                ret = cm_mra_handler(work);
                break;
        case IB_CM_REJ_RECEIVED:
                ret = cm_rej_handler(work);
                break;
        case IB_CM_REP_RECEIVED:
                ret = cm_rep_handler(work);
                break;
        case IB_CM_RTU_RECEIVED:
                ret = cm_rtu_handler(work);
                break;
        case IB_CM_USER_ESTABLISHED:
                ret = cm_establish_handler(work);
                break;
        case IB_CM_DREQ_RECEIVED:
                ret = cm_dreq_handler(work);
                break;
        case IB_CM_DREP_RECEIVED:
                ret = cm_drep_handler(work);
                break;
        case IB_CM_SIDR_REQ_RECEIVED:
                ret = cm_sidr_req_handler(work);
                break;
        case IB_CM_SIDR_REP_RECEIVED:
                ret = cm_sidr_rep_handler(work);
                break;
        case IB_CM_LAP_RECEIVED:
                ret = cm_lap_handler(work);
                break;
        case IB_CM_APR_RECEIVED:
                ret = cm_apr_handler(work);
                break;
        case IB_CM_TIMEWAIT_EXIT:
                ret = cm_timewait_handler(work);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        if (ret)
                cm_free_work(work);
}

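/*
 * Move a cm_id to ESTABLISHED on the consumer's behalf, used when the
 * QP sees incoming traffic before the RTU has been processed.
 */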
static int cm_establish(struct ib_cm_id *cm_id)
{
        struct cm_id_private *cm_id_priv;
        struct cm_work *work;
        unsigned long flags;
        int ret = 0;
        struct cm_device *cm_dev;

        cm_dev = ib_get_client_data(cm_id->device, &cm_client);
        if (!cm_dev)
                return -ENODEV;

        work = kmalloc(sizeof *work, GFP_ATOMIC);
        if (!work)
                return -ENOMEM;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id->state) {
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
                cm_id->state = IB_CM_ESTABLISHED;
                break;
        case IB_CM_ESTABLISHED:
                ret = -EISCONN;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        if (ret) {
                kfree(work);
                goto out;
        }

        /*
         * The CM worker thread may try to destroy the cm_id before it
         * can execute this work item.  To prevent potential deadlock,
         * we need to find the cm_id once we're in the context of the
         * worker thread, rather than holding a reference on it.
         */
        INIT_DELAYED_WORK(&work->work, cm_work_handler);
        work->local_id = cm_id->local_id;
        work->remote_id = cm_id->remote_id;
        work->mad_recv_wc = NULL;
        work->cm_event.event = IB_CM_USER_ESTABLISHED;

        /* Check if the device has started its remove_one. */
        spin_lock_irqsave(&cm.lock, flags);
        if (!cm_dev->going_down) {
                queue_delayed_work(cm.wq, &work->work, 0);
        } else {
                kfree(work);
                ret = -ENODEV;
        }
        spin_unlock_irqrestore(&cm.lock, flags);

out:
        return ret;
}

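/*
 * The QP has migrated to the alternate path; swap the primary and
 * alternate address vectors so that subsequent MADs can follow the
 * new path.
 */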
static int cm_migrate(struct ib_cm_id *cm_id)
{
        struct cm_id_private *cm_id_priv;
        struct cm_av tmp_av;
        unsigned long flags;
        int tmp_send_port_not_ready;
        int ret = 0;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        spin_lock_irqsave(&cm_id_priv->lock, flags);
        if (cm_id->state == IB_CM_ESTABLISHED &&
            (cm_id->lap_state == IB_CM_LAP_UNINIT ||
             cm_id->lap_state == IB_CM_LAP_IDLE)) {
                cm_id->lap_state = IB_CM_LAP_IDLE;
                /* Swap address vector */
                tmp_av = cm_id_priv->av;
                cm_id_priv->av = cm_id_priv->alt_av;
                cm_id_priv->alt_av = tmp_av;
                /* Swap port send ready state */
                tmp_send_port_not_ready = cm_id_priv->prim_send_port_not_ready;
                cm_id_priv->prim_send_port_not_ready = cm_id_priv->altr_send_port_not_ready;
                cm_id_priv->altr_send_port_not_ready = tmp_send_port_not_ready;
        } else
                ret = -EINVAL;
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);

        return ret;
}

int ib_cm_notify(struct ib_cm_id *cm_id, enum ib_event_type event)
{
        int ret;

        switch (event) {
        case IB_EVENT_COMM_EST:
                ret = cm_establish(cm_id);
                break;
        case IB_EVENT_PATH_MIG:
                ret = cm_migrate(cm_id);
                break;
        default:
                ret = -EINVAL;
        }
        return ret;
}
EXPORT_SYMBOL(ib_cm_notify);

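/*
 * Receive handler: map the MAD attribute ID to a CM event, allocate a
 * work item sized for any path records the message carries, and queue
 * it unless the device has begun removal.
 */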
static void cm_recv_handler(struct ib_mad_agent *mad_agent,
                            struct ib_mad_send_buf *send_buf,
                            struct ib_mad_recv_wc *mad_recv_wc)
{
        struct cm_port *port = mad_agent->context;
        struct cm_work *work;
        enum ib_cm_event_type event;
        u16 attr_id;
        int paths = 0;
        int going_down = 0;

        switch (mad_recv_wc->recv_buf.mad->mad_hdr.attr_id) {
        case CM_REQ_ATTR_ID:
                paths = 1 + (((struct cm_req_msg *) mad_recv_wc->recv_buf.mad)->
                                                    alt_local_lid != 0);
                event = IB_CM_REQ_RECEIVED;
                break;
        case CM_MRA_ATTR_ID:
                event = IB_CM_MRA_RECEIVED;
                break;
        case CM_REJ_ATTR_ID:
                event = IB_CM_REJ_RECEIVED;
                break;
        case CM_REP_ATTR_ID:
                event = IB_CM_REP_RECEIVED;
                break;
        case CM_RTU_ATTR_ID:
                event = IB_CM_RTU_RECEIVED;
                break;
        case CM_DREQ_ATTR_ID:
                event = IB_CM_DREQ_RECEIVED;
                break;
        case CM_DREP_ATTR_ID:
                event = IB_CM_DREP_RECEIVED;
                break;
        case CM_SIDR_REQ_ATTR_ID:
                event = IB_CM_SIDR_REQ_RECEIVED;
                break;
        case CM_SIDR_REP_ATTR_ID:
                event = IB_CM_SIDR_REP_RECEIVED;
                break;
        case CM_LAP_ATTR_ID:
                paths = 1;
                event = IB_CM_LAP_RECEIVED;
                break;
        case CM_APR_ATTR_ID:
                event = IB_CM_APR_RECEIVED;
                break;
        default:
                ib_free_recv_mad(mad_recv_wc);
                return;
        }

        attr_id = be16_to_cpu(mad_recv_wc->recv_buf.mad->mad_hdr.attr_id);
        atomic_long_inc(&port->counter_group[CM_RECV].
                        counter[attr_id - CM_ATTR_ID_OFFSET]);

        work = kmalloc(sizeof *work + sizeof(struct ib_sa_path_rec) * paths,
                       GFP_KERNEL);
        if (!work) {
                ib_free_recv_mad(mad_recv_wc);
                return;
        }

        INIT_DELAYED_WORK(&work->work, cm_work_handler);
        work->cm_event.event = event;
        work->mad_recv_wc = mad_recv_wc;
        work->port = port;

        /* Check if the device has started its remove_one. */
        spin_lock_irq(&cm.lock);
        if (!port->cm_dev->going_down)
                queue_delayed_work(cm.wq, &work->work, 0);
        else
                going_down = 1;
        spin_unlock_irq(&cm.lock);

        if (going_down) {
                kfree(work);
                ib_free_recv_mad(mad_recv_wc);
        }
}

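/*
 * The three helpers below fill in the QP attributes needed to move a
 * connection's QP through the INIT, RTR and RTS states using the
 * negotiated connection parameters.
 */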
static int cm_init_qp_init_attr(struct cm_id_private *cm_id_priv,
                                struct ib_qp_attr *qp_attr,
                                int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_SENT:
        case IB_CM_MRA_REQ_RCVD:
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_ACCESS_FLAGS |
                                IB_QP_PKEY_INDEX | IB_QP_PORT;
                qp_attr->qp_access_flags = IB_ACCESS_REMOTE_WRITE;
                if (cm_id_priv->responder_resources)
                        qp_attr->qp_access_flags |= IB_ACCESS_REMOTE_READ |
                                                    IB_ACCESS_REMOTE_ATOMIC;
                qp_attr->pkey_index = cm_id_priv->av.pkey_index;
                qp_attr->port_num = cm_id_priv->av.port->port_num;
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int cm_init_qp_rtr_attr(struct cm_id_private *cm_id_priv,
                               struct ib_qp_attr *qp_attr,
                               int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:
        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                *qp_attr_mask = IB_QP_STATE | IB_QP_AV | IB_QP_PATH_MTU |
                                IB_QP_DEST_QPN | IB_QP_RQ_PSN;
                qp_attr->ah_attr = cm_id_priv->av.ah_attr;
                qp_attr->path_mtu = cm_id_priv->path_mtu;
                qp_attr->dest_qp_num = be32_to_cpu(cm_id_priv->remote_qpn);
                qp_attr->rq_psn = be32_to_cpu(cm_id_priv->rq_psn);
                if (cm_id_priv->qp_type == IB_QPT_RC ||
                    cm_id_priv->qp_type == IB_QPT_XRC_TGT) {
                        *qp_attr_mask |= IB_QP_MAX_DEST_RD_ATOMIC |
                                         IB_QP_MIN_RNR_TIMER;
                        qp_attr->max_dest_rd_atomic =
                                        cm_id_priv->responder_resources;
                        qp_attr->min_rnr_timer = 0;
                }
                if (cm_id_priv->alt_av.ah_attr.dlid) {
                        *qp_attr_mask |= IB_QP_ALT_PATH;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
                        qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                }
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

static int cm_init_qp_rts_attr(struct cm_id_private *cm_id_priv,
                               struct ib_qp_attr *qp_attr,
                               int *qp_attr_mask)
{
        unsigned long flags;
        int ret;

        spin_lock_irqsave(&cm_id_priv->lock, flags);
        switch (cm_id_priv->id.state) {
        /* Allow transition to RTS before sending REP */
        case IB_CM_REQ_RCVD:
        case IB_CM_MRA_REQ_SENT:

        case IB_CM_REP_RCVD:
        case IB_CM_MRA_REP_SENT:
        case IB_CM_REP_SENT:
        case IB_CM_MRA_REP_RCVD:
        case IB_CM_ESTABLISHED:
                if (cm_id_priv->id.lap_state == IB_CM_LAP_UNINIT) {
                        *qp_attr_mask = IB_QP_STATE | IB_QP_SQ_PSN;
                        qp_attr->sq_psn = be32_to_cpu(cm_id_priv->sq_psn);
                        switch (cm_id_priv->qp_type) {
                        case IB_QPT_RC:
                        case IB_QPT_XRC_INI:
                                *qp_attr_mask |= IB_QP_RETRY_CNT | IB_QP_RNR_RETRY |
                                                 IB_QP_MAX_QP_RD_ATOMIC;
                                qp_attr->retry_cnt = cm_id_priv->retry_count;
                                qp_attr->rnr_retry = cm_id_priv->rnr_retry_count;
                                qp_attr->max_rd_atomic = cm_id_priv->initiator_depth;
                                /* fall through */
                        case IB_QPT_XRC_TGT:
                                *qp_attr_mask |= IB_QP_TIMEOUT;
                                qp_attr->timeout = cm_id_priv->av.timeout;
                                break;
                        default:
                                break;
                        }
                        if (cm_id_priv->alt_av.ah_attr.dlid) {
                                *qp_attr_mask |= IB_QP_PATH_MIG_STATE;
                                qp_attr->path_mig_state = IB_MIG_REARM;
                        }
                } else {
                        *qp_attr_mask = IB_QP_ALT_PATH | IB_QP_PATH_MIG_STATE;
                        qp_attr->alt_port_num = cm_id_priv->alt_av.port->port_num;
                        qp_attr->alt_pkey_index = cm_id_priv->alt_av.pkey_index;
                        qp_attr->alt_timeout = cm_id_priv->alt_av.timeout;
                        qp_attr->alt_ah_attr = cm_id_priv->alt_av.ah_attr;
                        qp_attr->path_mig_state = IB_MIG_REARM;
                }
                ret = 0;
                break;
        default:
                ret = -EINVAL;
                break;
        }
        spin_unlock_irqrestore(&cm_id_priv->lock, flags);
        return ret;
}

int ib_cm_init_qp_attr(struct ib_cm_id *cm_id,
                       struct ib_qp_attr *qp_attr,
                       int *qp_attr_mask)
{
        struct cm_id_private *cm_id_priv;
        int ret;

        cm_id_priv = container_of(cm_id, struct cm_id_private, id);
        switch (qp_attr->qp_state) {
        case IB_QPS_INIT:
                ret = cm_init_qp_init_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTR:
                ret = cm_init_qp_rtr_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        case IB_QPS_RTS:
                ret = cm_init_qp_rts_attr(cm_id_priv, qp_attr, qp_attr_mask);
                break;
        default:
                ret = -EINVAL;
                break;
        }
        return ret;
}
EXPORT_SYMBOL(ib_cm_init_qp_attr);

static ssize_t cm_show_counter(struct kobject *obj, struct attribute *attr,
                               char *buf)
{
        struct cm_counter_group *group;
        struct cm_counter_attribute *cm_attr;

        group = container_of(obj, struct cm_counter_group, obj);
        cm_attr = container_of(attr, struct cm_counter_attribute, attr);

        return sprintf(buf, "%ld\n",
                       atomic_long_read(&group->counter[cm_attr->index]));
}

static const struct sysfs_ops cm_counter_ops = {
        .show = cm_show_counter
};

static struct kobj_type cm_counter_obj_type = {
        .sysfs_ops = &cm_counter_ops,
        .default_attrs = cm_counter_default_attrs
};

static void cm_release_port_obj(struct kobject *obj)
{
        struct cm_port *cm_port;

        cm_port = container_of(obj, struct cm_port, port_obj);
        kfree(cm_port);
}

static struct kobj_type cm_port_obj_type = {
        .release = cm_release_port_obj
};

static char *cm_devnode(struct device *dev, umode_t *mode)
{
        if (mode)
                *mode = 0666;
        return kasprintf(GFP_KERNEL, "infiniband/%s", dev_name(dev));
}

struct class cm_class = {
        .owner   = THIS_MODULE,
        .name    = "infiniband_cm",
        .devnode = cm_devnode,
};
EXPORT_SYMBOL(cm_class);

static int cm_create_port_fs(struct cm_port *port)
{
        int i, ret;

        ret = kobject_init_and_add(&port->port_obj, &cm_port_obj_type,
                                   &port->cm_dev->device->kobj,
                                   "%d", port->port_num);
        if (ret) {
                kfree(port);
                return ret;
        }

        for (i = 0; i < CM_COUNTER_GROUPS; i++) {
                ret = kobject_init_and_add(&port->counter_group[i].obj,
                                           &cm_counter_obj_type,
                                           &port->port_obj,
                                           "%s", counter_group_names[i]);
                if (ret)
                        goto error;
        }

        return 0;

error:
        while (i--)
                kobject_put(&port->counter_group[i].obj);
        kobject_put(&port->port_obj);
        return ret;
}

static void cm_remove_port_fs(struct cm_port *port)
{
        int i;

        for (i = 0; i < CM_COUNTER_GROUPS; i++)
                kobject_put(&port->counter_group[i].obj);

        kobject_put(&port->port_obj);
}

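/*
 * Device-add callback: register a GSI MAD agent on each CM-capable
 * port, advertise IB_PORT_CM_SUP, and create the per-port counter
 * objects in sysfs.
 */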
static void cm_add_one(struct ib_device *ib_device)
{
        struct cm_device *cm_dev;
        struct cm_port *port;
        struct ib_mad_reg_req reg_req = {
                .mgmt_class = IB_MGMT_CLASS_CM,
                .mgmt_class_version = IB_CM_CLASS_VERSION,
        };
        struct ib_port_modify port_modify = {
                .set_port_cap_mask = IB_PORT_CM_SUP
        };
        unsigned long flags;
        int ret;
        int count = 0;
        u8 i;

        cm_dev = kzalloc(sizeof(*cm_dev) + sizeof(*port) *
                         ib_device->phys_port_cnt, GFP_KERNEL);
        if (!cm_dev)
                return;

        cm_dev->ib_device = ib_device;
        cm_dev->ack_delay = ib_device->attrs.local_ca_ack_delay;
        cm_dev->going_down = 0;
        cm_dev->device = device_create(&cm_class, &ib_device->dev,
                                       MKDEV(0, 0), NULL,
                                       "%s", ib_device->name);
        if (IS_ERR(cm_dev->device)) {
                kfree(cm_dev);
                return;
        }

        set_bit(IB_MGMT_METHOD_SEND, reg_req.method_mask);
        for (i = 1; i <= ib_device->phys_port_cnt; i++) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = kzalloc(sizeof *port, GFP_KERNEL);
                if (!port)
                        goto error1;

                cm_dev->port[i-1] = port;
                port->cm_dev = cm_dev;
                port->port_num = i;

                INIT_LIST_HEAD(&port->cm_priv_prim_list);
                INIT_LIST_HEAD(&port->cm_priv_altr_list);

                ret = cm_create_port_fs(port);
                if (ret)
                        goto error1;

                port->mad_agent = ib_register_mad_agent(ib_device, i,
                                                        IB_QPT_GSI,
                                                        &reg_req,
                                                        0,
                                                        cm_send_handler,
                                                        cm_recv_handler,
                                                        port,
                                                        0);
                if (IS_ERR(port->mad_agent))
                        goto error2;

                ret = ib_modify_port(ib_device, i, 0, &port_modify);
                if (ret)
                        goto error3;

                count++;
        }

        if (!count)
                goto free;

        ib_set_client_data(ib_device, &cm_client, cm_dev);

        write_lock_irqsave(&cm.device_lock, flags);
        list_add_tail(&cm_dev->list, &cm.device_list);
        write_unlock_irqrestore(&cm.device_lock, flags);
        return;

error3:
        ib_unregister_mad_agent(port->mad_agent);
error2:
        cm_remove_port_fs(port);
error1:
        port_modify.set_port_cap_mask = 0;
        port_modify.clr_port_cap_mask = IB_PORT_CM_SUP;
        while (--i) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                ib_unregister_mad_agent(port->mad_agent);
                cm_remove_port_fs(port);
        }
free:
        device_unregister(cm_dev->device);
        kfree(cm_dev);
}

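/*
 * Device-removal callback: quiesce every port (mark its cm_ids as not
 * ready to send, flush queued work) before unregistering the MAD agent
 * and removing the sysfs objects.
 */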
static void cm_remove_one(struct ib_device *ib_device, void *client_data)
{
        struct cm_device *cm_dev = client_data;
        struct cm_port *port;
        struct cm_id_private *cm_id_priv;
        struct ib_mad_agent *cur_mad_agent;
        struct ib_port_modify port_modify = {
                .clr_port_cap_mask = IB_PORT_CM_SUP
        };
        unsigned long flags;
        int i;

        if (!cm_dev)
                return;

        write_lock_irqsave(&cm.device_lock, flags);
        list_del(&cm_dev->list);
        write_unlock_irqrestore(&cm.device_lock, flags);

        spin_lock_irq(&cm.lock);
        cm_dev->going_down = 1;
        spin_unlock_irq(&cm.lock);

        for (i = 1; i <= ib_device->phys_port_cnt; i++) {
                if (!rdma_cap_ib_cm(ib_device, i))
                        continue;

                port = cm_dev->port[i-1];
                ib_modify_port(ib_device, port->port_num, 0, &port_modify);
                /* Mark all the cm_ids on this port as not ready to send. */
                spin_lock_irq(&cm.lock);
                list_for_each_entry(cm_id_priv, &port->cm_priv_altr_list, altr_list)
                        cm_id_priv->altr_send_port_not_ready = 1;
                list_for_each_entry(cm_id_priv, &port->cm_priv_prim_list, prim_list)
                        cm_id_priv->prim_send_port_not_ready = 1;
                spin_unlock_irq(&cm.lock);
                /*
                 * We flush the queue here after setting going_down; this
                 * ensures that no new work items will be queued by the
                 * receive handler, after which it is safe to unregister
                 * the MAD agent.
                 */
                flush_workqueue(cm.wq);
                spin_lock_irq(&cm.state_lock);
                cur_mad_agent = port->mad_agent;
                port->mad_agent = NULL;
                spin_unlock_irq(&cm.state_lock);
                ib_unregister_mad_agent(cur_mad_agent);
                cm_remove_port_fs(port);
        }

        device_unregister(cm_dev->device);
        kfree(cm_dev);
}

static int __init ib_cm_init(void)
{
        int ret;

        memset(&cm, 0, sizeof cm);
        INIT_LIST_HEAD(&cm.device_list);
        rwlock_init(&cm.device_lock);
        spin_lock_init(&cm.lock);
        spin_lock_init(&cm.state_lock);
        cm.listen_service_table = RB_ROOT;
        cm.listen_service_id = be64_to_cpu(IB_CM_ASSIGN_SERVICE_ID);
        cm.remote_id_table = RB_ROOT;
        cm.remote_qp_table = RB_ROOT;
        cm.remote_sidr_table = RB_ROOT;
        idr_init(&cm.local_id_table);
        get_random_bytes(&cm.random_id_operand, sizeof cm.random_id_operand);
        INIT_LIST_HEAD(&cm.timewait_list);

        ret = class_register(&cm_class);
        if (ret) {
                ret = -ENOMEM;
                goto error1;
        }

        cm.wq = create_workqueue("ib_cm");
        if (!cm.wq) {
                ret = -ENOMEM;
                goto error2;
        }

        ret = ib_register_client(&cm_client);
        if (ret)
                goto error3;

        return 0;
error3:
        destroy_workqueue(cm.wq);
error2:
        class_unregister(&cm_class);
error1:
        idr_destroy(&cm.local_id_table);
        return ret;
}

static void __exit ib_cm_cleanup(void)
{
        struct cm_timewait_info *timewait_info, *tmp;

        spin_lock_irq(&cm.lock);
        list_for_each_entry(timewait_info, &cm.timewait_list, list)
                cancel_delayed_work(&timewait_info->work.work);
        spin_unlock_irq(&cm.lock);

        ib_unregister_client(&cm_client);
        destroy_workqueue(cm.wq);

        list_for_each_entry_safe(timewait_info, tmp, &cm.timewait_list, list) {
                cancel_delayed_work_sync(&timewait_info->work.work);
                list_del(&timewait_info->list);
                kfree(timewait_info);
        }

        class_unregister(&cm_class);
        idr_destroy(&cm.local_id_table);
}

module_init_order(ib_cm_init, SI_ORDER_SECOND);
module_exit_order(ib_cm_cleanup, SI_ORDER_SECOND);