]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_multicast.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / ofed / drivers / infiniband / ulp / ipoib / ipoib_multicast.c
1 /*
2  * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
3  * Copyright (c) 2005 Sun Microsystems, Inc. All rights reserved.
4  * Copyright (c) 2004 Voltaire, Inc. All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
35 #include "ipoib.h"
36
37 #include <linux/delay.h>
38 #include <linux/completion.h>
39
#ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
/*
 * Multicast debug verbosity.  Defaults to 1 and is exported as a module
 * parameter with mode 0644; only built when CONFIG_INFINIBAND_IPOIB_DEBUG
 * is defined.
 * NOTE(review): not referenced by name in this file; presumably consumed
 * by the ipoib_dbg_mcast() macro declared in ipoib.h -- confirm.
 */
static int mcast_debug_level = 1;

module_param(mcast_debug_level, int, 0644);
MODULE_PARM_DESC(mcast_debug_level,
                 "Enable multicast debug tracing if > 0");
#endif
47
/*
 * File-wide mutex (one instance shared by every device).  The code below
 * holds it while testing IPOIB_MCAST_RUN and queueing priv->mcast_task,
 * which serialises those re-queues against ipoib_mcast_stop_thread()
 * clearing the flag and cancelling the task.
 */
static DEFINE_MUTEX(mcast_mutex);
49
/*
 * Snapshot of one multicast group as seen by an iterator.
 * NOTE(review): nothing in the visible part of this file uses this
 * structure; presumably it backs a debug iterator defined further down.
 * The per-field notes below are inferred from the field names and from
 * the similarly named members of struct ipoib_mcast -- confirm.
 */
struct ipoib_mcast_iter {
        struct ipoib_dev_priv *priv;    /* device being iterated */
        union ib_gid       mgid;        /* group GID of the current entry */
        unsigned long      created;     /* presumably the group's creation time in jiffies */
        unsigned int       queuelen;    /* presumably the length of the group's packet queue */
        unsigned int       complete;    /* presumably non-zero once the join has finished */
        unsigned int       send_only;   /* presumably non-zero for send-only groups */
};
58
59 static void ipoib_mcast_free(struct ipoib_mcast *mcast)
60 {
61         struct ifnet *dev = mcast->priv->dev;
62         int tx_dropped = 0;
63
64         ipoib_dbg_mcast(mcast->priv, "deleting multicast group %16D\n",
65                         mcast->mcmember.mgid.raw, ":");
66
67         if (mcast->ah)
68                 ipoib_put_ah(mcast->ah);
69
70         tx_dropped = mcast->pkt_queue.ifq_len;
71         _IF_DRAIN(&mcast->pkt_queue);   /* XXX Locking. */
72
73         dev->if_oerrors += tx_dropped;
74
75         kfree(mcast);
76 }
77
78 static struct ipoib_mcast *ipoib_mcast_alloc(struct ipoib_dev_priv *priv,
79                                              int can_sleep)
80 {
81         struct ipoib_mcast *mcast;
82
83         mcast = kzalloc(sizeof *mcast, can_sleep ? GFP_KERNEL : GFP_ATOMIC);
84         if (!mcast)
85                 return NULL;
86
87         mcast->priv = priv;
88         mcast->created = jiffies;
89         mcast->backoff = 1;
90
91         INIT_LIST_HEAD(&mcast->list);
92         bzero(&mcast->pkt_queue, sizeof(mcast->pkt_queue));
93
94         return mcast;
95 }
96
97 static struct ipoib_mcast *__ipoib_mcast_find(struct ipoib_dev_priv *priv,
98     void *mgid)
99 {
100         struct rb_node *n = priv->multicast_tree.rb_node;
101
102         while (n) {
103                 struct ipoib_mcast *mcast;
104                 int ret;
105
106                 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
107
108                 ret = memcmp(mgid, mcast->mcmember.mgid.raw,
109                              sizeof (union ib_gid));
110                 if (ret < 0)
111                         n = n->rb_left;
112                 else if (ret > 0)
113                         n = n->rb_right;
114                 else
115                         return mcast;
116         }
117
118         return NULL;
119 }
120
121 static int __ipoib_mcast_add(struct ipoib_dev_priv *priv,
122     struct ipoib_mcast *mcast)
123 {
124         struct rb_node **n = &priv->multicast_tree.rb_node, *pn = NULL;
125
126         while (*n) {
127                 struct ipoib_mcast *tmcast;
128                 int ret;
129
130                 pn = *n;
131                 tmcast = rb_entry(pn, struct ipoib_mcast, rb_node);
132
133                 ret = memcmp(mcast->mcmember.mgid.raw, tmcast->mcmember.mgid.raw,
134                              sizeof (union ib_gid));
135                 if (ret < 0)
136                         n = &pn->rb_left;
137                 else if (ret > 0)
138                         n = &pn->rb_right;
139                 else
140                         return -EEXIST;
141         }
142
143         rb_link_node(&mcast->rb_node, pn, n);
144         rb_insert_color(&mcast->rb_node, &priv->multicast_tree);
145
146         return 0;
147 }
148
149 static int ipoib_mcast_join_finish(struct ipoib_mcast *mcast,
150                                    struct ib_sa_mcmember_rec *mcmember)
151 {
152         struct ipoib_dev_priv *priv = mcast->priv;
153         struct ifnet *dev = priv->dev;
154         struct ipoib_ah *ah;
155         int ret;
156         int set_qkey = 0;
157
158         mcast->mcmember = *mcmember;
159
160         /* Set the cached Q_Key before we attach if it's the broadcast group */
161         if (!memcmp(mcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
162                     sizeof (union ib_gid))) {
163                 spin_lock_irq(&priv->lock);
164                 if (!priv->broadcast) {
165                         spin_unlock_irq(&priv->lock);
166                         return -EAGAIN;
167                 }
168                 priv->qkey = be32_to_cpu(priv->broadcast->mcmember.qkey);
169                 spin_unlock_irq(&priv->lock);
170                 priv->tx_wr.wr.ud.remote_qkey = priv->qkey;
171                 set_qkey = 1;
172         }
173
174         if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
175                 if (test_and_set_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
176                         ipoib_warn(priv, "multicast group %16D already attached\n",
177                                    mcast->mcmember.mgid.raw, ":");
178
179                         return 0;
180                 }
181
182                 ret = ipoib_mcast_attach(priv, be16_to_cpu(mcast->mcmember.mlid),
183                                          &mcast->mcmember.mgid, set_qkey);
184                 if (ret < 0) {
185                         ipoib_warn(priv, "couldn't attach QP to multicast group %16D\n",
186                                    mcast->mcmember.mgid.raw, ":");
187
188                         clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags);
189                         return ret;
190                 }
191         }
192
193         {
194                 struct ib_ah_attr av = {
195                         .dlid          = be16_to_cpu(mcast->mcmember.mlid),
196                         .port_num      = priv->port,
197                         .sl            = mcast->mcmember.sl,
198                         .ah_flags      = IB_AH_GRH,
199                         .static_rate   = mcast->mcmember.rate,
200                         .grh           = {
201                                 .flow_label    = be32_to_cpu(mcast->mcmember.flow_label),
202                                 .hop_limit     = mcast->mcmember.hop_limit,
203                                 .sgid_index    = 0,
204                                 .traffic_class = mcast->mcmember.traffic_class
205                         }
206                 };
207                 av.grh.dgid = mcast->mcmember.mgid;
208
209                 ah = ipoib_create_ah(priv, priv->pd, &av);
210                 if (!ah) {
211                         ipoib_warn(priv, "ib_address_create failed\n");
212                 } else {
213                         spin_lock_irq(&priv->lock);
214                         mcast->ah = ah;
215                         spin_unlock_irq(&priv->lock);
216
217                         ipoib_dbg_mcast(priv, "MGID %16D AV %p, LID 0x%04x, SL %d\n",
218                                         mcast->mcmember.mgid.raw, ":",
219                                         mcast->ah->ah,
220                                         be16_to_cpu(mcast->mcmember.mlid),
221                                         mcast->mcmember.sl);
222                 }
223         }
224
225         /* actually send any queued packets */
226         while (mcast->pkt_queue.ifq_len) {
227                 struct mbuf *mb;
228                 _IF_DEQUEUE(&mcast->pkt_queue, mb);
229                 mb->m_pkthdr.rcvif = dev;
230
231                 if (dev->if_transmit(dev, mb))
232                         ipoib_warn(priv, "dev_queue_xmit failed to requeue packet\n");
233         }
234
235         return 0;
236 }
237
238 static int
239 ipoib_mcast_sendonly_join_complete(int status,
240                                    struct ib_sa_multicast *multicast)
241 {
242         struct ipoib_mcast *mcast = multicast->context;
243         struct ipoib_dev_priv *priv = mcast->priv;
244
245         /* We trap for port events ourselves. */
246         if (status == -ENETRESET)
247                 return 0;
248
249         if (!status)
250                 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
251
252         if (status) {
253                 if (mcast->logcount++ < 20)
254                         ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
255                                         mcast->mcmember.mgid.raw, ":", status);
256
257                 /* Flush out any queued packets */
258                 priv->dev->if_oerrors += mcast->pkt_queue.ifq_len;
259                 _IF_DRAIN(&mcast->pkt_queue);
260
261                 /* Clear the busy flag so we try again */
262                 status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY,
263                                             &mcast->flags);
264         }
265         return status;
266 }
267
268 static int ipoib_mcast_sendonly_join(struct ipoib_mcast *mcast)
269 {
270         struct ipoib_dev_priv *priv = mcast->priv;
271         struct ib_sa_mcmember_rec rec = {
272 #if 0                           /* Some SMs don't support send-only yet */
273                 .join_state = 4
274 #else
275                 .join_state = 1
276 #endif
277         };
278         int ret = 0;
279
280         if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)) {
281                 ipoib_dbg_mcast(priv, "device shutting down, no multicast joins\n");
282                 return -ENODEV;
283         }
284
285         if (test_and_set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)) {
286                 ipoib_dbg_mcast(priv, "multicast entry busy, skipping\n");
287                 return -EBUSY;
288         }
289
290         rec.mgid     = mcast->mcmember.mgid;
291         rec.port_gid = priv->local_gid;
292         rec.pkey     = cpu_to_be16(priv->pkey);
293
294         mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca,
295                                          priv->port, &rec,
296                                          IB_SA_MCMEMBER_REC_MGID        |
297                                          IB_SA_MCMEMBER_REC_PORT_GID    |
298                                          IB_SA_MCMEMBER_REC_PKEY        |
299                                          IB_SA_MCMEMBER_REC_JOIN_STATE,
300                                          GFP_ATOMIC,
301                                          ipoib_mcast_sendonly_join_complete,
302                                          mcast);
303         if (IS_ERR(mcast->mc)) {
304                 ret = PTR_ERR(mcast->mc);
305                 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
306                 ipoib_warn(priv, "ib_sa_join_multicast failed (ret = %d)\n",
307                            ret);
308         } else {
309                 ipoib_dbg_mcast(priv, "no multicast record for %16D, starting join\n",
310                                 mcast->mcmember.mgid.raw, ":");
311         }
312
313         return ret;
314 }
315
316 void ipoib_mcast_carrier_on_task(struct work_struct *work)
317 {
318         struct ipoib_dev_priv *priv = container_of(work, struct ipoib_dev_priv,
319                                                    carrier_on_task);
320         struct ib_port_attr attr;
321
322         /*
323          * Take rtnl_lock to avoid racing with ipoib_stop() and
324          * turning the carrier back on while a device is being
325          * removed.
326          */
327         if (ib_query_port(priv->ca, priv->port, &attr) ||
328             attr.state != IB_PORT_ACTIVE) {
329                 ipoib_dbg(priv, "Keeping carrier off until IB port is active\n");
330                 return;
331         }
332         if_link_state_change(priv->dev, LINK_STATE_UP);
333 }
334
335 static int ipoib_mcast_join_complete(int status,
336                                      struct ib_sa_multicast *multicast)
337 {
338         struct ipoib_mcast *mcast = multicast->context;
339         struct ipoib_dev_priv *priv = mcast->priv;
340
341         ipoib_dbg_mcast(priv, "join completion for %16D (status %d)\n",
342                         mcast->mcmember.mgid.raw, ":", status);
343
344         /* We trap for port events ourselves. */
345         if (status == -ENETRESET)
346                 return 0;
347
348         if (!status)
349                 status = ipoib_mcast_join_finish(mcast, &multicast->rec);
350
351         if (!status) {
352                 mcast->backoff = 1;
353                 mutex_lock(&mcast_mutex);
354                 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
355                         queue_delayed_work(ipoib_workqueue,
356                                            &priv->mcast_task, 0);
357                 mutex_unlock(&mcast_mutex);
358
359                 /*
360                  * Defer carrier on work to ipoib_workqueue to avoid a
361                  * deadlock on rtnl_lock here.
362                  */
363                 if (mcast == priv->broadcast)
364                         queue_work(ipoib_workqueue, &priv->carrier_on_task);
365
366                 return 0;
367         }
368
369         if (mcast->logcount++ < 20) {
370                 if (status == -ETIMEDOUT || status == -EAGAIN) {
371                         ipoib_dbg_mcast(priv, "multicast join failed for %16D, status %d\n",
372                                         mcast->mcmember.mgid.raw, ":", status);
373                 } else {
374                         ipoib_warn(priv, "multicast join failed for %16D, status %d\n",
375                                    mcast->mcmember.mgid.raw, ":", status);
376                 }
377         }
378
379         mcast->backoff *= 2;
380         if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
381                 mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
382
383         /* Clear the busy flag so we try again */
384         status = test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
385
386         mutex_lock(&mcast_mutex);
387         spin_lock_irq(&priv->lock);
388         if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
389                 queue_delayed_work(ipoib_workqueue, &priv->mcast_task,
390                                    mcast->backoff * HZ);
391         spin_unlock_irq(&priv->lock);
392         mutex_unlock(&mcast_mutex);
393
394         return status;
395 }
396
397 static void ipoib_mcast_join(struct ipoib_dev_priv *priv,
398     struct ipoib_mcast *mcast, int create)
399 {
400         struct ib_sa_mcmember_rec rec = {
401                 .join_state = 1
402         };
403         ib_sa_comp_mask comp_mask;
404         int ret = 0;
405
406         ipoib_dbg_mcast(priv, "joining MGID %16D\n",
407             mcast->mcmember.mgid.raw, ":");
408
409         rec.mgid     = mcast->mcmember.mgid;
410         rec.port_gid = priv->local_gid;
411         rec.pkey     = cpu_to_be16(priv->pkey);
412
413         comp_mask =
414                 IB_SA_MCMEMBER_REC_MGID         |
415                 IB_SA_MCMEMBER_REC_PORT_GID     |
416                 IB_SA_MCMEMBER_REC_PKEY         |
417                 IB_SA_MCMEMBER_REC_JOIN_STATE;
418
419         if (create) {
420                 comp_mask |=
421                         IB_SA_MCMEMBER_REC_QKEY                 |
422                         IB_SA_MCMEMBER_REC_MTU_SELECTOR         |
423                         IB_SA_MCMEMBER_REC_MTU                  |
424                         IB_SA_MCMEMBER_REC_TRAFFIC_CLASS        |
425                         IB_SA_MCMEMBER_REC_RATE_SELECTOR        |
426                         IB_SA_MCMEMBER_REC_RATE                 |
427                         IB_SA_MCMEMBER_REC_SL                   |
428                         IB_SA_MCMEMBER_REC_FLOW_LABEL           |
429                         IB_SA_MCMEMBER_REC_HOP_LIMIT;
430
431                 rec.qkey          = priv->broadcast->mcmember.qkey;
432                 rec.mtu_selector  = IB_SA_EQ;
433                 rec.mtu           = priv->broadcast->mcmember.mtu;
434                 rec.traffic_class = priv->broadcast->mcmember.traffic_class;
435                 rec.rate_selector = IB_SA_EQ;
436                 rec.rate          = priv->broadcast->mcmember.rate;
437                 rec.sl            = priv->broadcast->mcmember.sl;
438                 rec.flow_label    = priv->broadcast->mcmember.flow_label;
439                 rec.hop_limit     = priv->broadcast->mcmember.hop_limit;
440         }
441
442         set_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
443         mcast->mc = ib_sa_join_multicast(&ipoib_sa_client, priv->ca, priv->port,
444                                          &rec, comp_mask, GFP_KERNEL,
445                                          ipoib_mcast_join_complete, mcast);
446         if (IS_ERR(mcast->mc)) {
447                 clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags);
448                 ret = PTR_ERR(mcast->mc);
449                 ipoib_warn(priv, "ib_sa_join_multicast failed, status %d\n", ret);
450
451                 mcast->backoff *= 2;
452                 if (mcast->backoff > IPOIB_MAX_BACKOFF_SECONDS)
453                         mcast->backoff = IPOIB_MAX_BACKOFF_SECONDS;
454
455                 mutex_lock(&mcast_mutex);
456                 if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
457                         queue_delayed_work(ipoib_workqueue,
458                                            &priv->mcast_task,
459                                            mcast->backoff * HZ);
460                 mutex_unlock(&mcast_mutex);
461         }
462 }
463
464 void ipoib_mcast_join_task(struct work_struct *work)
465 {
466         struct ipoib_dev_priv *priv =
467                 container_of(work, struct ipoib_dev_priv, mcast_task.work);
468         struct ifnet *dev = priv->dev;
469
470         ipoib_dbg_mcast(priv, "Running join task. flags 0x%lX\n", priv->flags);
471
472         if (!test_bit(IPOIB_MCAST_RUN, &priv->flags))
473                 return;
474
475         if (ib_query_gid(priv->ca, priv->port, 0, &priv->local_gid))
476                 ipoib_warn(priv, "ib_query_gid() failed\n");
477         else
478                 memcpy(IF_LLADDR(dev) + 4, priv->local_gid.raw, sizeof (union ib_gid));
479
480         {
481                 struct ib_port_attr attr;
482
483                 if (!ib_query_port(priv->ca, priv->port, &attr))
484                         priv->local_lid = attr.lid;
485                 else
486                         ipoib_warn(priv, "ib_query_port failed\n");
487         }
488
489         if (!priv->broadcast) {
490                 struct ipoib_mcast *broadcast;
491
492                 if (!test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
493                         return;
494
495                 broadcast = ipoib_mcast_alloc(priv, 1);
496                 if (!broadcast) {
497                         ipoib_warn(priv, "failed to allocate broadcast group\n");
498                         mutex_lock(&mcast_mutex);
499                         if (test_bit(IPOIB_MCAST_RUN, &priv->flags))
500                                 queue_delayed_work(ipoib_workqueue,
501                                                    &priv->mcast_task, HZ);
502                         mutex_unlock(&mcast_mutex);
503                         return;
504                 }
505
506                 spin_lock_irq(&priv->lock);
507                 memcpy(broadcast->mcmember.mgid.raw, dev->if_broadcastaddr + 4,
508                        sizeof (union ib_gid));
509                 priv->broadcast = broadcast;
510
511                 __ipoib_mcast_add(priv, priv->broadcast);
512                 spin_unlock_irq(&priv->lock);
513         }
514
515         if (priv->broadcast &&
516             !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
517                 if (priv->broadcast &&
518                     !test_bit(IPOIB_MCAST_FLAG_BUSY, &priv->broadcast->flags))
519                         ipoib_mcast_join(priv, priv->broadcast, 0);
520                 return;
521         }
522
523         while (1) {
524                 struct ipoib_mcast *mcast = NULL;
525
526                 spin_lock_irq(&priv->lock);
527                 list_for_each_entry(mcast, &priv->multicast_list, list) {
528                         if (!test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)
529                             && !test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags)
530                             && !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
531                                 /* Found the next unjoined group */
532                                 break;
533                         }
534                 }
535                 spin_unlock_irq(&priv->lock);
536
537                 if (&mcast->list == &priv->multicast_list) {
538                         /* All done */
539                         break;
540                 }
541
542                 ipoib_mcast_join(priv, mcast, 1);
543                 return;
544         }
545
546         spin_lock_irq(&priv->lock);
547         if (priv->broadcast)
548                 priv->mcast_mtu = IPOIB_UD_MTU(ib_mtu_enum_to_int(priv->broadcast->mcmember.mtu));
549         else
550                 priv->mcast_mtu = priv->admin_mtu;
551         spin_unlock_irq(&priv->lock);
552
553         if (!ipoib_cm_admin_enabled(priv))
554                 ipoib_change_mtu(priv, min(priv->mcast_mtu, priv->admin_mtu));
555
556         ipoib_dbg_mcast(priv, "successfully joined all multicast groups\n");
557
558         clear_bit(IPOIB_MCAST_RUN, &priv->flags);
559 }
560
561 int ipoib_mcast_start_thread(struct ipoib_dev_priv *priv)
562 {
563         ipoib_dbg_mcast(priv, "starting multicast thread flags 0x%lX\n",
564             priv->flags);
565
566         mutex_lock(&mcast_mutex);
567         if (!test_and_set_bit(IPOIB_MCAST_RUN, &priv->flags))
568                 queue_delayed_work(ipoib_workqueue, &priv->mcast_task, 0);
569         mutex_unlock(&mcast_mutex);
570
571         return 0;
572 }
573
574 int ipoib_mcast_stop_thread(struct ipoib_dev_priv *priv, int flush)
575 {
576
577         ipoib_dbg_mcast(priv, "stopping multicast thread\n");
578
579         mutex_lock(&mcast_mutex);
580         clear_bit(IPOIB_MCAST_RUN, &priv->flags);
581         cancel_delayed_work(&priv->mcast_task);
582         mutex_unlock(&mcast_mutex);
583
584         if (flush)
585                 flush_workqueue(ipoib_workqueue);
586
587         return 0;
588 }
589
590 static int ipoib_mcast_leave(struct ipoib_dev_priv *priv, struct ipoib_mcast *mcast)
591 {
592         int ret = 0;
593
594         if (test_and_clear_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
595                 ib_sa_free_multicast(mcast->mc);
596
597         if (test_and_clear_bit(IPOIB_MCAST_FLAG_ATTACHED, &mcast->flags)) {
598                 ipoib_dbg_mcast(priv, "leaving MGID %16D\n",
599                                 mcast->mcmember.mgid.raw, ":");
600
601                 /* Remove ourselves from the multicast group */
602                 ret = ib_detach_mcast(priv->qp, &mcast->mcmember.mgid,
603                                       be16_to_cpu(mcast->mcmember.mlid));
604                 if (ret)
605                         ipoib_warn(priv, "ib_detach_mcast failed (result = %d)\n", ret);
606         }
607
608         return 0;
609 }
610
611 void
612 ipoib_mcast_send(struct ipoib_dev_priv *priv, void *mgid, struct mbuf *mb)
613 {
614         struct ifnet *dev = priv->dev;
615         struct ipoib_mcast *mcast;
616
617         if (!test_bit(IPOIB_FLAG_OPER_UP, &priv->flags)         ||
618             !priv->broadcast                                    ||
619             !test_bit(IPOIB_MCAST_FLAG_ATTACHED, &priv->broadcast->flags)) {
620                 ++dev->if_oerrors;
621                 m_freem(mb);
622                 return;
623         }
624
625         mcast = __ipoib_mcast_find(priv, mgid);
626         if (!mcast) {
627                 /* Let's create a new send only group now */
628                 ipoib_dbg_mcast(priv, "setting up send only multicast group for %16D\n",
629                                 mgid, ":");
630
631                 mcast = ipoib_mcast_alloc(priv, 0);
632                 if (!mcast) {
633                         ipoib_warn(priv, "unable to allocate memory for "
634                                    "multicast structure\n");
635                         ++dev->if_oerrors;
636                         m_freem(mb);
637                         goto out;
638                 }
639
640                 set_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags);
641                 memcpy(mcast->mcmember.mgid.raw, mgid, sizeof (union ib_gid));
642                 __ipoib_mcast_add(priv, mcast);
643                 list_add_tail(&mcast->list, &priv->multicast_list);
644         }
645
646         if (!mcast->ah) {
647                 if (mcast->pkt_queue.ifq_len < IPOIB_MAX_MCAST_QUEUE) {
648                         _IF_ENQUEUE(&mcast->pkt_queue, mb);
649                 } else {
650                         ++dev->if_oerrors;
651                         m_freem(mb);
652                 }
653
654                 if (test_bit(IPOIB_MCAST_FLAG_BUSY, &mcast->flags))
655                         ipoib_dbg_mcast(priv, "no address vector, "
656                                         "but multicast join already started\n");
657                 else if (test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags))
658                         ipoib_mcast_sendonly_join(mcast);
659
660                 /*
661                  * If lookup completes between here and out:, don't
662                  * want to send packet twice.
663                  */
664                 mcast = NULL;
665         }
666
667 out:
668         if (mcast && mcast->ah)
669                 ipoib_send(priv, mb, mcast->ah, IB_MULTICAST_QPN);
670 }
671
672 void ipoib_mcast_dev_flush(struct ipoib_dev_priv *priv)
673 {
674         LIST_HEAD(remove_list);
675         struct ipoib_mcast *mcast, *tmcast;
676         unsigned long flags;
677
678         ipoib_dbg_mcast(priv, "flushing multicast list\n");
679
680         spin_lock_irqsave(&priv->lock, flags);
681
682         list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
683                 list_del(&mcast->list);
684                 rb_erase(&mcast->rb_node, &priv->multicast_tree);
685                 list_add_tail(&mcast->list, &remove_list);
686         }
687
688         if (priv->broadcast) {
689                 rb_erase(&priv->broadcast->rb_node, &priv->multicast_tree);
690                 list_add_tail(&priv->broadcast->list, &remove_list);
691                 priv->broadcast = NULL;
692         }
693
694         spin_unlock_irqrestore(&priv->lock, flags);
695
696         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
697                 ipoib_mcast_leave(priv, mcast);
698                 ipoib_mcast_free(mcast);
699         }
700 }
701
702 static int ipoib_mcast_addr_is_valid(const u8 *addr, unsigned int addrlen,
703                                      const u8 *broadcast)
704 {
705         if (addrlen != INFINIBAND_ALEN)
706                 return 0;
707         /* reserved QPN, prefix, scope */
708         if (memcmp(addr, broadcast, 6))
709                 return 0;
710         /* signature lower, pkey */
711         if (memcmp(addr + 7, broadcast + 7, 3))
712                 return 0;
713         return 1;
714 }
715
716 void ipoib_mcast_restart_task(struct work_struct *work)
717 {
718         struct ipoib_dev_priv *priv =
719                 container_of(work, struct ipoib_dev_priv, restart_task);
720         ipoib_mcast_restart(priv);
721 }
722
723 void ipoib_mcast_restart(struct ipoib_dev_priv *priv)
724 {
725         struct ifnet *dev = priv->dev;
726         struct ifmultiaddr *ifma;
727         struct ipoib_mcast *mcast, *tmcast;
728         LIST_HEAD(remove_list);
729         struct ib_sa_mcmember_rec rec;
730         int addrlen;
731
732         ipoib_dbg_mcast(priv, "restarting multicast task flags 0x%lX\n",
733             priv->flags);
734
735         ipoib_mcast_stop_thread(priv, 0);
736
737         if_maddr_rlock(dev);
738         spin_lock(&priv->lock);
739
740         /*
741          * Unfortunately, the networking core only gives us a list of all of
742          * the multicast hardware addresses. We need to figure out which ones
743          * are new and which ones have been removed
744          */
745
746         /* Clear out the found flag */
747         list_for_each_entry(mcast, &priv->multicast_list, list)
748                 clear_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
749
750         /* Mark all of the entries that are found or don't exist */
751
752
753         TAILQ_FOREACH(ifma, &dev->if_multiaddrs, ifma_link) {
754                 union ib_gid mgid;
755                 uint8_t *addr;
756
757                 if (ifma->ifma_addr->sa_family != AF_LINK)
758                         continue;
759                 addr = LLADDR((struct sockaddr_dl *)ifma->ifma_addr);
760                 addrlen = ((struct sockaddr_dl *)ifma->ifma_addr)->sdl_alen;
761                 if (!ipoib_mcast_addr_is_valid(addr, addrlen,
762                                                dev->if_broadcastaddr))
763                         continue;
764
765                 memcpy(mgid.raw, addr + 4, sizeof mgid);
766
767                 mcast = __ipoib_mcast_find(priv, &mgid);
768                 if (!mcast || test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
769                         struct ipoib_mcast *nmcast;
770
771                         /* ignore group which is directly joined by userspace */
772                         if (test_bit(IPOIB_FLAG_UMCAST, &priv->flags) &&
773                             !ib_sa_get_mcmember_rec(priv->ca, priv->port, &mgid, &rec)) {
774                                 ipoib_dbg_mcast(priv, "ignoring multicast entry for mgid %16D\n",
775                                                 mgid.raw, ":");
776                                 continue;
777                         }
778
779                         /* Not found or send-only group, let's add a new entry */
780                         ipoib_dbg_mcast(priv, "adding multicast entry for mgid %16D\n",
781                                         mgid.raw, ":");
782
783                         nmcast = ipoib_mcast_alloc(priv, 0);
784                         if (!nmcast) {
785                                 ipoib_warn(priv, "unable to allocate memory for multicast structure\n");
786                                 continue;
787                         }
788
789                         set_bit(IPOIB_MCAST_FLAG_FOUND, &nmcast->flags);
790
791                         nmcast->mcmember.mgid = mgid;
792
793                         if (mcast) {
794                                 /* Destroy the send only entry */
795                                 list_move_tail(&mcast->list, &remove_list);
796
797                                 rb_replace_node(&mcast->rb_node,
798                                                 &nmcast->rb_node,
799                                                 &priv->multicast_tree);
800                         } else
801                                 __ipoib_mcast_add(priv, nmcast);
802
803                         list_add_tail(&nmcast->list, &priv->multicast_list);
804                 }
805
806                 if (mcast)
807                         set_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags);
808         }
809
810         /* Remove all of the entries don't exist anymore */
811         list_for_each_entry_safe(mcast, tmcast, &priv->multicast_list, list) {
812                 if (!test_bit(IPOIB_MCAST_FLAG_FOUND, &mcast->flags) &&
813                     !test_bit(IPOIB_MCAST_FLAG_SENDONLY, &mcast->flags)) {
814                         ipoib_dbg_mcast(priv, "deleting multicast group %16D\n",
815                                         mcast->mcmember.mgid.raw, ":");
816
817                         rb_erase(&mcast->rb_node, &priv->multicast_tree);
818
819                         /* Move to the remove list */
820                         list_move_tail(&mcast->list, &remove_list);
821                 }
822         }
823
824         spin_unlock(&priv->lock);
825         if_maddr_runlock(dev);
826
827         /* We have to cancel outside of the spinlock */
828         list_for_each_entry_safe(mcast, tmcast, &remove_list, list) {
829                 ipoib_mcast_leave(mcast->priv, mcast);
830                 ipoib_mcast_free(mcast);
831         }
832
833         if (test_bit(IPOIB_FLAG_ADMIN_UP, &priv->flags))
834                 ipoib_mcast_start_thread(priv);
835 }
836
837 #ifdef CONFIG_INFINIBAND_IPOIB_DEBUG
838
839 struct ipoib_mcast_iter *ipoib_mcast_iter_init(struct ipoib_dev_priv *priv)
840 {
841         struct ipoib_mcast_iter *iter;
842
843         iter = kmalloc(sizeof *iter, GFP_KERNEL);
844         if (!iter)
845                 return NULL;
846
847         iter->priv = priv;
848         memset(iter->mgid.raw, 0, 16);
849
850         if (ipoib_mcast_iter_next(iter)) {
851                 kfree(iter);
852                 return NULL;
853         }
854
855         return iter;
856 }
857
858 int ipoib_mcast_iter_next(struct ipoib_mcast_iter *iter)
859 {
860         struct ipoib_dev_priv *priv = iter->priv;
861         struct rb_node *n;
862         struct ipoib_mcast *mcast;
863         int ret = 1;
864
865         spin_lock_irq(&priv->lock);
866
867         n = rb_first(&priv->multicast_tree);
868
869         while (n) {
870                 mcast = rb_entry(n, struct ipoib_mcast, rb_node);
871
872                 if (memcmp(iter->mgid.raw, mcast->mcmember.mgid.raw,
873                            sizeof (union ib_gid)) < 0) {
874                         iter->mgid      = mcast->mcmember.mgid;
875                         iter->created   = mcast->created;
876                         iter->queuelen  = mcast->pkt_queue.ifq_len;
877                         iter->complete  = !!mcast->ah;
878                         iter->send_only = !!(mcast->flags & (1 << IPOIB_MCAST_FLAG_SENDONLY));
879
880                         ret = 0;
881
882                         break;
883                 }
884
885                 n = rb_next(n);
886         }
887
888         spin_unlock_irq(&priv->lock);
889
890         return ret;
891 }
892
893 void ipoib_mcast_iter_read(struct ipoib_mcast_iter *iter,
894                            union ib_gid *mgid,
895                            unsigned long *created,
896                            unsigned int *queuelen,
897                            unsigned int *complete,
898                            unsigned int *send_only)
899 {
900         *mgid      = iter->mgid;
901         *created   = iter->created;
902         *queuelen  = iter->queuelen;
903         *complete  = iter->complete;
904         *send_only = iter->send_only;
905 }
906
907 #endif /* CONFIG_INFINIBAND_IPOIB_DEBUG */