FreeBSD releng/10.0: sys/ofed/drivers/infiniband/core/notice.c
/*
 * Copyright (c) 2006 Intel Corporation.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/completion.h>
#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/bitops.h>
#include <linux/random.h>

#include "sa.h"

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
MODULE_LICENSE("Dual BSD/GPL");

static void inform_add_one(struct ib_device *device);
static void inform_remove_one(struct ib_device *device);

static struct ib_client inform_client = {
	.name   = "ib_notice",
	.add    = inform_add_one,
	.remove = inform_remove_one
};

static struct ib_sa_client	sa_client;
static struct workqueue_struct	*inform_wq;

struct inform_device;

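/*
 * Per-device state: an inform_device holds one inform_port per physical
 * port, and each port keeps a red-black tree of inform_group structures
 * keyed by trap number.  A group tracks the subscribers (members)
 * registered for that trap.
 */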
struct inform_port {
	struct inform_device	*dev;
	spinlock_t		lock;
	struct rb_root		table;
	atomic_t		refcount;
	struct completion	comp;
	u8			port_num;
};

struct inform_device {
	struct ib_device	*device;
	struct ib_event_handler	event_handler;
	int			start_port;
	int			end_port;
	struct inform_port	port[0];
};

enum inform_state {
	INFORM_IDLE,
	INFORM_REGISTERING,
	INFORM_MEMBER,
	INFORM_BUSY,
	INFORM_ERROR
};

struct inform_member;

struct inform_group {
	u16			trap_number;
	struct rb_node		node;
	struct inform_port	*port;
	spinlock_t		lock;
	struct work_struct	work;
	struct list_head	pending_list;
	struct list_head	active_list;
	struct list_head	notice_list;
	struct inform_member	*last_join;
	int			members;
	enum inform_state	join_state; /* State relative to SA */
	atomic_t		refcount;
	enum inform_state	state;
	struct ib_sa_query	*query;
	int			query_id;
};

struct inform_member {
	struct ib_inform_info	info;
	struct ib_sa_client	*client;
	struct inform_group	*group;
	struct list_head	list;
	enum inform_state	state;
	atomic_t		refcount;
	struct completion	comp;
};

struct inform_notice {
	struct list_head	list;
	struct ib_sa_notice	notice;
};

static void reg_handler(int status, struct ib_sa_inform *inform,
			 void *context);
static void unreg_handler(int status, struct ib_sa_inform *inform,
			  void *context);

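/*
 * Red-black tree helpers for looking up and inserting groups by trap
 * number.  Callers hold port->lock.
 */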
static struct inform_group *inform_find(struct inform_port *port,
					u16 trap_number)
{
	struct rb_node *node = port->table.rb_node;
	struct inform_group *group;

	while (node) {
		group = rb_entry(node, struct inform_group, node);
		if (trap_number < group->trap_number)
			node = node->rb_left;
		else if (trap_number > group->trap_number)
			node = node->rb_right;
		else
			return group;
	}
	return NULL;
}

static struct inform_group *inform_insert(struct inform_port *port,
					  struct inform_group *group)
{
	struct rb_node **link = &port->table.rb_node;
	struct rb_node *parent = NULL;
	struct inform_group *cur_group;

	while (*link) {
		parent = *link;
		cur_group = rb_entry(parent, struct inform_group, node);
		if (group->trap_number < cur_group->trap_number)
			link = &(*link)->rb_left;
		else if (group->trap_number > cur_group->trap_number)
			link = &(*link)->rb_right;
		else
			return cur_group;
	}
	rb_link_node(&group->node, parent, link);
	rb_insert_color(&group->node, &port->table);
	return NULL;
}

static void deref_port(struct inform_port *port)
{
	if (atomic_dec_and_test(&port->refcount))
		complete(&port->comp);
}

static void release_group(struct inform_group *group)
{
	struct inform_port *port = group->port;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	if (atomic_dec_and_test(&group->refcount)) {
		rb_erase(&group->node, &port->table);
		spin_unlock_irqrestore(&port->lock, flags);
		kfree(group);
		deref_port(port);
	} else
		spin_unlock_irqrestore(&port->lock, flags);
}

static void deref_member(struct inform_member *member)
{
	if (atomic_dec_and_test(&member->refcount))
		complete(&member->comp);
}

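/*
 * Add a new member to the group's pending list and, if the group is idle,
 * schedule the work handler to process the registration.
 */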
static void queue_reg(struct inform_member *member)
{
	struct inform_group *group = member->group;
	unsigned long flags;

	spin_lock_irqsave(&group->lock, flags);
	list_add(&member->list, &group->pending_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		atomic_inc(&group->refcount);
		queue_work(inform_wq, &group->work);
	}
	spin_unlock_irqrestore(&group->lock, flags);
}

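/*
 * Issue an InformInfo subscription request to the SA for the member's trap
 * number.  Completion is reported asynchronously through reg_handler().
 */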
static int send_reg(struct inform_group *group, struct inform_member *member)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.subscribe = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
	inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	group->last_join = member;
	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     reg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

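/*
 * Issue an InformInfo request with the subscribe field left clear to
 * cancel the group's subscription; completion is reported through
 * unreg_handler().
 */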
static int send_unreg(struct inform_group *group)
{
	struct inform_port *port = group->port;
	struct ib_sa_inform inform;
	int ret;

	memset(&inform, 0, sizeof inform);
	inform.lid_range_begin = cpu_to_be16(0xFFFF);
	inform.is_generic = 1;
	inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
	inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
	inform.trap.generic.qpn = IB_QP1;
	inform.trap.generic.resp_time = 19;
	inform.trap.generic.producer_type =
				cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);

	ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
				     port->port_num, &inform, 3000, GFP_KERNEL,
				     unreg_handler, group, &group->query);
	if (ret >= 0) {
		group->query_id = ret;
		ret = 0;
	}
	return ret;
}

static void join_group(struct inform_group *group, struct inform_member *member)
{
	member->state = INFORM_MEMBER;
	group->members++;
	list_move(&member->list, &group->active_list);
}

static int fail_join(struct inform_group *group, struct inform_member *member,
		     int status)
{
	spin_lock_irq(&group->lock);
	list_del_init(&member->list);
	spin_unlock_irq(&group->lock);
	return member->info.callback(status, &member->info, NULL);
}

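/*
 * Notify all active members that their subscription was lost (-ENETRESET)
 * and reset the group's join state so pending registrations can be
 * processed again by the work handler.
 */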
static void process_group_error(struct inform_group *group)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	while (!list_empty(&group->active_list)) {
		member = list_entry(group->active_list.next,
				    struct inform_member, list);
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		group->members--;
		member->state = INFORM_ERROR;
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(-ENETRESET, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	group->join_state = INFORM_IDLE;
	group->state = INFORM_BUSY;
	spin_unlock_irq(&group->lock);
}

/*
 * Report a notice to all active subscribers.  We use a temporary list to
 * handle unsubscription requests while the notice is being reported, which
 * avoids holding the group lock while in the user's callback.
 */
static void process_notice(struct inform_group *group,
			   struct inform_notice *info_notice)
{
	struct inform_member *member;
	struct list_head list;
	int ret;

	INIT_LIST_HEAD(&list);

	spin_lock_irq(&group->lock);
	list_splice_init(&group->active_list, &list);
	while (!list_empty(&list)) {

		member = list_entry(list.next, struct inform_member, list);
		atomic_inc(&member->refcount);
		list_move(&member->list, &group->active_list);
		spin_unlock_irq(&group->lock);

		ret = member->info.callback(0, &member->info,
					    &info_notice->notice);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}
	spin_unlock_irq(&group->lock);
}

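/*
 * Per-group state machine, run from the workqueue or from SA callbacks.
 * Drains queued notices, processes pending registrations (joining locally
 * if the group is already registered with the SA, otherwise issuing a new
 * registration), and unregisters with the SA once the last member leaves.
 */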
static void inform_work_handler(struct work_struct *work)
{
	struct inform_group *group;
	struct inform_member *member;
	struct ib_inform_info *info;
	struct inform_notice *info_notice;
	int status, ret;

	group = container_of(work, typeof(*group), work);
retest:
	spin_lock_irq(&group->lock);
	while (!list_empty(&group->pending_list) ||
	       !list_empty(&group->notice_list) ||
	       (group->state == INFORM_ERROR)) {

		if (group->state == INFORM_ERROR) {
			spin_unlock_irq(&group->lock);
			process_group_error(group);
			goto retest;
		}

		if (!list_empty(&group->notice_list)) {
			info_notice = list_entry(group->notice_list.next,
						 struct inform_notice, list);
			list_del(&info_notice->list);
			spin_unlock_irq(&group->lock);
			process_notice(group, info_notice);
			kfree(info_notice);
			goto retest;
		}

		member = list_entry(group->pending_list.next,
				    struct inform_member, list);
		info = &member->info;
		atomic_inc(&member->refcount);

		if (group->join_state == INFORM_MEMBER) {
			join_group(group, member);
			spin_unlock_irq(&group->lock);
			ret = info->callback(0, info, NULL);
		} else {
			spin_unlock_irq(&group->lock);
			status = send_reg(group, member);
			if (!status) {
				deref_member(member);
				return;
			}
			ret = fail_join(group, member, status);
		}

		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
		spin_lock_irq(&group->lock);
	}

	if (!group->members && (group->join_state == INFORM_MEMBER)) {
		group->join_state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		if (send_unreg(group))
			goto retest;
	} else {
		group->state = INFORM_IDLE;
		spin_unlock_irq(&group->lock);
		release_group(group);
	}
}

/*
 * Fail a join request if it is still active - at the head of the pending queue.
 */
static void process_join_error(struct inform_group *group, int status)
{
	struct inform_member *member;
	int ret;

	spin_lock_irq(&group->lock);
	member = list_entry(group->pending_list.next,
			    struct inform_member, list);
	if (group->last_join == member) {
		atomic_inc(&member->refcount);
		list_del_init(&member->list);
		spin_unlock_irq(&group->lock);
		ret = member->info.callback(status, &member->info, NULL);
		deref_member(member);
		if (ret)
			ib_sa_unregister_inform_info(&member->info);
	} else
		spin_unlock_irq(&group->lock);
}

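/*
 * Completion handlers for the SA InformInfo queries.  On a successful
 * registration the group becomes a member; in all cases the work handler
 * is run to continue processing the group.
 */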
static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
{
	struct inform_group *group = context;

	if (status)
		process_join_error(group, status);
	else
		group->join_state = INFORM_MEMBER;

	inform_work_handler(&group->work);
}

static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
{
	struct inform_group *group = context;

	inform_work_handler(&group->work);
}

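/*
 * Deliver a received SA Notice to the group subscribed to the notice's
 * trap number on the given port, queueing it for the work handler.
 */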
int notice_dispatch(struct ib_device *device, u8 port_num,
		    struct ib_sa_notice *notice)
{
	struct inform_device *dev;
	struct inform_port *port;
	struct inform_group *group;
	struct inform_notice *info_notice;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return 0; /* No one to give notice to. */

	port = &dev->port[port_num - dev->start_port];
	spin_lock_irq(&port->lock);
	group = inform_find(port,
			    __be16_to_cpu(notice->trap.generic.trap_num));
	if (!group) {
		spin_unlock_irq(&port->lock);
		return 0;
	}

	atomic_inc(&group->refcount);
	spin_unlock_irq(&port->lock);

	info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
	if (!info_notice) {
		release_group(group);
		return -ENOMEM;
	}

	info_notice->notice = *notice;

	spin_lock_irq(&group->lock);
	list_add(&info_notice->list, &group->notice_list);
	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		inform_work_handler(&group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	return 0;
}

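/*
 * Look up the group for a trap number, allocating and inserting a new one
 * if needed.  A reference is taken on the group that is returned.
 */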
static struct inform_group *acquire_group(struct inform_port *port,
					  u16 trap_number, gfp_t gfp_mask)
{
	struct inform_group *group, *cur_group;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	group = inform_find(port, trap_number);
	if (group)
		goto found;
	spin_unlock_irqrestore(&port->lock, flags);

	group = kzalloc(sizeof *group, gfp_mask);
	if (!group)
		return NULL;

	group->port = port;
	group->trap_number = trap_number;
	INIT_LIST_HEAD(&group->pending_list);
	INIT_LIST_HEAD(&group->active_list);
	INIT_LIST_HEAD(&group->notice_list);
	INIT_WORK(&group->work, inform_work_handler);
	spin_lock_init(&group->lock);

	spin_lock_irqsave(&port->lock, flags);
	cur_group = inform_insert(port, group);
	if (cur_group) {
		kfree(group);
		group = cur_group;
	} else
		atomic_inc(&port->refcount);
found:
	atomic_inc(&group->refcount);
	spin_unlock_irqrestore(&port->lock, flags);
	return group;
}

/*
 * We serialize all join requests to a single group to make our lives much
 * easier.  Otherwise, two users could try to join the same group
 * simultaneously, with different configurations, one could leave while the
 * join is in progress, etc., which makes locking around error recovery
 * difficult.
 */
struct ib_inform_info *
ib_sa_register_inform_info(struct ib_sa_client *client,
			   struct ib_device *device, u8 port_num,
			   u16 trap_number, gfp_t gfp_mask,
			   int (*callback)(int status,
					   struct ib_inform_info *info,
					   struct ib_sa_notice *notice),
			   void *context)
{
	struct inform_device *dev;
	struct inform_member *member;
	struct ib_inform_info *info;
	int ret;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return ERR_PTR(-ENODEV);

	member = kzalloc(sizeof *member, gfp_mask);
	if (!member)
		return ERR_PTR(-ENOMEM);

	ib_sa_client_get(client);
	member->client = client;
	member->info.trap_number = trap_number;
	member->info.callback = callback;
	member->info.context = context;
	init_completion(&member->comp);
	atomic_set(&member->refcount, 1);
	member->state = INFORM_REGISTERING;

	member->group = acquire_group(&dev->port[port_num - dev->start_port],
				      trap_number, gfp_mask);
	if (!member->group) {
		ret = -ENOMEM;
		goto err;
	}

	/*
	 * The user will get the info structure in their callback.  They
	 * could then free the info structure before we can return from
	 * this routine.  So we save the pointer to return before queuing
	 * any callback.
	 */
	info = &member->info;
	queue_reg(member);
	return info;

err:
	ib_sa_client_put(member->client);
	kfree(member);
	return ERR_PTR(ret);
}
EXPORT_SYMBOL(ib_sa_register_inform_info);
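
/*
 * Illustrative usage sketch (hypothetical caller-side code, not part of
 * this file): "my_sa_client" is assumed to have been registered with
 * ib_sa_register_client(), and "my_device"/"port_num"/"trap_number" name
 * the port and trap of interest.  Returning non-zero from the callback
 * asks the core to unregister the subscription.
 *
 *	static int my_trap_handler(int status, struct ib_inform_info *info,
 *				   struct ib_sa_notice *notice)
 *	{
 *		if (status)
 *			return status;	(registration failed or was lost)
 *		(examine *notice and react to the trap)
 *		return 0;
 *	}
 *
 *	info = ib_sa_register_inform_info(&my_sa_client, my_device, port_num,
 *					  trap_number, GFP_KERNEL,
 *					  my_trap_handler, NULL);
 *	if (IS_ERR(info))
 *		(handle the error)
 *	...
 *	ib_sa_unregister_inform_info(info);
 */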

void ib_sa_unregister_inform_info(struct ib_inform_info *info)
{
	struct inform_member *member;
	struct inform_group *group;

	member = container_of(info, struct inform_member, info);
	group = member->group;

	spin_lock_irq(&group->lock);
	if (member->state == INFORM_MEMBER)
		group->members--;

	list_del_init(&member->list);

	if (group->state == INFORM_IDLE) {
		group->state = INFORM_BUSY;
		spin_unlock_irq(&group->lock);
		/* Continue to hold reference on group until callback */
		queue_work(inform_wq, &group->work);
	} else {
		spin_unlock_irq(&group->lock);
		release_group(group);
	}

	deref_member(member);
	wait_for_completion(&member->comp);
	ib_sa_client_put(member->client);
	kfree(member);
}
EXPORT_SYMBOL(ib_sa_unregister_inform_info);

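/*
 * Mark every group on the port as errored after an event that may have
 * invalidated SA registrations; the work handler will then report the
 * loss to active members.
 */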
static void inform_groups_lost(struct inform_port *port)
{
	struct inform_group *group;
	struct rb_node *node;
	unsigned long flags;

	spin_lock_irqsave(&port->lock, flags);
	for (node = rb_first(&port->table); node; node = rb_next(node)) {
		group = rb_entry(node, struct inform_group, node);
		spin_lock(&group->lock);
		if (group->state == INFORM_IDLE) {
			atomic_inc(&group->refcount);
			queue_work(inform_wq, &group->work);
		}
		group->state = INFORM_ERROR;
		spin_unlock(&group->lock);
	}
	spin_unlock_irqrestore(&port->lock, flags);
}

static void inform_event_handler(struct ib_event_handler *handler,
				struct ib_event *event)
{
	struct inform_device *dev;

	dev = container_of(handler, struct inform_device, event_handler);

	switch (event->event) {
	case IB_EVENT_PORT_ERR:
	case IB_EVENT_LID_CHANGE:
	case IB_EVENT_SM_CHANGE:
	case IB_EVENT_CLIENT_REREGISTER:
		inform_groups_lost(&dev->port[event->element.port_num -
					      dev->start_port]);
		break;
	default:
		break;
	}
}

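/*
 * ib_client add/remove callbacks: allocate per-port state when an IB
 * device is registered and tear it down when the device goes away.
 */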
static void inform_add_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
		return;

	dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
		      GFP_KERNEL);
	if (!dev)
		return;

	if (device->node_type == RDMA_NODE_IB_SWITCH)
		dev->start_port = dev->end_port = 0;
	else {
		dev->start_port = 1;
		dev->end_port = device->phys_port_cnt;
	}

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		port->dev = dev;
		port->port_num = dev->start_port + i;
		spin_lock_init(&port->lock);
		port->table = RB_ROOT;
		init_completion(&port->comp);
		atomic_set(&port->refcount, 1);
	}

	dev->device = device;
	ib_set_client_data(device, &inform_client, dev);

	INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
	ib_register_event_handler(&dev->event_handler);
}

static void inform_remove_one(struct ib_device *device)
{
	struct inform_device *dev;
	struct inform_port *port;
	int i;

	dev = ib_get_client_data(device, &inform_client);
	if (!dev)
		return;

	ib_unregister_event_handler(&dev->event_handler);
	flush_workqueue(inform_wq);

	for (i = 0; i <= dev->end_port - dev->start_port; i++) {
		port = &dev->port[i];
		deref_port(port);
		wait_for_completion(&port->comp);
	}

	kfree(dev);
}

int notice_init(void)
{
	int ret;

	inform_wq = create_singlethread_workqueue("ib_inform");
	if (!inform_wq)
		return -ENOMEM;

	ib_sa_register_client(&sa_client);

	ret = ib_register_client(&inform_client);
	if (ret)
		goto err;
	return 0;

err:
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
	return ret;
}

void notice_cleanup(void)
{
	ib_unregister_client(&inform_client);
	ib_sa_unregister_client(&sa_client);
	destroy_workqueue(inform_wq);
}