2 * Copyright (c) 2006 Intel Corporation. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following conditions are met:
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following disclaimer.
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
33 #include <linux/completion.h>
34 #include <linux/dma-mapping.h>
35 #include <linux/err.h>
36 #include <linux/interrupt.h>
37 #include <linux/pci.h>
38 #include <linux/bitops.h>
39 #include <linux/random.h>
43 MODULE_AUTHOR("Sean Hefty");
44 MODULE_DESCRIPTION("InfiniBand InformInfo & Notice event handling");
45 MODULE_LICENSE("Dual BSD/GPL");
/* Per-device add/remove callbacks, referenced by inform_client below. */
47 static void inform_add_one(struct ib_device *device);
48 static void inform_remove_one(struct ib_device *device);
/* Client descriptor handed to ib_register_client()/ib_unregister_client(). */
50 static struct ib_client inform_client = {
52 .add = inform_add_one,
53 .remove = inform_remove_one
/* SA client handle passed to ib_sa_informinfo_query() by this file. */
56 static struct ib_sa_client sa_client;
/* Workqueue on which per-group work items are queued. */
57 static struct workqueue_struct *inform_wq;
62 struct inform_device *dev;
66 struct completion comp;
/*
 * Per-ib_device state. It is stored as the client data for inform_client
 * and recovered from event_handler with container_of().
 */
70 struct inform_device {
71 struct ib_device *device;
72 struct ib_event_handler event_handler;
/* Trailing per-port array; its length is fixed when the device is allocated. */
75 struct inform_port port[0];
91 struct inform_port *port;
93 struct work_struct work;
94 struct list_head pending_list;
95 struct list_head active_list;
96 struct list_head notice_list;
97 struct inform_member *last_join;
99 enum inform_state join_state; /* State relative to SA */
101 enum inform_state state;
102 struct ib_sa_query *query;
/*
 * One registration created by ib_sa_register_inform_info(). Callers hold a
 * pointer to the embedded info; container_of() maps it back to the member.
 */
106 struct inform_member {
/* Caller-visible part: trap_number, callback and context are filled in at registration. */
107 struct ib_inform_info info;
108 struct ib_sa_client *client;
109 struct inform_group *group;
/* Linkage on one of the group's member lists (pending_list or active_list). */
110 struct list_head list;
111 enum inform_state state;
/* Completed by deref_member() when the member's refcount reaches zero. */
113 struct completion comp;
/* A copy of a received SA notice, linked onto a group's notice_list. */
116 struct inform_notice {
117 struct list_head list;
118 struct ib_sa_notice notice;
121 static void reg_handler(int status, struct ib_sa_inform *inform,
123 static void unreg_handler(int status, struct ib_sa_inform *inform,
/*
 * Look up the group for trap_number in the port's red-black tree.
 * The walk goes left when trap_number is smaller than the current node's
 * key and right when it is larger. Callers in this file hold port->lock.
 */
126 static struct inform_group *inform_find(struct inform_port *port,
129 struct rb_node *node = port->table.rb_node;
130 struct inform_group *group;
133 group = rb_entry(node, struct inform_group, node);
134 if (trap_number < group->trap_number)
135 node = node->rb_left;
136 else if (trap_number > group->trap_number)
137 node = node->rb_right;
/*
 * Insert group into the port's red-black tree, keyed by trap_number.
 * Descends from the root picking the left or right link by key, then links
 * the new node at the slot found and rebalances the tree.
 * NOTE(review): what happens when an equal key already exists is not
 * visible in this excerpt - confirm against the full source.
 */
144 static struct inform_group *inform_insert(struct inform_port *port,
145 struct inform_group *group)
147 struct rb_node **link = &port->table.rb_node;
148 struct rb_node *parent = NULL;
149 struct inform_group *cur_group;
153 cur_group = rb_entry(parent, struct inform_group, node);
154 if (group->trap_number < cur_group->trap_number)
155 link = &(*link)->rb_left;
156 else if (group->trap_number > cur_group->trap_number)
157 link = &(*link)->rb_right;
161 rb_link_node(&group->node, parent, link);
162 rb_insert_color(&group->node, &port->table);
/*
 * Drop one reference on the port. The final put signals port->comp, which
 * inform_remove_one() waits on.
 */
166 static void deref_port(struct inform_port *port)
168 if (atomic_dec_and_test(&port->refcount))
169 complete(&port->comp);
/*
 * Drop one reference on the group while holding the owning port's lock.
 * When the count reaches zero the group is erased from the port's tree
 * before the lock is dropped; otherwise the lock is simply released.
 * NOTE(review): any cleanup after the final unlock is not visible in this
 * excerpt - confirm.
 */
172 static void release_group(struct inform_group *group)
174 struct inform_port *port = group->port;
177 spin_lock_irqsave(&port->lock, flags);
178 if (atomic_dec_and_test(&group->refcount)) {
179 rb_erase(&group->node, &port->table);
180 spin_unlock_irqrestore(&port->lock, flags);
184 spin_unlock_irqrestore(&port->lock, flags);
/*
 * Drop one reference on the member. The final put signals member->comp,
 * which ib_sa_unregister_inform_info() waits on.
 */
187 static void deref_member(struct inform_member *member)
189 if (atomic_dec_and_test(&member->refcount))
190 complete(&member->comp);
/*
 * Put the member on its group's pending_list. If the group is idle, mark it
 * busy, take a group reference and queue the group's work item on
 * inform_wq. All of this happens under group->lock.
 */
193 static void queue_reg(struct inform_member *member)
195 struct inform_group *group = member->group;
198 spin_lock_irqsave(&group->lock, flags);
199 list_add(&member->list, &group->pending_list);
200 if (group->state == INFORM_IDLE) {
201 group->state = INFORM_BUSY;
202 atomic_inc(&group->refcount);
203 queue_work(inform_wq, &group->work);
205 spin_unlock_irqrestore(&group->lock, flags);
/*
 * Build an InformInfo record that subscribes (subscribe = 1) to the member's
 * trap number and submit it with ib_sa_informinfo_query(). reg_handler is
 * the completion callback and the group is its context.
 */
208 static int send_reg(struct inform_group *group, struct inform_member *member)
210 struct inform_port *port = group->port;
211 struct ib_sa_inform inform;
214 memset(&inform, 0, sizeof inform);
215 inform.lid_range_begin = cpu_to_be16(0xFFFF);
216 inform.is_generic = 1;
217 inform.subscribe = 1;
218 inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
219 inform.trap.generic.trap_num = cpu_to_be16(member->info.trap_number);
220 inform.trap.generic.resp_time = 19;
221 inform.trap.generic.producer_type =
222 cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
/* Record which member this query is for; process_join_error() compares against it. */
224 group->last_join = member;
225 ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
226 port->port_num, &inform, 3000, GFP_KERNEL,
227 reg_handler, group,&group->query);
/* NOTE(review): the condition guarding this assignment is not visible in this excerpt. */
229 group->query_id = ret;
/*
 * Build an InformInfo record for the group's trap number with the subscribe
 * field left at zero, and submit it with ib_sa_informinfo_query().
 * unreg_handler is the completion callback and the group is its context.
 */
235 static int send_unreg(struct inform_group *group)
237 struct inform_port *port = group->port;
238 struct ib_sa_inform inform;
241 memset(&inform, 0, sizeof inform);
242 inform.lid_range_begin = cpu_to_be16(0xFFFF);
243 inform.is_generic = 1;
244 inform.type = cpu_to_be16(IB_SA_EVENT_TYPE_ALL);
245 inform.trap.generic.trap_num = cpu_to_be16(group->trap_number);
246 inform.trap.generic.qpn = IB_QP1;
247 inform.trap.generic.resp_time = 19;
248 inform.trap.generic.producer_type =
249 cpu_to_be32(IB_SA_EVENT_PRODUCER_TYPE_ALL);
251 ret = ib_sa_informinfo_query(&sa_client, port->dev->device,
252 port->port_num, &inform, 3000, GFP_KERNEL,
253 unreg_handler, group, &group->query);
/* NOTE(review): the condition guarding this assignment is not visible in this excerpt. */
255 group->query_id = ret;
/*
 * Mark the member as INFORM_MEMBER and move it onto the group's active_list.
 * The caller in this file (inform_work_handler) holds group->lock.
 */
261 static void join_group(struct inform_group *group, struct inform_member *member)
263 member->state = INFORM_MEMBER;
265 list_move(&member->list, &group->active_list);
/*
 * Unlink the member from whatever list it is on (under group->lock), then
 * report status to the member's callback with a NULL notice.
 * Returns the callback's return value.
 */
268 static int fail_join(struct inform_group *group, struct inform_member *member,
271 spin_lock_irq(&group->lock);
272 list_del_init(&member->list);
273 spin_unlock_irq(&group->lock);
274 return member->info.callback(status, &member->info, NULL);
/*
 * Drain the group's active_list after an error. Each member is unlinked,
 * marked INFORM_ERROR and told -ENETRESET through its callback, which runs
 * with group->lock dropped. Once the list is empty the group's join_state
 * is reset to INFORM_IDLE and its state is set to INFORM_BUSY.
 */
277 static void process_group_error(struct inform_group *group)
279 struct inform_member *member;
282 spin_lock_irq(&group->lock);
283 while (!list_empty(&group->active_list)) {
284 member = list_entry(group->active_list.next,
285 struct inform_member, list);
/* Hold a member reference across the unlocked callback. */
286 atomic_inc(&member->refcount);
287 list_del_init(&member->list);
289 member->state = INFORM_ERROR;
290 spin_unlock_irq(&group->lock);
292 ret = member->info.callback(-ENETRESET, &member->info, NULL);
293 deref_member(member);
/* NOTE(review): the condition guarding this unregister call is not visible in this excerpt. */
295 ib_sa_unregister_inform_info(&member->info);
296 spin_lock_irq(&group->lock);
299 group->join_state = INFORM_IDLE;
300 group->state = INFORM_BUSY;
301 spin_unlock_irq(&group->lock);
305 * Report a notice to all active subscribers. We use a temporary list to
306 * handle unsubscription requests while the notice is being reported, which
307 * avoids holding the group lock while in the user's callback.
/*
 * Deliver info_notice to every member currently on the group's active_list.
 * The active list is spliced onto a local list; members are moved back to
 * active_list one at a time and their callback is invoked with status 0 and
 * the notice while group->lock is dropped.
 */
309 static void process_notice(struct inform_group *group,
310 struct inform_notice *info_notice)
312 struct inform_member *member;
313 struct list_head list;
316 INIT_LIST_HEAD(&list);
318 spin_lock_irq(&group->lock);
319 list_splice_init(&group->active_list, &list);
320 while (!list_empty(&list)) {
322 member = list_entry(list.next, struct inform_member, list);
/* Hold a member reference across the unlocked callback. */
323 atomic_inc(&member->refcount);
324 list_move(&member->list, &group->active_list);
325 spin_unlock_irq(&group->lock);
327 ret = member->info.callback(0, &member->info,
328 &info_notice->notice);
329 deref_member(member);
/* NOTE(review): the condition guarding this unregister call is not visible in this excerpt. */
331 ib_sa_unregister_inform_info(&member->info);
332 spin_lock_irq(&group->lock);
334 spin_unlock_irq(&group->lock);
/*
 * Work function for a group. It is installed with INIT_WORK() and is also
 * called directly by the query handlers and notice_dispatch().
 * While the group has pending members, queued notices, or is in the
 * INFORM_ERROR state, it handles one item per pass, dropping group->lock
 * around callbacks and SA queries.
 * NOTE(review): several control-flow lines (loop continuations, early
 * exits, frees) are not visible in this excerpt; the comments below only
 * describe the statements that are.
 */
337 static void inform_work_handler(struct work_struct *work)
339 struct inform_group *group;
340 struct inform_member *member;
341 struct ib_inform_info *info;
342 struct inform_notice *info_notice;
345 group = container_of(work, typeof(*group), work);
347 spin_lock_irq(&group->lock);
348 while (!list_empty(&group->pending_list) ||
349 !list_empty(&group->notice_list) ||
350 (group->state == INFORM_ERROR)) {
/* The error state is checked first: fail every active member. */
352 if (group->state == INFORM_ERROR) {
353 spin_unlock_irq(&group->lock);
354 process_group_error(group);
/* Then deliver the notice at the head of notice_list. */
358 if (!list_empty(&group->notice_list)) {
359 info_notice = list_entry(group->notice_list.next,
360 struct inform_notice, list);
361 list_del(&info_notice->list);
362 spin_unlock_irq(&group->lock);
363 process_notice(group, info_notice);
/* Otherwise service the member at the head of pending_list. */
368 member = list_entry(group->pending_list.next,
369 struct inform_member, list);
370 info = &member->info;
371 atomic_inc(&member->refcount);
/* Group already registered with the SA: activate the member and report success. */
373 if (group->join_state == INFORM_MEMBER) {
374 join_group(group, member);
375 spin_unlock_irq(&group->lock);
376 ret = info->callback(0, info, NULL);
/* Not registered yet: issue the registration query with the lock dropped. */
378 spin_unlock_irq(&group->lock);
379 status = send_reg(group, member);
381 deref_member(member);
384 ret = fail_join(group, member, status);
387 deref_member(member);
389 ib_sa_unregister_inform_info(&member->info);
390 spin_lock_irq(&group->lock);
/* No members left but still registered with the SA: send the unregistration. */
393 if (!group->members && (group->join_state == INFORM_MEMBER)) {
394 group->join_state = INFORM_IDLE;
395 spin_unlock_irq(&group->lock);
396 if (send_unreg(group))
/* Mark the group idle and drop a group reference. */
399 group->state = INFORM_IDLE;
400 spin_unlock_irq(&group->lock);
401 release_group(group);
406 * Fail a join request if it is still active - at the head of the pending queue.
/*
 * Report a failed registration query. If the member at the head of
 * pending_list is the one recorded in group->last_join, it is unlinked and
 * its callback is invoked with status (and a NULL notice) outside the group
 * lock. Otherwise the lock is just released.
 */
408 static void process_join_error(struct inform_group *group, int status)
410 struct inform_member *member;
413 spin_lock_irq(&group->lock);
414 member = list_entry(group->pending_list.next,
415 struct inform_member, list);
416 if (group->last_join == member) {
/* Hold a member reference across the unlocked callback. */
417 atomic_inc(&member->refcount);
418 list_del_init(&member->list);
419 spin_unlock_irq(&group->lock);
420 ret = member->info.callback(status, &member->info, NULL);
421 deref_member(member);
/* NOTE(review): the condition guarding this unregister call is not visible in this excerpt. */
423 ib_sa_unregister_inform_info(&member->info);
425 spin_unlock_irq(&group->lock);
/*
 * Completion callback for the query issued by send_reg(); context is the
 * group. It either reports the failure through process_join_error() or
 * records the group as INFORM_MEMBER, then runs the group's work handler
 * directly.
 * NOTE(review): the test that selects between the two paths is not visible
 * in this excerpt - presumably on status; confirm.
 */
428 static void reg_handler(int status, struct ib_sa_inform *inform, void *context)
430 struct inform_group *group = context;
433 process_join_error(group, status);
435 group->join_state = INFORM_MEMBER;
437 inform_work_handler(&group->work);
/*
 * Completion callback for the query issued by send_unreg(); context is the
 * group. The visible code does not use status or rec; it just runs the
 * group's work handler directly.
 */
440 static void unreg_handler(int status, struct ib_sa_inform *rec, void *context)
442 struct inform_group *group = context;
444 inform_work_handler(&group->work);
/*
 * Route a received notice to the group registered for its trap number on
 * the given device port.
 *
 * The group is looked up under port->lock and a reference is taken on it.
 * The notice is copied into a newly allocated inform_notice and added to the
 * group's notice_list. If the group is idle it is marked busy and its work
 * handler is run inline; otherwise the group reference is dropped.
 *
 * Returns 0 when there is no one to give the notice to.
 * NOTE(review): the other return statements are not visible in this excerpt.
 */
447 int notice_dispatch(struct ib_device *device, u8 port_num,
448 struct ib_sa_notice *notice)
450 struct inform_device *dev;
451 struct inform_port *port;
452 struct inform_group *group;
453 struct inform_notice *info_notice;
455 dev = ib_get_client_data(device, &inform_client);
457 return 0; /* No one to give notice to. */
459 port = &dev->port[port_num - dev->start_port];
460 spin_lock_irq(&port->lock);
461 group = inform_find(port, __be16_to_cpu(notice->trap.
464 spin_unlock_irq(&port->lock);
/* Pin the group before dropping the port lock. */
468 atomic_inc(&group->refcount);
469 spin_unlock_irq(&port->lock);
471 info_notice = kmalloc(sizeof *info_notice, GFP_KERNEL);
473 release_group(group);
/* Keep a private copy of the caller's notice. */
477 info_notice->notice = *notice;
479 spin_lock_irq(&group->lock);
480 list_add(&info_notice->list, &group->notice_list);
481 if (group->state == INFORM_IDLE) {
482 group->state = INFORM_BUSY;
483 spin_unlock_irq(&group->lock);
484 inform_work_handler(&group->work);
486 spin_unlock_irq(&group->lock);
487 release_group(group);
/*
 * Get the group for trap_number on the port, taking a reference on it.
 *
 * The tree is searched under port->lock first. The visible code then
 * allocates a zeroed group with gfp_mask outside the lock, initializes its
 * lists, work item and lock, and inserts it under port->lock, taking a port
 * reference as well as the group reference.
 * NOTE(review): the branches for "found on first lookup", allocation
 * failure and losing an insert race are not visible in this excerpt.
 */
493 static struct inform_group *acquire_group(struct inform_port *port,
494 u16 trap_number, gfp_t gfp_mask)
496 struct inform_group *group, *cur_group;
499 spin_lock_irqsave(&port->lock, flags);
500 group = inform_find(port, trap_number);
503 spin_unlock_irqrestore(&port->lock, flags);
505 group = kzalloc(sizeof *group, gfp_mask);
510 group->trap_number = trap_number;
511 INIT_LIST_HEAD(&group->pending_list);
512 INIT_LIST_HEAD(&group->active_list);
513 INIT_LIST_HEAD(&group->notice_list);
514 INIT_WORK(&group->work, inform_work_handler);
515 spin_lock_init(&group->lock);
517 spin_lock_irqsave(&port->lock, flags);
518 cur_group = inform_insert(port, group);
523 atomic_inc(&port->refcount);
525 atomic_inc(&group->refcount);
526 spin_unlock_irqrestore(&port->lock, flags);
531 * We serialize all join requests to a single group to make our lives much
532 * easier. Otherwise, two users could try to join the same group
533 * simultaneously, with different configurations, one could leave while the
534 * join is in progress, etc., which makes locking around error recovery difficult.
/*
 * Register to receive notices with the given trap number on a device port.
 *
 * client:      SA client; a reference is taken with ib_sa_client_get().
 * device:      IB device; must have inform_client data attached.
 * port_num:    port on the device; indexes the per-port array relative to
 *              the device's start_port.
 * trap_number: trap number that selects (or creates) the group.
 * gfp_mask:    allocation flags for the member and, if needed, the group.
 * callback:    stored in the returned info and invoked with a status, the
 *              info and a notice pointer (NULL on the non-notice calls
 *              visible in this file).
 * context:     stored in info.context.
 *
 * The new member starts with one reference and state INFORM_REGISTERING.
 * The visible error returns are ERR_PTR(-ENODEV) and ERR_PTR(-ENOMEM); an
 * error path also drops the client reference.
 * NOTE(review): the conditions on the error paths and the final return are
 * not visible in this excerpt.
 */
537 struct ib_inform_info *
538 ib_sa_register_inform_info(struct ib_sa_client *client,
539 struct ib_device *device, u8 port_num,
540 u16 trap_number, gfp_t gfp_mask,
541 int (*callback)(int status,
542 struct ib_inform_info *info,
543 struct ib_sa_notice *notice),
546 struct inform_device *dev;
547 struct inform_member *member;
548 struct ib_inform_info *info;
551 dev = ib_get_client_data(device, &inform_client);
553 return ERR_PTR(-ENODEV);
555 member = kzalloc(sizeof *member, gfp_mask);
557 return ERR_PTR(-ENOMEM);
559 ib_sa_client_get(client);
560 member->client = client;
561 member->info.trap_number = trap_number;
562 member->info.callback = callback;
563 member->info.context = context;
564 init_completion(&member->comp);
565 atomic_set(&member->refcount, 1);
566 member->state = INFORM_REGISTERING;
568 member->group = acquire_group(&dev->port[port_num - dev->start_port],
569 trap_number, gfp_mask);
570 if (!member->group) {
576 * The user will get the info structure in their callback. They
577 * could then free the info structure before we can return from
578 * this routine. So we save the pointer to return before queuing
581 info = &member->info;
586 ib_sa_client_put(member->client);
590 EXPORT_SYMBOL(ib_sa_register_inform_info);
/*
 * Tear down a registration made with ib_sa_register_inform_info().
 *
 * The member is unlinked from its list under group->lock. If the group is
 * idle it is marked busy and its work item is queued, keeping the group
 * reference; otherwise the group reference is released. The function then
 * drops a member reference, blocks until member->comp is completed (which
 * deref_member() does on the final put), and releases the SA client
 * reference.
 * NOTE(review): the statement controlled by the INFORM_MEMBER test and
 * anything after the client put are not visible in this excerpt.
 */
592 void ib_sa_unregister_inform_info(struct ib_inform_info *info)
594 struct inform_member *member;
595 struct inform_group *group;
597 member = container_of(info, struct inform_member, info);
598 group = member->group;
600 spin_lock_irq(&group->lock);
601 if (member->state == INFORM_MEMBER)
604 list_del_init(&member->list);
606 if (group->state == INFORM_IDLE) {
607 group->state = INFORM_BUSY;
608 spin_unlock_irq(&group->lock);
609 /* Continue to hold reference on group until callback */
610 queue_work(inform_wq, &group->work);
612 spin_unlock_irq(&group->lock);
613 release_group(group);
616 deref_member(member);
617 wait_for_completion(&member->comp);
618 ib_sa_client_put(member->client);
621 EXPORT_SYMBOL(ib_sa_unregister_inform_info);
/*
 * Put every group on the port into the INFORM_ERROR state. The tree is
 * walked under port->lock, taking each group's lock in turn. A group that
 * was idle additionally gets a reference and has its work item queued.
 */
623 static void inform_groups_lost(struct inform_port *port)
625 struct inform_group *group;
626 struct rb_node *node;
629 spin_lock_irqsave(&port->lock, flags);
630 for (node = rb_first(&port->table); node; node = rb_next(node)) {
631 group = rb_entry(node, struct inform_group, node);
632 spin_lock(&group->lock);
633 if (group->state == INFORM_IDLE) {
634 atomic_inc(&group->refcount);
635 queue_work(inform_wq, &group->work);
637 group->state = INFORM_ERROR;
638 spin_unlock(&group->lock);
640 spin_unlock_irqrestore(&port->lock, flags);
/*
 * IB event handler for a device. For port error, LID change, SM change and
 * client re-register events it calls inform_groups_lost() on the port named
 * in the event.
 */
643 static void inform_event_handler(struct ib_event_handler *handler,
644 struct ib_event *event)
646 struct inform_device *dev;
648 dev = container_of(handler, struct inform_device, event_handler);
650 switch (event->event) {
651 case IB_EVENT_PORT_ERR:
652 case IB_EVENT_LID_CHANGE:
653 case IB_EVENT_SM_CHANGE:
654 case IB_EVENT_CLIENT_REREGISTER:
655 inform_groups_lost(&dev->port[event->element.port_num -
/*
 * Client "add" callback: set up per-device and per-port state.
 *
 * The device's transport is tested against RDMA_TRANSPORT_IB first. An
 * inform_device with a trailing array of phys_port_cnt ports is allocated.
 * Switch nodes use port 0 only. Each port gets its lock, an empty group
 * tree, a completion and an initial refcount of 1. The structure is then
 * published as client data and the event handler is registered.
 * NOTE(review): the statements taken on the transport mismatch and on
 * allocation failure, and the non-switch start_port assignment, are not
 * visible in this excerpt.
 */
663 static void inform_add_one(struct ib_device *device)
665 struct inform_device *dev;
666 struct inform_port *port;
669 if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
672 dev = kmalloc(sizeof *dev + device->phys_port_cnt * sizeof *port,
677 if (device->node_type == RDMA_NODE_IB_SWITCH)
678 dev->start_port = dev->end_port = 0;
681 dev->end_port = device->phys_port_cnt;
/* Inclusive range: one array slot per port from start_port to end_port. */
684 for (i = 0; i <= dev->end_port - dev->start_port; i++) {
685 port = &dev->port[i];
687 port->port_num = dev->start_port + i;
688 spin_lock_init(&port->lock);
689 port->table = RB_ROOT;
690 init_completion(&port->comp);
691 atomic_set(&port->refcount, 1);
694 dev->device = device;
695 ib_set_client_data(device, &inform_client, dev);
697 INIT_IB_EVENT_HANDLER(&dev->event_handler, device, inform_event_handler);
698 ib_register_event_handler(&dev->event_handler);
/*
 * Client "remove" callback. Unregisters the device's event handler, flushes
 * inform_wq, and then waits on each port's completion, which deref_port()
 * signals on the final put.
 * NOTE(review): the handling of a missing dev, the per-port reference drop
 * and the final free are not visible in this excerpt.
 */
701 static void inform_remove_one(struct ib_device *device)
703 struct inform_device *dev;
704 struct inform_port *port;
707 dev = ib_get_client_data(device, &inform_client);
711 ib_unregister_event_handler(&dev->event_handler);
712 flush_workqueue(inform_wq);
714 for (i = 0; i <= dev->end_port - dev->start_port; i++) {
715 port = &dev->port[i];
717 wait_for_completion(&port->comp);
/*
 * Module setup: create the single-threaded "ib_inform" workqueue, register
 * the SA client, then register inform_client. The trailing unregister and
 * destroy calls undo the first two steps.
 * NOTE(review): the error checks, labels and return statements are not
 * visible in this excerpt.
 */
723 int notice_init(void)
727 inform_wq = create_singlethread_workqueue("ib_inform");
731 ib_sa_register_client(&sa_client);
733 ret = ib_register_client(&inform_client);
739 ib_sa_unregister_client(&sa_client);
740 destroy_workqueue(inform_wq);
/*
 * Module teardown, in the reverse order of notice_init(): unregister
 * inform_client, unregister the SA client, then destroy the workqueue.
 */
744 void notice_cleanup(void)
746 ib_unregister_client(&inform_client);
747 ib_sa_unregister_client(&sa_client);
748 destroy_workqueue(inform_wq);