/*
 * Copyright (c) 2006 Intel Corporation. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/dma-mapping.h>
#include <linux/err.h>
#include <linux/interrupt.h>
#include <linux/rbtree.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
#include <linux/miscdevice.h>
#include <linux/random.h>

#include <rdma/ib_cache.h>
#include <rdma/ib_sa.h>

MODULE_AUTHOR("Sean Hefty");
MODULE_DESCRIPTION("InfiniBand subnet administration caching");
MODULE_LICENSE("Dual BSD/GPL");

enum {
        SA_DB_MAX_PATHS_PER_DEST = 0x7F,
        SA_DB_MIN_RETRY_TIMER    = 4000,  /* 4 sec */
        SA_DB_MAX_RETRY_TIMER    = 256000 /* 256 sec */
};

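/*
 * Retries of SA queries use exponential backoff: send_handler() doubles
 * msg->timeout_ms after each response timeout, starting at
 * SA_DB_MIN_RETRY_TIMER and capped at SA_DB_MAX_RETRY_TIMER.
 */
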
static int set_paths_per_dest(const char *val, struct kernel_param *kp);
static unsigned long paths_per_dest = 0;
module_param_call(paths_per_dest, set_paths_per_dest, param_get_ulong,
                  &paths_per_dest, 0644);
MODULE_PARM_DESC(paths_per_dest, "Maximum number of paths to retrieve "
                                 "to each destination (DGID).  Set to 0 "
                                 "to disable cache.");

static int set_subscribe_inform_info(const char *val, struct kernel_param *kp);
static char subscribe_inform_info = 1;
module_param_call(subscribe_inform_info, set_subscribe_inform_info,
                  param_get_bool, &subscribe_inform_info, 0644);
MODULE_PARM_DESC(subscribe_inform_info,
                 "Subscribe for SA InformInfo/Notice events.");

static int do_refresh(const char *val, struct kernel_param *kp);
module_param_call(refresh, do_refresh, NULL, NULL, 0200);

static unsigned long retry_timer = SA_DB_MIN_RETRY_TIMER;

enum sa_db_lookup_method {
        SA_DB_LOOKUP_LEAST_USED,
        SA_DB_LOOKUP_RANDOM
};

static int set_lookup_method(const char *val, struct kernel_param *kp);
static int get_lookup_method(char *buf, struct kernel_param *kp);
static unsigned long lookup_method;
module_param_call(lookup_method, set_lookup_method, get_lookup_method,
                  &lookup_method, 0644);
MODULE_PARM_DESC(lookup_method, "Method used to return path records when "
                                "multiple paths exist to a given destination.");

static void sa_db_add_dev(struct ib_device *device);
static void sa_db_remove_dev(struct ib_device *device);

static struct ib_client sa_db_client = {
        .name   = "local_sa",
        .add    = sa_db_add_dev,
        .remove = sa_db_remove_dev
};

static LIST_HEAD(dev_list);
static DEFINE_MUTEX(lock);
static rwlock_t rwlock;
static struct workqueue_struct *sa_wq;
static struct ib_sa_client sa_client;

enum sa_db_state {
        SA_DB_IDLE,
        SA_DB_REFRESH,
        SA_DB_DESTROY
};

struct sa_db_port {
        struct sa_db_device *dev;
        struct ib_mad_agent *agent;
        /* Limit number of outstanding MADs to SA to reduce SA flooding */
        struct ib_mad_send_buf *msg;
        u16 sm_lid;
        u8 sm_sl;
        struct ib_inform_info *in_info;
        struct ib_inform_info *out_info;
        struct rb_root paths;
        struct list_head update_list;
        unsigned long update_id;
        enum sa_db_state state;
        struct work_struct work;
        union ib_gid gid;
        int port_num;
};

struct sa_db_device {
        struct list_head list;
        struct ib_device *device;
        struct ib_event_handler event_handler;
        int start_port;
        int port_count;
        struct sa_db_port port[0];
};

struct ib_sa_iterator {
        struct ib_sa_iterator *next;
};

struct ib_sa_attr_iter {
        struct ib_sa_iterator *iter;
        unsigned long flags;
};

struct ib_sa_attr_list {
        struct ib_sa_iterator iter;
        struct ib_sa_iterator *tail;
        unsigned long update_id;
        union ib_gid gid;
        struct rb_node node;
};

struct ib_path_rec_info {
        struct ib_sa_iterator iter; /* keep first */
        struct ib_sa_path_rec rec;
        unsigned long lookups;
};

struct ib_sa_mad_iter {
        struct ib_mad_recv_wc *recv_wc;
        struct ib_mad_recv_buf *recv_buf;
        int attr_size;
        int attr_offset;
        int data_offset;
        int data_left;
        void *attr;
        u8 attr_data[0];
};

enum sa_update_type {
        SA_UPDATE_FULL,
        SA_UPDATE_ADD,
        SA_UPDATE_REMOVE
};

struct update_info {
        struct list_head list;
        union ib_gid gid;
        enum sa_update_type type;
};

struct sa_path_request {
        struct work_struct work;
        struct ib_sa_client *client;
        void (*callback)(int, struct ib_sa_path_rec *, void *);
        void *context;
        struct ib_sa_path_rec path_rec;
};

static void process_updates(struct sa_db_port *port);

static void free_attr_list(struct ib_sa_attr_list *attr_list)
{
        struct ib_sa_iterator *cur;

        for (cur = attr_list->iter.next; cur; cur = attr_list->iter.next) {
                attr_list->iter.next = cur->next;
                kfree(cur);
        }
        attr_list->tail = &attr_list->iter;
}

static void remove_attr(struct rb_root *root, struct ib_sa_attr_list *attr_list)
{
        rb_erase(&attr_list->node, root);
        free_attr_list(attr_list);
        kfree(attr_list);
}

static void remove_all_attrs(struct rb_root *root)
{
        struct rb_node *node, *next_node;
        struct ib_sa_attr_list *attr_list;

        write_lock_irq(&rwlock);
        for (node = rb_first(root); node; node = next_node) {
                next_node = rb_next(node);
                attr_list = rb_entry(node, struct ib_sa_attr_list, node);
                remove_attr(root, attr_list);
        }
        write_unlock_irq(&rwlock);
}

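/* Remove entries that were not refreshed by the update given by update_id. */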
static void remove_old_attrs(struct rb_root *root, unsigned long update_id)
{
        struct rb_node *node, *next_node;
        struct ib_sa_attr_list *attr_list;

        write_lock_irq(&rwlock);
        for (node = rb_first(root); node; node = next_node) {
                next_node = rb_next(node);
                attr_list = rb_entry(node, struct ib_sa_attr_list, node);
                if (attr_list->update_id != update_id)
                        remove_attr(root, attr_list);
        }
        write_unlock_irq(&rwlock);
}

static struct ib_sa_attr_list *insert_attr_list(struct rb_root *root,
                                                struct ib_sa_attr_list *attr_list)
{
        struct rb_node **link = &root->rb_node;
        struct rb_node *parent = NULL;
        struct ib_sa_attr_list *cur_attr_list;
        int cmp;

        while (*link) {
                parent = *link;
                cur_attr_list = rb_entry(parent, struct ib_sa_attr_list, node);
                cmp = memcmp(&cur_attr_list->gid, &attr_list->gid,
                             sizeof attr_list->gid);
                if (cmp < 0)
                        link = &(*link)->rb_left;
                else if (cmp > 0)
                        link = &(*link)->rb_right;
                else
                        return cur_attr_list;
        }
        rb_link_node(&attr_list->node, parent, link);
        rb_insert_color(&attr_list->node, root);
        return NULL;
}

static struct ib_sa_attr_list *find_attr_list(struct rb_root *root, u8 *gid)
{
        struct rb_node *node = root->rb_node;
        struct ib_sa_attr_list *attr_list;
        int cmp;

        while (node) {
                attr_list = rb_entry(node, struct ib_sa_attr_list, node);
                cmp = memcmp(&attr_list->gid, gid, sizeof attr_list->gid);
                if (cmp < 0)
                        node = node->rb_left;
                else if (cmp > 0)
                        node = node->rb_right;
                else
                        return attr_list;
        }
        return NULL;
}

static int insert_attr(struct rb_root *root, unsigned long update_id, void *key,
                       struct ib_sa_iterator *iter)
{
        struct ib_sa_attr_list *attr_list;
        struct ib_sa_attr_list *err;

        write_lock_irq(&rwlock);
        attr_list = find_attr_list(root, key);
        if (!attr_list) {
                write_unlock_irq(&rwlock);
                attr_list = kmalloc(sizeof *attr_list, GFP_KERNEL);
                if (!attr_list)
                        return -ENOMEM;

                attr_list->iter.next = NULL;
                attr_list->tail = &attr_list->iter;
                attr_list->update_id = update_id;
                memcpy(attr_list->gid.raw, key, sizeof attr_list->gid);

                write_lock_irq(&rwlock);
                err = insert_attr_list(root, attr_list);
                if (err) {
                        /* Lost a race with another insertion of this GID. */
                        write_unlock_irq(&rwlock);
                        kfree(attr_list);
                        return -EEXIST;
                }
        } else if (attr_list->update_id != update_id) {
                free_attr_list(attr_list);
                attr_list->update_id = update_id;
        }

        attr_list->tail->next = iter;
        iter->next = NULL;
        attr_list->tail = iter;
        write_unlock_irq(&rwlock);
        return 0;
}

static struct ib_sa_mad_iter *ib_sa_iter_create(struct ib_mad_recv_wc *mad_recv_wc)
{
        struct ib_sa_mad_iter *iter;
        struct ib_sa_mad *mad = (struct ib_sa_mad *) mad_recv_wc->recv_buf.mad;
        int attr_size, attr_offset;

        attr_offset = be16_to_cpu(mad->sa_hdr.attr_offset) * 8;
        attr_size = 64; /* path record length */
        if (attr_offset < attr_size)
                return ERR_PTR(-EINVAL);

        iter = kzalloc(sizeof *iter + attr_size, GFP_KERNEL);
        if (!iter)
                return ERR_PTR(-ENOMEM);

        iter->data_left = mad_recv_wc->mad_len - IB_MGMT_SA_HDR;
        iter->recv_wc = mad_recv_wc;
        iter->recv_buf = &mad_recv_wc->recv_buf;
        iter->attr_offset = attr_offset;
        iter->attr_size = attr_size;
        return iter;
}

static void ib_sa_iter_free(struct ib_sa_mad_iter *iter)
{
        kfree(iter);
}

static void *ib_sa_iter_next(struct ib_sa_mad_iter *iter)
{
        struct ib_sa_mad *mad;
        int left, offset = 0;

        while (iter->data_left >= iter->attr_offset) {
                while (iter->data_offset < IB_MGMT_SA_DATA) {
                        mad = (struct ib_sa_mad *) iter->recv_buf->mad;

                        left = IB_MGMT_SA_DATA - iter->data_offset;
                        if (left < iter->attr_size) {
                                /* copy first piece of the attribute */
                                iter->attr = &iter->attr_data;
                                memcpy(iter->attr,
                                       &mad->data[iter->data_offset], left);
                                offset = left;
                                break;
                        } else if (offset) {
                                /* copy the second piece of the attribute */
                                memcpy(iter->attr + offset, &mad->data[0],
                                       iter->attr_size - offset);
                                iter->data_offset = iter->attr_size - offset;
                                offset = 0;
                        } else {
                                iter->attr = &mad->data[iter->data_offset];
                                iter->data_offset += iter->attr_size;
                        }

                        iter->data_left -= iter->attr_offset;
                        return iter->attr;
                }
                iter->data_offset = 0;
                iter->recv_buf = list_entry(iter->recv_buf->list.next,
                                            struct ib_mad_recv_buf, list);
        }
        return NULL;
}

/*
 * Copy path records from a received response and insert them into our cache.
 * A path record in the MADs is in network order, packed, and may
 * span multiple MAD buffers, just to make our life hard.
 */
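/*
 * Example: with 64-byte path records the SA typically reports
 * sa_hdr.attr_offset = 8 (units of 8 bytes), so ib_sa_iter_next()
 * advances 64 bytes of payload per record and stitches together any
 * record that straddles two MAD buffers.
 */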
static void update_path_db(struct sa_db_port *port,
                           struct ib_mad_recv_wc *mad_recv_wc,
                           enum sa_update_type type)
{
        struct ib_sa_mad_iter *iter;
        struct ib_path_rec_info *path_info;
        void *attr;
        int ret;

        iter = ib_sa_iter_create(mad_recv_wc);
        if (IS_ERR(iter))
                return;

        port->update_id += (type == SA_UPDATE_FULL);

        while ((attr = ib_sa_iter_next(iter)) &&
               (path_info = kmalloc(sizeof *path_info, GFP_KERNEL))) {

                ib_sa_unpack_attr(&path_info->rec, attr, IB_SA_ATTR_PATH_REC);

                ret = insert_attr(&port->paths, port->update_id,
                                  path_info->rec.dgid.raw, &path_info->iter);
                if (ret) {
                        kfree(path_info);
                        break;
                }
        }
        ib_sa_iter_free(iter);

        if (type == SA_UPDATE_FULL)
                remove_old_attrs(&port->paths, port->update_id);
}

static struct ib_mad_send_buf *get_sa_msg(struct sa_db_port *port,
                                          struct update_info *update)
{
        struct ib_ah_attr ah_attr;
        struct ib_mad_send_buf *msg;

        msg = ib_create_send_mad(port->agent, 1, 0, 0, IB_MGMT_SA_HDR,
                                 IB_MGMT_SA_DATA, GFP_KERNEL);
        if (IS_ERR(msg))
                return NULL;

        memset(&ah_attr, 0, sizeof ah_attr);
        ah_attr.dlid = port->sm_lid;
        ah_attr.sl = port->sm_sl;
        ah_attr.port_num = port->port_num;

        msg->ah = ib_create_ah(port->agent->qp->pd, &ah_attr);
        if (IS_ERR(msg->ah)) {
                ib_free_send_mad(msg);
                return NULL;
        }

        msg->timeout_ms = retry_timer;
        msg->context[0] = port;
        msg->context[1] = update;
        return msg;
}

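/*
 * Build a 64-bit transaction ID: the upper 32 bits carry the MAD agent's
 * hi_tid so the MAD layer can route the response back to this agent; the
 * lower 32 bits come from a module-local counter that distinguishes
 * outstanding queries.
 */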
static __be64 form_tid(u32 hi_tid)
{
        static atomic_t tid;
        return cpu_to_be64((((u64) hi_tid) << 32) |
                           ((u32) atomic_inc_return(&tid)));
}

static void format_path_req(struct sa_db_port *port,
                            struct update_info *update,
                            struct ib_mad_send_buf *msg)
{
        struct ib_sa_mad *mad = msg->mad;
        struct ib_sa_path_rec path_rec;

        mad->mad_hdr.base_version = IB_MGMT_BASE_VERSION;
        mad->mad_hdr.mgmt_class = IB_MGMT_CLASS_SUBN_ADM;
        mad->mad_hdr.class_version = IB_SA_CLASS_VERSION;
        mad->mad_hdr.method = IB_SA_METHOD_GET_TABLE;
        mad->mad_hdr.attr_id = cpu_to_be16(IB_SA_ATTR_PATH_REC);
        mad->mad_hdr.tid = form_tid(msg->mad_agent->hi_tid);

        mad->sa_hdr.comp_mask = IB_SA_PATH_REC_SGID | IB_SA_PATH_REC_NUMB_PATH;

        path_rec.sgid = port->gid;
        path_rec.numb_path = (u8) paths_per_dest;

        if (update->type == SA_UPDATE_ADD) {
                mad->sa_hdr.comp_mask |= IB_SA_PATH_REC_DGID;
                memcpy(&path_rec.dgid, &update->gid, sizeof path_rec.dgid);
        }

        ib_sa_pack_attr(mad->data, &path_rec, IB_SA_ATTR_PATH_REC);
}

static int send_query(struct sa_db_port *port,
                      struct update_info *update)
{
        int ret;

        port->msg = get_sa_msg(port, update);
        if (!port->msg)
                return -ENOMEM;

        format_path_req(port, update, port->msg);

        ret = ib_post_send_mad(port->msg, NULL);
        if (ret)
                goto err;

        return 0;

err:
        ib_destroy_ah(port->msg->ah);
        ib_free_send_mad(port->msg);
        return ret;
}

static void add_update(struct sa_db_port *port, u8 *gid,
                       enum sa_update_type type)
{
        struct update_info *update;

        update = kmalloc(sizeof *update, GFP_KERNEL);
        if (!update)
                return;

        if (gid)        /* SA_UPDATE_FULL passes a NULL gid */
                memcpy(&update->gid, gid, sizeof update->gid);
        update->type = type;
        list_add(&update->list, &port->update_list);

        if (port->state == SA_DB_IDLE) {
                port->state = SA_DB_REFRESH;
                process_updates(port);
        }
}

static void clean_update_list(struct sa_db_port *port)
{
        struct update_info *update;

        while (!list_empty(&port->update_list)) {
                update = list_entry(port->update_list.next,
                                    struct update_info, list);
                list_del(&update->list);
                kfree(update);
        }
}

static int notice_handler(int status, struct ib_inform_info *info,
                          struct ib_sa_notice *notice)
{
        struct sa_db_port *port = info->context;
        struct ib_sa_notice_data_gid *gid_data;
        struct ib_inform_info **pinfo;
        enum sa_update_type type;

        if (info->trap_number == IB_SA_SM_TRAP_GID_IN_SERVICE) {
                pinfo = &port->in_info;
                type = SA_UPDATE_ADD;
        } else {
                pinfo = &port->out_info;
                type = SA_UPDATE_REMOVE;
        }

        mutex_lock(&lock);
        if (port->state == SA_DB_DESTROY || !*pinfo) {
                mutex_unlock(&lock);
                return 0;
        }

        if (notice) {
                gid_data = (struct ib_sa_notice_data_gid *)
                           &notice->data_details;
                add_update(port, gid_data->gid, type);
                mutex_unlock(&lock);
        } else if (status == -ENETRESET) {
                *pinfo = NULL;
                mutex_unlock(&lock);
        } else {
                if (status)
                        *pinfo = ERR_PTR(-EINVAL);
                port->state = SA_DB_IDLE;
                clean_update_list(port);
                mutex_unlock(&lock);
                queue_work(sa_wq, &port->work);
        }

        return status;
}

static int reg_in_info(struct sa_db_port *port)
{
        int ret = 0;

        port->in_info = ib_sa_register_inform_info(&sa_client,
                                                   port->dev->device,
                                                   port->port_num,
                                                   IB_SA_SM_TRAP_GID_IN_SERVICE,
                                                   GFP_KERNEL, notice_handler,
                                                   port);
        if (IS_ERR(port->in_info))
                ret = PTR_ERR(port->in_info);

        return ret;
}

static int reg_out_info(struct sa_db_port *port)
{
        int ret = 0;

        port->out_info = ib_sa_register_inform_info(&sa_client,
                                                    port->dev->device,
                                                    port->port_num,
                                                    IB_SA_SM_TRAP_GID_OUT_OF_SERVICE,
                                                    GFP_KERNEL, notice_handler,
                                                    port);
        if (IS_ERR(port->out_info))
                ret = PTR_ERR(port->out_info);

        return ret;
}

static void unsubscribe_port(struct sa_db_port *port)
{
        if (port->in_info && !IS_ERR(port->in_info))
                ib_sa_unregister_inform_info(port->in_info);

        if (port->out_info && !IS_ERR(port->out_info))
                ib_sa_unregister_inform_info(port->out_info);

        port->out_info = NULL;
        port->in_info = NULL;
}

static void cleanup_port(struct sa_db_port *port)
{
        unsubscribe_port(port);

        clean_update_list(port);
        remove_all_attrs(&port->paths);
}

static int update_port_info(struct sa_db_port *port)
{
        struct ib_port_attr port_attr;
        int ret;

        ret = ib_query_port(port->dev->device, port->port_num, &port_attr);
        if (ret)
                return ret;

        if (port_attr.state != IB_PORT_ACTIVE)
                return -ENODATA;

        port->sm_lid = port_attr.sm_lid;
        port->sm_sl = port_attr.sm_sl;
        return 0;
}

static void process_updates(struct sa_db_port *port)
{
        struct update_info *update;
        struct ib_sa_attr_list *attr_list;
        int ret;

        if (!paths_per_dest || update_port_info(port)) {
                clean_update_list(port);
                port->state = SA_DB_IDLE;
                return;
        }

        /* Event registration is an optimization, so ignore failures. */
        if (subscribe_inform_info) {
                if (!port->out_info) {
                        ret = reg_out_info(port);
                        if (!ret)
                                return;
                }

                if (!port->in_info) {
                        ret = reg_in_info(port);
                        if (!ret)
                                return;
                }
        } else
                unsubscribe_port(port);

        while (!list_empty(&port->update_list)) {
                update = list_entry(port->update_list.next,
                                    struct update_info, list);

                if (update->type == SA_UPDATE_REMOVE) {
                        write_lock_irq(&rwlock);
                        attr_list = find_attr_list(&port->paths,
                                                   update->gid.raw);
                        if (attr_list)
                                remove_attr(&port->paths, attr_list);
                        write_unlock_irq(&rwlock);
                } else {
                        ret = send_query(port, update);
                        if (!ret)
                                return;
                }
                list_del(&update->list);
                kfree(update);
        }
        port->state = SA_DB_IDLE;
}

static void refresh_port_db(struct sa_db_port *port)
{
        if (port->state == SA_DB_DESTROY)
                return;

        if (port->state == SA_DB_REFRESH) {
                clean_update_list(port);
                ib_cancel_mad(port->agent, port->msg);
        }

        add_update(port, NULL, SA_UPDATE_FULL);
}

static void refresh_dev_db(struct sa_db_device *dev)
{
        int i;

        for (i = 0; i < dev->port_count; i++)
                refresh_port_db(&dev->port[i]);
}

static void refresh_db(void)
{
        struct sa_db_device *dev;

        list_for_each_entry(dev, &dev_list, list)
                refresh_dev_db(dev);
}

static int do_refresh(const char *val, struct kernel_param *kp)
{
        mutex_lock(&lock);
        refresh_db();
        mutex_unlock(&lock);
        return 0;
}

static int get_lookup_method(char *buf, struct kernel_param *kp)
{
        return sprintf(buf,
                       "%c %d least used\n"
                       "%c %d round robin\n",
                       (lookup_method == SA_DB_LOOKUP_LEAST_USED) ? '*' : ' ',
                       SA_DB_LOOKUP_LEAST_USED,
                       (lookup_method == SA_DB_LOOKUP_RANDOM) ? '*' : ' ',
                       SA_DB_LOOKUP_RANDOM);
}

static int set_lookup_method(const char *val, struct kernel_param *kp)
{
        unsigned long method;
        int ret = 0;

        method = simple_strtoul(val, NULL, 0);

        switch (method) {
        case SA_DB_LOOKUP_LEAST_USED:
        case SA_DB_LOOKUP_RANDOM:
                lookup_method = method;
                break;
        default:
                ret = -EINVAL;
                break;
        }

        return ret;
}

static int set_paths_per_dest(const char *val, struct kernel_param *kp)
{
        int ret;

        mutex_lock(&lock);
        ret = param_set_ulong(val, kp);
        if (ret)
                goto out;

        if (paths_per_dest > SA_DB_MAX_PATHS_PER_DEST)
                paths_per_dest = SA_DB_MAX_PATHS_PER_DEST;
        refresh_db();
out:
        mutex_unlock(&lock);
        return ret;
}

static int set_subscribe_inform_info(const char *val, struct kernel_param *kp)
{
        int ret;

        ret = param_set_bool(val, kp);
        if (ret)
                return ret;

        return do_refresh(val, kp);
}

static void port_work_handler(struct work_struct *work)
{
        struct sa_db_port *port;

        port = container_of(work, typeof(*port), work);
        mutex_lock(&lock);
        refresh_port_db(port);
        mutex_unlock(&lock);
}

static void handle_event(struct ib_event_handler *event_handler,
                         struct ib_event *event)
{
        struct sa_db_device *dev;
        struct sa_db_port *port;

        dev = container_of(event_handler, typeof(*dev), event_handler);
        port = &dev->port[event->element.port_num - dev->start_port];

        switch (event->event) {
        case IB_EVENT_PORT_ERR:
        case IB_EVENT_LID_CHANGE:
        case IB_EVENT_SM_CHANGE:
        case IB_EVENT_CLIENT_REREGISTER:
        case IB_EVENT_PKEY_CHANGE:
        case IB_EVENT_PORT_ACTIVE:
                queue_work(sa_wq, &port->work);
                break;
        default:
                break;
        }
}

static void ib_free_path_iter(struct ib_sa_attr_iter *iter)
{
        read_unlock_irqrestore(&rwlock, iter->flags);
}

static int ib_create_path_iter(struct ib_device *device, u8 port_num,
                               union ib_gid *dgid, struct ib_sa_attr_iter *iter)
{
        struct sa_db_device *dev;
        struct sa_db_port *port;
        struct ib_sa_attr_list *list;

        dev = ib_get_client_data(device, &sa_db_client);
        if (!dev)
                return -ENODEV;

        port = &dev->port[port_num - dev->start_port];

        read_lock_irqsave(&rwlock, iter->flags);
        list = find_attr_list(&port->paths, dgid->raw);
        if (!list) {
                ib_free_path_iter(iter);
                return -ENODATA;
        }

        iter->iter = &list->iter;
        return 0;
}

static struct ib_sa_path_rec *ib_get_next_path(struct ib_sa_attr_iter *iter)
{
        struct ib_path_rec_info *next_path;

        iter->iter = iter->iter->next;
        if (iter->iter) {
                next_path = container_of(iter->iter,
                                         struct ib_path_rec_info, iter);
                return &next_path->rec;
        } else
                return NULL;
}

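/*
 * Compare a cached path record (src) against the requested record (dst),
 * checking only the fields selected in comp_mask.  Returns 0 on a match.
 * The DGID and numb_path fields were already matched by the caller.
 */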
static int cmp_rec(struct ib_sa_path_rec *src,
                   struct ib_sa_path_rec *dst, ib_sa_comp_mask comp_mask)
{
        /* DGID check already done */
        if (comp_mask & IB_SA_PATH_REC_SGID &&
            memcmp(&src->sgid, &dst->sgid, sizeof src->sgid))
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_DLID && src->dlid != dst->dlid)
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_SLID && src->slid != dst->slid)
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_RAW_TRAFFIC &&
            src->raw_traffic != dst->raw_traffic)
                return -EINVAL;

        if (comp_mask & IB_SA_PATH_REC_FLOW_LABEL &&
            src->flow_label != dst->flow_label)
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_HOP_LIMIT &&
            src->hop_limit != dst->hop_limit)
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_TRAFFIC_CLASS &&
            src->traffic_class != dst->traffic_class)
                return -EINVAL;
        if (comp_mask & IB_SA_PATH_REC_REVERSIBLE &&
            dst->reversible && !src->reversible)
                return -EINVAL;
        /* Numb path check already done */
        if (comp_mask & IB_SA_PATH_REC_PKEY && src->pkey != dst->pkey)
                return -EINVAL;

        if (comp_mask & IB_SA_PATH_REC_SL && src->sl != dst->sl)
                return -EINVAL;

        if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_MTU_SELECTOR,
                                 IB_SA_PATH_REC_MTU, dst->mtu_selector,
                                 src->mtu, dst->mtu))
                return -EINVAL;
        if (ib_sa_check_selector(comp_mask, IB_SA_PATH_REC_RATE_SELECTOR,
                                 IB_SA_PATH_REC_RATE, dst->rate_selector,
                                 src->rate, dst->rate))
                return -EINVAL;
        if (ib_sa_check_selector(comp_mask,
                                 IB_SA_PATH_REC_PACKET_LIFE_TIME_SELECTOR,
                                 IB_SA_PATH_REC_PACKET_LIFE_TIME,
                                 dst->packet_life_time_selector,
                                 src->packet_life_time, dst->packet_life_time))
                return -EINVAL;

        return 0;
}

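/*
 * Choose uniformly among matching paths via reservoir sampling: the k-th
 * match replaces the current selection with probability 1/k, so each
 * match ends up equally likely without counting them first.
 */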
static struct ib_sa_path_rec *get_random_path(struct ib_sa_attr_iter *iter,
                                              struct ib_sa_path_rec *req_path,
                                              ib_sa_comp_mask comp_mask)
{
        struct ib_sa_path_rec *path, *rand_path = NULL;
        unsigned int num, count = 0;

        for (path = ib_get_next_path(iter); path;
             path = ib_get_next_path(iter)) {
                if (!cmp_rec(path, req_path, comp_mask)) {
                        get_random_bytes(&num, sizeof num);
                        if ((num % ++count) == 0)
                                rand_path = path;
                }
        }

        return rand_path;
}

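/*
 * Least-used selection: return the matching path with the fewest prior
 * lookups and bump its count, spreading load across the cached paths to
 * a destination.
 */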
static struct ib_sa_path_rec *get_next_path(struct ib_sa_attr_iter *iter,
                                            struct ib_sa_path_rec *req_path,
                                            ib_sa_comp_mask comp_mask)
{
        struct ib_path_rec_info *cur_path, *next_path = NULL;
        struct ib_sa_path_rec *path;
        unsigned long lookups = ~0;

        for (path = ib_get_next_path(iter); path;
             path = ib_get_next_path(iter)) {
                if (!cmp_rec(path, req_path, comp_mask)) {
                        cur_path = container_of(iter->iter,
                                                struct ib_path_rec_info, iter);
                        if (cur_path->lookups < lookups) {
                                lookups = cur_path->lookups;
                                next_path = cur_path;
                        }
                }
        }
        if (next_path) {
                next_path->lookups++;
                return &next_path->rec;
        } else
                return NULL;
}

static void report_path(struct work_struct *work)
{
        struct sa_path_request *req;

        req = container_of(work, struct sa_path_request, work);
        req->callback(0, &req->path_rec, req->context);
        ib_sa_client_put(req->client);
        kfree(req);
}

/**
 * ib_sa_path_rec_get - Start a Path get query
 * @client:client context
 * @device:device to send query on
 * @port_num: port number to send query on
 * @rec:Path Record to send in query
 * @comp_mask:component mask to send in query
 * @timeout_ms:time to wait for response
 * @gfp_mask:GFP mask to use for internal allocations
 * @callback:function called when query completes, times out or is
 * canceled
 * @context:opaque user context passed to callback
 * @sa_query:query context, used to cancel query
 *
 * Send a Path Record Get query to the SA to look up a path.  The
 * callback function will be called when the query completes (or
 * fails); status is 0 for a successful response, -EINTR if the query
 * is canceled, -ETIMEDOUT if the query timed out, or -EIO if an error
 * occurred sending the query.  The resp parameter of the callback is
 * only valid if status is 0.
 *
 * If the return value of ib_sa_path_rec_get() is negative, it is an
 * error code.  Otherwise it is a query ID that can be used to cancel
 * the query.
 */
int ib_sa_path_rec_get(struct ib_sa_client *client,
                       struct ib_device *device, u8 port_num,
                       struct ib_sa_path_rec *rec,
                       ib_sa_comp_mask comp_mask,
                       int timeout_ms, gfp_t gfp_mask,
                       void (*callback)(int status,
                                        struct ib_sa_path_rec *resp,
                                        void *context),
                       void *context,
                       struct ib_sa_query **sa_query)
{
        struct sa_path_request *req;
        struct ib_sa_attr_iter iter;
        struct ib_sa_path_rec *path_rec;
        int ret;

        if (!paths_per_dest)
                goto query_sa;

        if (!(comp_mask & IB_SA_PATH_REC_DGID) ||
            !(comp_mask & IB_SA_PATH_REC_NUMB_PATH) || rec->numb_path != 1)
                goto query_sa;

        req = kmalloc(sizeof *req, gfp_mask);
        if (!req)
                goto query_sa;

        ret = ib_create_path_iter(device, port_num, &rec->dgid, &iter);
        if (ret)
                goto free_req;

        if (lookup_method == SA_DB_LOOKUP_RANDOM)
                path_rec = get_random_path(&iter, rec, comp_mask);
        else
                path_rec = get_next_path(&iter, rec, comp_mask);

        if (!path_rec)
                goto free_iter;

        memcpy(&req->path_rec, path_rec, sizeof *path_rec);
        ib_free_path_iter(&iter);

        INIT_WORK(&req->work, report_path);
        req->client = client;
        req->callback = callback;
        req->context = context;

        ib_sa_client_get(client);
        queue_work(sa_wq, &req->work);
        *sa_query = ERR_PTR(-EEXIST);
        return 0;

free_iter:
        ib_free_path_iter(&iter);
free_req:
        kfree(req);
query_sa:
        return ib_sa_path_rec_query(client, device, port_num, rec, comp_mask,
                                    timeout_ms, gfp_mask, callback, context,
                                    sa_query);
}
EXPORT_SYMBOL(ib_sa_path_rec_get);

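/*
 * Illustrative usage sketch (not part of this module; the callback,
 * client, and variable names below are hypothetical).  With caching
 * enabled, a request for a single path (numb_path == 1) whose comp_mask
 * includes DGID and NUMB_PATH may be answered from the cache:
 *
 *      static void path_done(int status, struct ib_sa_path_rec *resp,
 *                            void *context)
 *      {
 *              if (!status)
 *                      use_path(resp); /+ resp valid only when status == 0 +/
 *      }
 *
 *      memset(&rec, 0, sizeof rec);
 *      rec.dgid = dgid;
 *      rec.numb_path = 1;
 *      ret = ib_sa_path_rec_get(&my_client, device, port_num, &rec,
 *                               IB_SA_PATH_REC_DGID |
 *                               IB_SA_PATH_REC_NUMB_PATH,
 *                               1000, GFP_KERNEL, path_done, ctx, &query);
 *      if (ret < 0)
 *              handle_error(ret);
 */
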
static void recv_handler(struct ib_mad_agent *mad_agent,
                         struct ib_mad_recv_wc *mad_recv_wc)
{
        struct sa_db_port *port;
        struct update_info *update;
        struct ib_mad_send_buf *msg;
        enum sa_update_type type;

        msg = (struct ib_mad_send_buf *) (unsigned long) mad_recv_wc->wc->wr_id;
        port = msg->context[0];
        update = msg->context[1];

        mutex_lock(&lock);
        if (port->state == SA_DB_DESTROY ||
            update != list_entry(port->update_list.next,
                                 struct update_info, list)) {
                mutex_unlock(&lock);
        } else {
                type = update->type;
                mutex_unlock(&lock);
                update_path_db(mad_agent->context, mad_recv_wc, type);
        }

        ib_free_recv_mad(mad_recv_wc);
}

static void send_handler(struct ib_mad_agent *agent,
                         struct ib_mad_send_wc *mad_send_wc)
{
        struct ib_mad_send_buf *msg;
        struct sa_db_port *port;
        struct update_info *update;
        int ret;

        msg = mad_send_wc->send_buf;
        port = msg->context[0];
        update = msg->context[1];

        mutex_lock(&lock);
        if (port->state == SA_DB_DESTROY)
                goto unlock;

        if (update == list_entry(port->update_list.next,
                                 struct update_info, list)) {

                if (mad_send_wc->status == IB_WC_RESP_TIMEOUT_ERR &&
                    msg->timeout_ms < SA_DB_MAX_RETRY_TIMER) {

                        msg->timeout_ms <<= 1;
                        ret = ib_post_send_mad(msg, NULL);
                        if (!ret) {
                                mutex_unlock(&lock);
                                return;
                        }
                }
                list_del(&update->list);
                kfree(update);
        }
        process_updates(port);
unlock:
        mutex_unlock(&lock);

        ib_destroy_ah(msg->ah);
        ib_free_send_mad(msg);
}

static int init_port(struct sa_db_device *dev, int port_num)
{
        struct sa_db_port *port;
        int ret;

        port = &dev->port[port_num - dev->start_port];
        port->dev = dev;
        port->port_num = port_num;
        INIT_WORK(&port->work, port_work_handler);
        port->paths = RB_ROOT;
        INIT_LIST_HEAD(&port->update_list);

        ret = ib_get_cached_gid(dev->device, port_num, 0, &port->gid);
        if (ret)
                return ret;

        port->agent = ib_register_mad_agent(dev->device, port_num, IB_QPT_GSI,
                                            NULL, IB_MGMT_RMPP_VERSION,
                                            send_handler, recv_handler, port);
        if (IS_ERR(port->agent))
                ret = PTR_ERR(port->agent);

        return ret;
}

static void destroy_port(struct sa_db_port *port)
{
        mutex_lock(&lock);
        port->state = SA_DB_DESTROY;
        mutex_unlock(&lock);

        ib_unregister_mad_agent(port->agent);
        cleanup_port(port);
        flush_workqueue(sa_wq);
}

static void sa_db_add_dev(struct ib_device *device)
{
        struct sa_db_device *dev;
        struct sa_db_port *port;
        int s, e, i, ret;

        if (rdma_node_get_transport(device->node_type) != RDMA_TRANSPORT_IB)
                return;

        if (device->node_type == RDMA_NODE_IB_SWITCH) {
                s = e = 0;
        } else {
                s = 1;
                e = device->phys_port_cnt;
        }

        dev = kzalloc(sizeof *dev + (e - s + 1) * sizeof *port, GFP_KERNEL);
        if (!dev)
                return;

        dev->start_port = s;
        dev->port_count = e - s + 1;
        dev->device = device;
        for (i = 0; i < dev->port_count; i++) {
                ret = init_port(dev, s + i);
                if (ret)
                        goto err;
        }

        ib_set_client_data(device, &sa_db_client, dev);

        INIT_IB_EVENT_HANDLER(&dev->event_handler, device, handle_event);

        mutex_lock(&lock);
        list_add_tail(&dev->list, &dev_list);
        refresh_dev_db(dev);
        mutex_unlock(&lock);

        ib_register_event_handler(&dev->event_handler);
        return;
err:
        while (i--)
                destroy_port(&dev->port[i]);
        kfree(dev);
}

static void sa_db_remove_dev(struct ib_device *device)
{
        struct sa_db_device *dev;
        int i;

        dev = ib_get_client_data(device, &sa_db_client);
        if (!dev)
                return;

        ib_unregister_event_handler(&dev->event_handler);
        flush_workqueue(sa_wq);

        for (i = 0; i < dev->port_count; i++)
                destroy_port(&dev->port[i]);

        mutex_lock(&lock);
        list_del(&dev->list);
        mutex_unlock(&lock);

        kfree(dev);
}

int sa_db_init(void)
{
        int ret;

        rwlock_init(&rwlock);
        sa_wq = create_singlethread_workqueue("local_sa");
        if (!sa_wq)
                return -ENOMEM;

        ib_sa_register_client(&sa_client);
        ret = ib_register_client(&sa_db_client);
        if (ret)
                goto err;

        return 0;

err:
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(sa_wq);
        return ret;
}

void sa_db_cleanup(void)
{
        ib_unregister_client(&sa_db_client);
        ib_sa_unregister_client(&sa_client);
        destroy_workqueue(sa_wq);
}