CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c
Include eventhandler.h in more compilation units
[FreeBSD/FreeBSD.git] / sys / ofed / drivers / infiniband / core / ib_roce_gid_mgmt.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "core_priv.h"
39 #include <sys/eventhandler.h>
40
41 #include <linux/in.h>
42 #include <linux/in6.h>
43 #include <linux/rcupdate.h>
44
45 #include <rdma/ib_cache.h>
46 #include <rdma/ib_addr.h>
47
48 #include <netinet6/scope6_var.h>
49
/*
 * Work queue on which every GID scan, delete-all and rescan work item in
 * this file is queued. Created with alloc_ordered_workqueue() in
 * roce_gid_mgmt_init() and destroyed in roce_gid_mgmt_cleanup().
 */
static struct workqueue_struct *roce_gid_mgmt_wq;
51
/* Operation selector passed to update_gid(). */
enum gid_op_type {
        GID_DEL = 0,    /* update_gid() calls ib_cache_gid_del() */
        GID_ADD         /* update_gid() calls ib_cache_gid_add() */
};
56
/*
 * Deferred work item that carries the network device a GID scan or a
 * delete-all operation acts on.
 */
struct roce_netdev_event_work {
        /* embedded work item; the handlers recover the container with container_of() */
        struct work_struct work;
        /* target device; dev_hold() is called when queued, dev_put() by the handler */
        struct net_device *ndev;
};
61
/* Deferred work item requesting a GID rescan of one IB device. */
struct roce_rescan_work {
        /* embedded work item; the handler recovers the container with container_of() */
        struct work_struct      work;
        /* device to rescan; roce_rescan_device() stores the pointer as given */
        struct ib_device        *ib_dev;
};
66
67 static const struct {
68         bool (*is_supported)(const struct ib_device *device, u8 port_num);
69         enum ib_gid_type gid_type;
70 } PORT_CAP_TO_GID_TYPE[] = {
71         {rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
72         {rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
73 };
74
75 #define CAP_TO_GID_TABLE_SIZE   ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
76
77 unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
78 {
79         int i;
80         unsigned int ret_flags = 0;
81
82         if (!rdma_protocol_roce(ib_dev, port))
83                 return 1UL << IB_GID_TYPE_IB;
84
85         for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
86                 if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
87                         ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
88
89         return ret_flags;
90 }
91 EXPORT_SYMBOL(roce_gid_type_mask_support);
92
93 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
94     u8 port, union ib_gid *gid, struct net_device *ndev)
95 {
96         int i;
97         unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
98         struct ib_gid_attr gid_attr;
99
100         memset(&gid_attr, 0, sizeof(gid_attr));
101         gid_attr.ndev = ndev;
102
103         for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
104                 if ((1UL << i) & gid_type_mask) {
105                         gid_attr.gid_type = i;
106                         switch (gid_op) {
107                         case GID_ADD:
108                                 ib_cache_gid_add(ib_dev, port,
109                                                  gid, &gid_attr);
110                                 break;
111                         case GID_DEL:
112                                 ib_cache_gid_del(ib_dev, port,
113                                                  gid, &gid_attr);
114                                 break;
115                         }
116                 }
117         }
118 }
119
120 static int
121 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
122     struct net_device *idev, void *cookie)
123 {
124         struct net_device *ndev = (struct net_device *)cookie;
125         if (idev == NULL)
126                 return (0);
127         return (ndev == idev);
128 }
129
130 static int
131 roce_gid_match_all(struct ib_device *ib_dev, u8 port,
132     struct net_device *idev, void *cookie)
133 {
134         if (idev == NULL)
135                 return (0);
136         return (1);
137 }
138
139 static int
140 roce_gid_enum_netdev_default(struct ib_device *ib_dev,
141     u8 port, struct net_device *idev)
142 {
143         unsigned long gid_type_mask;
144
145         gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
146
147         ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
148                                      IB_CACHE_GID_DEFAULT_MODE_SET);
149
150         return (hweight_long(gid_type_mask));
151 }
152
153 static void
154 roce_gid_update_addr_callback(struct ib_device *device, u8 port,
155     struct net_device *ndev, void *cookie)
156 {
157         struct ipx_entry {
158                 STAILQ_ENTRY(ipx_entry) entry;
159                 union ipx_addr {
160                         struct sockaddr sa[0];
161                         struct sockaddr_in v4;
162                         struct sockaddr_in6 v6;
163                 } ipx_addr;
164                 struct net_device *ndev;
165         };
166         struct ipx_entry *entry;
167         struct net_device *idev;
168 #if defined(INET) || defined(INET6)
169         struct ifaddr *ifa;
170 #endif
171         VNET_ITERATOR_DECL(vnet_iter);
172         struct ib_gid_attr gid_attr;
173         union ib_gid gid;
174         int default_gids;
175         u16 index_num;
176         int i;
177
178         STAILQ_HEAD(, ipx_entry) ipx_head;
179
180         STAILQ_INIT(&ipx_head);
181
182         /* make sure default GIDs are in */
183         default_gids = roce_gid_enum_netdev_default(device, port, ndev);
184
185         VNET_LIST_RLOCK();
186         VNET_FOREACH(vnet_iter) {
187             CURVNET_SET(vnet_iter);
188             IFNET_RLOCK();
189             CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
190                 struct epoch_tracker et;
191
192                 if (idev != ndev) {
193                         if (idev->if_type != IFT_L2VLAN)
194                                 continue;
195                         if (ndev != rdma_vlan_dev_real_dev(idev))
196                                 continue;
197                 }
198
199                 /* clone address information for IPv4 and IPv6 */
200                 NET_EPOCH_ENTER(et);
201 #if defined(INET)
202                 CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
203                         if (ifa->ifa_addr == NULL ||
204                             ifa->ifa_addr->sa_family != AF_INET)
205                                 continue;
206                         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
207                         if (entry == NULL) {
208                                 pr_warn("roce_gid_update_addr_callback: "
209                                     "couldn't allocate entry for IPv4 update\n");
210                                 continue;
211                         }
212                         entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
213                         entry->ndev = idev;
214                         STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
215                 }
216 #endif
217 #if defined(INET6)
218                 CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
219                         if (ifa->ifa_addr == NULL ||
220                             ifa->ifa_addr->sa_family != AF_INET6)
221                                 continue;
222                         entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
223                         if (entry == NULL) {
224                                 pr_warn("roce_gid_update_addr_callback: "
225                                     "couldn't allocate entry for IPv6 update\n");
226                                 continue;
227                         }
228                         entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
229                         entry->ndev = idev;
230
231                         /* trash IPv6 scope ID */
232                         sa6_recoverscope(&entry->ipx_addr.v6);
233                         entry->ipx_addr.v6.sin6_scope_id = 0;
234
235                         STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
236                 }
237 #endif
238                 NET_EPOCH_EXIT(et);
239             }
240             IFNET_RUNLOCK();
241             CURVNET_RESTORE();
242         }
243         VNET_LIST_RUNLOCK();
244
245         /* add missing GIDs, if any */
246         STAILQ_FOREACH(entry, &ipx_head, entry) {
247                 unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);
248
249                 if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
250                         continue;
251
252                 for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
253                         if (!((1UL << i) & gid_type_mask))
254                                 continue;
255                         /* check if entry found */
256                         if (ib_find_cached_gid_by_port(device, &gid, i,
257                             port, entry->ndev, &index_num) == 0)
258                                 break;
259                 }
260                 if (i != IB_GID_TYPE_SIZE)
261                         continue;
262                 /* add new GID */
263                 update_gid(GID_ADD, device, port, &gid, entry->ndev);
264         }
265
266         /* remove stale GIDs, if any */
267         for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
268                 union ipx_addr ipx;
269
270                 /* check for valid network device pointer */
271                 ndev = gid_attr.ndev;
272                 if (ndev == NULL)
273                         continue;
274                 dev_put(ndev);
275
276                 /* don't delete empty entries */
277                 if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
278                         continue;
279
280                 /* zero default */
281                 memset(&ipx, 0, sizeof(ipx));
282
283                 rdma_gid2ip(&ipx.sa[0], &gid);
284
285                 STAILQ_FOREACH(entry, &ipx_head, entry) {
286                         if (entry->ndev == ndev &&
287                             memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
288                                 break;
289                 }
290                 /* check if entry found */
291                 if (entry != NULL)
292                         continue;
293
294                 /* remove GID */
295                 update_gid(GID_DEL, device, port, &gid, ndev);
296         }
297
298         while ((entry = STAILQ_FIRST(&ipx_head))) {
299                 STAILQ_REMOVE_HEAD(&ipx_head, entry);
300                 kfree(entry);
301         }
302 }
303
304 static void
305 roce_gid_queue_scan_event_handler(struct work_struct *_work)
306 {
307         struct roce_netdev_event_work *work =
308                 container_of(_work, struct roce_netdev_event_work, work);
309
310         ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
311             roce_gid_update_addr_callback, NULL);
312
313         dev_put(work->ndev);
314         kfree(work);
315 }
316
317 static void
318 roce_gid_queue_scan_event(struct net_device *ndev)
319 {
320         struct roce_netdev_event_work *work;
321
322 retry:
323         switch (ndev->if_type) {
324         case IFT_ETHER:
325                 break;
326         case IFT_L2VLAN:
327                 ndev = rdma_vlan_dev_real_dev(ndev);
328                 if (ndev != NULL)
329                         goto retry;
330                 /* FALLTHROUGH */
331         default:
332                 return;
333         }
334
335         work = kmalloc(sizeof(*work), GFP_ATOMIC);
336         if (!work) {
337                 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
338                 return;
339         }
340
341         INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
342         dev_hold(ndev);
343
344         work->ndev = ndev;
345
346         queue_work(roce_gid_mgmt_wq, &work->work);
347 }
348
349 static void
350 roce_gid_delete_all_event_handler(struct work_struct *_work)
351 {
352         struct roce_netdev_event_work *work =
353                 container_of(_work, struct roce_netdev_event_work, work);
354
355         ib_cache_gid_del_all_by_netdev(work->ndev);
356         dev_put(work->ndev);
357         kfree(work);
358 }
359
360 static void
361 roce_gid_delete_all_event(struct net_device *ndev)
362 {
363         struct roce_netdev_event_work *work;
364
365         work = kmalloc(sizeof(*work), GFP_ATOMIC);
366         if (!work) {
367                 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
368                 return;
369         }
370
371         INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
372         dev_hold(ndev);
373         work->ndev = ndev;
374         queue_work(roce_gid_mgmt_wq, &work->work);
375
376         /* make sure job is complete before returning */
377         flush_workqueue(roce_gid_mgmt_wq);
378 }
379
380 static int
381 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
382 {
383         struct net_device *ndev = ptr;
384
385         switch (event) {
386         case NETDEV_UNREGISTER:
387                 roce_gid_delete_all_event(ndev);
388                 break;
389         case NETDEV_REGISTER:
390         case NETDEV_CHANGEADDR:
391         case NETDEV_CHANGEIFADDR:
392                 roce_gid_queue_scan_event(ndev);
393                 break;
394         default:
395                 break;
396         }
397         return NOTIFY_DONE;
398 }
399
/*
 * Notifier block registered with both the inetaddr and the netdevice
 * notifier in roce_gid_mgmt_init(); all events go to inetaddr_event().
 */
static struct notifier_block nb_inetaddr = {
        .notifier_call = inetaddr_event
};
403
/*
 * Tag returned by EVENTHANDLER_REGISTER() for roce_ifnet_event(); used by
 * roce_gid_mgmt_cleanup() to deregister the handler.
 */
static eventhandler_tag eh_ifnet_event;
405
406 static void
407 roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
408 {
409         if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
410                 return;
411
412         /* make sure GID table is reloaded */
413         roce_gid_delete_all_event(ifp);
414         roce_gid_queue_scan_event(ifp);
415 }
416
417 static void
418 roce_rescan_device_handler(struct work_struct *_work)
419 {
420         struct roce_rescan_work *work =
421             container_of(_work, struct roce_rescan_work, work);
422
423         ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
424             roce_gid_update_addr_callback, NULL);
425         kfree(work);
426 }
427
428 /* Caller must flush system workqueue before removing the ib_device */
429 int roce_rescan_device(struct ib_device *ib_dev)
430 {
431         struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
432
433         if (!work)
434                 return -ENOMEM;
435
436         work->ib_dev = ib_dev;
437         INIT_WORK(&work->work, roce_rescan_device_handler);
438         queue_work(roce_gid_mgmt_wq, &work->work);
439
440         return 0;
441 }
442
443 int __init roce_gid_mgmt_init(void)
444 {
445         roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
446         if (!roce_gid_mgmt_wq) {
447                 pr_warn("roce_gid_mgmt: can't allocate work queue\n");
448                 return -ENOMEM;
449         }
450
451         register_inetaddr_notifier(&nb_inetaddr);
452
453         /*
454          * We rely on the netdevice notifier to enumerate all existing
455          * devices in the system. Register to this notifier last to
456          * make sure we will not miss any IP add/del callbacks.
457          */
458         register_netdevice_notifier(&nb_inetaddr);
459
460         eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
461             roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
462
463         return 0;
464 }
465
466 void __exit roce_gid_mgmt_cleanup(void)
467 {
468
469         if (eh_ifnet_event != NULL)
470                 EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);
471
472         unregister_inetaddr_notifier(&nb_inetaddr);
473         unregister_netdevice_notifier(&nb_inetaddr);
474
475         /*
476          * Ensure all gid deletion tasks complete before we go down,
477          * to avoid any reference to free'd memory. By the time
478          * ib-core is removed, all physical devices have been removed,
479          * so no issue with remaining hardware contexts.
480          */
481         synchronize_rcu();
482         drain_workqueue(roce_gid_mgmt_wq);
483         destroy_workqueue(roce_gid_mgmt_wq);
484 }