]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/ofed/drivers/infiniband/core/ib_roce_gid_mgmt.c
Mechanical cleanup of epoch(9) usage in network stack.
[FreeBSD/FreeBSD.git] / sys / ofed / drivers / infiniband / core / ib_roce_gid_mgmt.c
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause OR GPL-2.0
3  *
4  * Copyright (c) 2015-2017, Mellanox Technologies inc.  All rights reserved.
5  *
6  * This software is available to you under a choice of one of two
7  * licenses.  You may choose to be licensed under the terms of the GNU
8  * General Public License (GPL) Version 2, available from the file
9  * COPYING in the main directory of this source tree, or the
10  * OpenIB.org BSD license below:
11  *
12  *     Redistribution and use in source and binary forms, with or
13  *     without modification, are permitted provided that the following
14  *     conditions are met:
15  *
16  *      - Redistributions of source code must retain the above
17  *        copyright notice, this list of conditions and the following
18  *        disclaimer.
19  *
20  *      - Redistributions in binary form must reproduce the above
21  *        copyright notice, this list of conditions and the following
22  *        disclaimer in the documentation and/or other materials
23  *        provided with the distribution.
24  *
25  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
28  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
29  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
30  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
31  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
32  * SOFTWARE.
33  */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "core_priv.h"
39
40 #include <linux/in.h>
41 #include <linux/in6.h>
42 #include <linux/rcupdate.h>
43
44 #include <rdma/ib_cache.h>
45 #include <rdma/ib_addr.h>
46
47 #include <netinet6/scope6_var.h>
48
/* Ordered workqueue that serializes all GID-table update jobs. */
static struct workqueue_struct *roce_gid_mgmt_wq;

/* Operation requested on a single GID table entry. */
enum gid_op_type {
	GID_DEL = 0,
	GID_ADD
};

/* Deferred-work context for a netdev-triggered GID scan or purge. */
struct roce_netdev_event_work {
	struct work_struct work;
	struct net_device *ndev;	/* held while queued; released by the handler */
};

/* Deferred-work context for rescanning all netdevs of one ib_device. */
struct roce_rescan_work {
	struct work_struct	work;
	struct ib_device	*ib_dev;
};

/*
 * Maps each RoCE transport capability test to the GID type that must be
 * installed when the test succeeds for a given (device, port) pair.
 */
static const struct {
	bool (*is_supported)(const struct ib_device *device, u8 port_num);
	enum ib_gid_type gid_type;
} PORT_CAP_TO_GID_TYPE[] = {
	{rdma_protocol_roce_eth_encap, IB_GID_TYPE_ROCE},
	{rdma_protocol_roce_udp_encap, IB_GID_TYPE_ROCE_UDP_ENCAP},
};

#define CAP_TO_GID_TABLE_SIZE	ARRAY_SIZE(PORT_CAP_TO_GID_TYPE)
75
76 unsigned long roce_gid_type_mask_support(struct ib_device *ib_dev, u8 port)
77 {
78         int i;
79         unsigned int ret_flags = 0;
80
81         if (!rdma_protocol_roce(ib_dev, port))
82                 return 1UL << IB_GID_TYPE_IB;
83
84         for (i = 0; i < CAP_TO_GID_TABLE_SIZE; i++)
85                 if (PORT_CAP_TO_GID_TYPE[i].is_supported(ib_dev, port))
86                         ret_flags |= 1UL << PORT_CAP_TO_GID_TYPE[i].gid_type;
87
88         return ret_flags;
89 }
90 EXPORT_SYMBOL(roce_gid_type_mask_support);
91
92 static void update_gid(enum gid_op_type gid_op, struct ib_device *ib_dev,
93     u8 port, union ib_gid *gid, struct net_device *ndev)
94 {
95         int i;
96         unsigned long gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
97         struct ib_gid_attr gid_attr;
98
99         memset(&gid_attr, 0, sizeof(gid_attr));
100         gid_attr.ndev = ndev;
101
102         for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
103                 if ((1UL << i) & gid_type_mask) {
104                         gid_attr.gid_type = i;
105                         switch (gid_op) {
106                         case GID_ADD:
107                                 ib_cache_gid_add(ib_dev, port,
108                                                  gid, &gid_attr);
109                                 break;
110                         case GID_DEL:
111                                 ib_cache_gid_del(ib_dev, port,
112                                                  gid, &gid_attr);
113                                 break;
114                         }
115                 }
116         }
117 }
118
119 static int
120 roce_gid_match_netdev(struct ib_device *ib_dev, u8 port,
121     struct net_device *idev, void *cookie)
122 {
123         struct net_device *ndev = (struct net_device *)cookie;
124         if (idev == NULL)
125                 return (0);
126         return (ndev == idev);
127 }
128
129 static int
130 roce_gid_match_all(struct ib_device *ib_dev, u8 port,
131     struct net_device *idev, void *cookie)
132 {
133         if (idev == NULL)
134                 return (0);
135         return (1);
136 }
137
138 static int
139 roce_gid_enum_netdev_default(struct ib_device *ib_dev,
140     u8 port, struct net_device *idev)
141 {
142         unsigned long gid_type_mask;
143
144         gid_type_mask = roce_gid_type_mask_support(ib_dev, port);
145
146         ib_cache_gid_set_default_gid(ib_dev, port, idev, gid_type_mask,
147                                      IB_CACHE_GID_DEFAULT_MODE_SET);
148
149         return (hweight_long(gid_type_mask));
150 }
151
/*
 * Synchronize the GID table of (device, port) with the IPv4/IPv6
 * addresses currently configured on "ndev" and on any VLAN interfaces
 * stacked on top of it, across all VNETs.
 *
 * Strategy: first snapshot the relevant interface addresses into a
 * local STAILQ while holding the ifnet locks and network epoch (so the
 * allocations must be GFP_ATOMIC), then - with no network locks held -
 * add GIDs missing from the cache and delete cached GIDs that no
 * longer correspond to a live address.
 */
static void
roce_gid_update_addr_callback(struct ib_device *device, u8 port,
    struct net_device *ndev, void *cookie)
{
	struct ipx_entry {
		STAILQ_ENTRY(ipx_entry) entry;
		union ipx_addr {
			struct sockaddr sa[0];	/* generic view of v4/v6 below */
			struct sockaddr_in v4;
			struct sockaddr_in6 v6;
		} ipx_addr;
		struct net_device *ndev;
	};
	struct ipx_entry *entry;
	struct net_device *idev;
#if defined(INET) || defined(INET6)
	struct ifaddr *ifa;
#endif
	VNET_ITERATOR_DECL(vnet_iter);
	struct ib_gid_attr gid_attr;
	union ib_gid gid;
	int default_gids;
	u16 index_num;
	int i;

	STAILQ_HEAD(, ipx_entry) ipx_head;

	STAILQ_INIT(&ipx_head);

	/* make sure default GIDs are in */
	default_gids = roce_gid_enum_netdev_default(device, port, ndev);

	/* walk every VNET looking for ndev itself and VLANs above it */
	VNET_LIST_RLOCK();
	VNET_FOREACH(vnet_iter) {
	    CURVNET_SET(vnet_iter);
	    IFNET_RLOCK();
	    CK_STAILQ_FOREACH(idev, &V_ifnet, if_link) {
		struct epoch_tracker et;

		if (idev != ndev) {
			/* only consider VLAN devices whose parent is ndev */
			if (idev->if_type != IFT_L2VLAN)
				continue;
			if (ndev != rdma_vlan_dev_real_dev(idev))
				continue;
		}

		/* clone address information for IPv4 and IPv6 */
		NET_EPOCH_ENTER(et);
#if defined(INET)
		CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET)
				continue;
			/* GFP_ATOMIC: we are inside the network epoch */
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv4 update\n");
				continue;
			}
			entry->ipx_addr.v4 = *((struct sockaddr_in *)ifa->ifa_addr);
			entry->ndev = idev;
			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
#if defined(INET6)
		CK_STAILQ_FOREACH(ifa, &idev->if_addrhead, ifa_link) {
			if (ifa->ifa_addr == NULL ||
			    ifa->ifa_addr->sa_family != AF_INET6)
				continue;
			entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
			if (entry == NULL) {
				pr_warn("roce_gid_update_addr_callback: "
				    "couldn't allocate entry for IPv6 update\n");
				continue;
			}
			entry->ipx_addr.v6 = *((struct sockaddr_in6 *)ifa->ifa_addr);
			entry->ndev = idev;

			/* trash IPv6 scope ID */
			sa6_recoverscope(&entry->ipx_addr.v6);
			entry->ipx_addr.v6.sin6_scope_id = 0;

			STAILQ_INSERT_TAIL(&ipx_head, entry, entry);
		}
#endif
		NET_EPOCH_EXIT(et);
	    }
	    IFNET_RUNLOCK();
	    CURVNET_RESTORE();
	}
	VNET_LIST_RUNLOCK();

	/* add missing GIDs, if any */
	STAILQ_FOREACH(entry, &ipx_head, entry) {
		unsigned long gid_type_mask = roce_gid_type_mask_support(device, port);

		if (rdma_ip2gid(&entry->ipx_addr.sa[0], &gid) != 0)
			continue;

		for (i = 0; i != IB_GID_TYPE_SIZE; i++) {
			if (!((1UL << i) & gid_type_mask))
				continue;
			/* check if entry found */
			if (ib_find_cached_gid_by_port(device, &gid, i,
			    port, entry->ndev, &index_num) == 0)
				break;
		}
		/* a full pass without a cache hit means the GID is missing */
		if (i != IB_GID_TYPE_SIZE)
			continue;
		/* add new GID */
		update_gid(GID_ADD, device, port, &gid, entry->ndev);
	}

	/* remove stale GIDs, if any; skip the default entries up front */
	for (i = default_gids; ib_get_cached_gid(device, port, i, &gid, &gid_attr) == 0; i++) {
		union ipx_addr ipx;

		/* check for valid network device pointer */
		ndev = gid_attr.ndev;
		if (ndev == NULL)
			continue;
		/*
		 * NOTE(review): dev_put() balances a hold presumably taken
		 * by ib_get_cached_gid() on gid_attr.ndev - confirm against
		 * the cache implementation.
		 */
		dev_put(ndev);

		/* don't delete empty entries */
		if (memcmp(&gid, &zgid, sizeof(zgid)) == 0)
			continue;

		/* zero default */
		memset(&ipx, 0, sizeof(ipx));

		rdma_gid2ip(&ipx.sa[0], &gid);

		/* keep the GID if it still maps to a snapshotted address */
		STAILQ_FOREACH(entry, &ipx_head, entry) {
			if (entry->ndev == ndev &&
			    memcmp(&entry->ipx_addr, &ipx, sizeof(ipx)) == 0)
				break;
		}
		/* check if entry found */
		if (entry != NULL)
			continue;

		/* remove GID */
		update_gid(GID_DEL, device, port, &gid, ndev);
	}

	/* release the address snapshot */
	while ((entry = STAILQ_FIRST(&ipx_head))) {
		STAILQ_REMOVE_HEAD(&ipx_head, entry);
		kfree(entry);
	}
}
302
303 static void
304 roce_gid_queue_scan_event_handler(struct work_struct *_work)
305 {
306         struct roce_netdev_event_work *work =
307                 container_of(_work, struct roce_netdev_event_work, work);
308
309         ib_enum_all_roce_netdevs(roce_gid_match_netdev, work->ndev,
310             roce_gid_update_addr_callback, NULL);
311
312         dev_put(work->ndev);
313         kfree(work);
314 }
315
316 static void
317 roce_gid_queue_scan_event(struct net_device *ndev)
318 {
319         struct roce_netdev_event_work *work;
320
321 retry:
322         switch (ndev->if_type) {
323         case IFT_ETHER:
324                 break;
325         case IFT_L2VLAN:
326                 ndev = rdma_vlan_dev_real_dev(ndev);
327                 if (ndev != NULL)
328                         goto retry;
329                 /* FALLTHROUGH */
330         default:
331                 return;
332         }
333
334         work = kmalloc(sizeof(*work), GFP_ATOMIC);
335         if (!work) {
336                 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
337                 return;
338         }
339
340         INIT_WORK(&work->work, roce_gid_queue_scan_event_handler);
341         dev_hold(ndev);
342
343         work->ndev = ndev;
344
345         queue_work(roce_gid_mgmt_wq, &work->work);
346 }
347
348 static void
349 roce_gid_delete_all_event_handler(struct work_struct *_work)
350 {
351         struct roce_netdev_event_work *work =
352                 container_of(_work, struct roce_netdev_event_work, work);
353
354         ib_cache_gid_del_all_by_netdev(work->ndev);
355         dev_put(work->ndev);
356         kfree(work);
357 }
358
359 static void
360 roce_gid_delete_all_event(struct net_device *ndev)
361 {
362         struct roce_netdev_event_work *work;
363
364         work = kmalloc(sizeof(*work), GFP_ATOMIC);
365         if (!work) {
366                 pr_warn("roce_gid_mgmt: Couldn't allocate work for addr_event\n");
367                 return;
368         }
369
370         INIT_WORK(&work->work, roce_gid_delete_all_event_handler);
371         dev_hold(ndev);
372         work->ndev = ndev;
373         queue_work(roce_gid_mgmt_wq, &work->work);
374
375         /* make sure job is complete before returning */
376         flush_workqueue(roce_gid_mgmt_wq);
377 }
378
379 static int
380 inetaddr_event(struct notifier_block *this, unsigned long event, void *ptr)
381 {
382         struct net_device *ndev = ptr;
383
384         switch (event) {
385         case NETDEV_UNREGISTER:
386                 roce_gid_delete_all_event(ndev);
387                 break;
388         case NETDEV_REGISTER:
389         case NETDEV_CHANGEADDR:
390         case NETDEV_CHANGEIFADDR:
391                 roce_gid_queue_scan_event(ndev);
392                 break;
393         default:
394                 break;
395         }
396         return NOTIFY_DONE;
397 }
398
/* Single notifier instance, registered on both the inetaddr and the
 * netdevice notifier chains by roce_gid_mgmt_init(). */
static struct notifier_block nb_inetaddr = {
	.notifier_call = inetaddr_event
};

/* Tag for the ifnet_event handler, kept for deregistration at cleanup. */
static eventhandler_tag eh_ifnet_event;
404
405 static void
406 roce_ifnet_event(void *arg, struct ifnet *ifp, int event)
407 {
408         if (event != IFNET_EVENT_PCP || is_vlan_dev(ifp))
409                 return;
410
411         /* make sure GID table is reloaded */
412         roce_gid_delete_all_event(ifp);
413         roce_gid_queue_scan_event(ifp);
414 }
415
416 static void
417 roce_rescan_device_handler(struct work_struct *_work)
418 {
419         struct roce_rescan_work *work =
420             container_of(_work, struct roce_rescan_work, work);
421
422         ib_enum_roce_netdev(work->ib_dev, roce_gid_match_all, NULL,
423             roce_gid_update_addr_callback, NULL);
424         kfree(work);
425 }
426
427 /* Caller must flush system workqueue before removing the ib_device */
428 int roce_rescan_device(struct ib_device *ib_dev)
429 {
430         struct roce_rescan_work *work = kmalloc(sizeof(*work), GFP_KERNEL);
431
432         if (!work)
433                 return -ENOMEM;
434
435         work->ib_dev = ib_dev;
436         INIT_WORK(&work->work, roce_rescan_device_handler);
437         queue_work(roce_gid_mgmt_wq, &work->work);
438
439         return 0;
440 }
441
442 int __init roce_gid_mgmt_init(void)
443 {
444         roce_gid_mgmt_wq = alloc_ordered_workqueue("roce_gid_mgmt_wq", 0);
445         if (!roce_gid_mgmt_wq) {
446                 pr_warn("roce_gid_mgmt: can't allocate work queue\n");
447                 return -ENOMEM;
448         }
449
450         register_inetaddr_notifier(&nb_inetaddr);
451
452         /*
453          * We rely on the netdevice notifier to enumerate all existing
454          * devices in the system. Register to this notifier last to
455          * make sure we will not miss any IP add/del callbacks.
456          */
457         register_netdevice_notifier(&nb_inetaddr);
458
459         eh_ifnet_event = EVENTHANDLER_REGISTER(ifnet_event,
460             roce_ifnet_event, NULL, EVENTHANDLER_PRI_ANY);
461
462         return 0;
463 }
464
465 void __exit roce_gid_mgmt_cleanup(void)
466 {
467
468         if (eh_ifnet_event != NULL)
469                 EVENTHANDLER_DEREGISTER(ifnet_event, eh_ifnet_event);
470
471         unregister_inetaddr_notifier(&nb_inetaddr);
472         unregister_netdevice_notifier(&nb_inetaddr);
473
474         /*
475          * Ensure all gid deletion tasks complete before we go down,
476          * to avoid any reference to free'd memory. By the time
477          * ib-core is removed, all physical devices have been removed,
478          * so no issue with remaining hardware contexts.
479          */
480         synchronize_rcu();
481         drain_workqueue(roce_gid_mgmt_wq);
482         destroy_workqueue(roce_gid_mgmt_wq);
483 }