]> CyberLeo.Net >> Repos - FreeBSD/stable/9.git/blob - sys/ofed/drivers/infiniband/core/addr.c
MFC r254122, r254123, r256116, r255970, r247671, r269861, r268314, r256269,
[FreeBSD/stable/9.git] / sys / ofed / drivers / infiniband / core / addr.c
1 /*
2  * Copyright (c) 2005 Voltaire Inc.  All rights reserved.
3  * Copyright (c) 2002-2005, Network Appliance, Inc. All rights reserved.
4  * Copyright (c) 1999-2005, Mellanox Technologies, Inc. All rights reserved.
5  * Copyright (c) 2005 Intel Corporation.  All rights reserved.
6  *
7  * This software is available to you under a choice of one of two
8  * licenses.  You may choose to be licensed under the terms of the GNU
9  * General Public License (GPL) Version 2, available from the file
10  * COPYING in the main directory of this source tree, or the
11  * OpenIB.org BSD license below:
12  *
13  *     Redistribution and use in source and binary forms, with or
14  *     without modification, are permitted provided that the following
15  *     conditions are met:
16  *
17  *      - Redistributions of source code must retain the above
18  *        copyright notice, this list of conditions and the following
19  *        disclaimer.
20  *
21  *      - Redistributions in binary form must reproduce the above
22  *        copyright notice, this list of conditions and the following
23  *        disclaimer in the documentation and/or other materials
24  *        provided with the distribution.
25  *
26  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
27  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
28  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
29  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
30  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
31  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
32  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
33  * SOFTWARE.
34  */
35
36 #include <linux/mutex.h>
37 #include <linux/inetdevice.h>
38 #include <linux/workqueue.h>
39 #include <net/route.h>
40 #include <net/netevent.h>
41 #include <rdma/ib_addr.h>
42
43 MODULE_AUTHOR("Sean Hefty");
44 MODULE_DESCRIPTION("IB Address Translation");
45 MODULE_LICENSE("Dual BSD/GPL");
46
47 struct addr_req {
48         struct list_head list;
49         struct sockaddr_storage src_addr;
50         struct sockaddr_storage dst_addr;
51         struct rdma_dev_addr *addr;
52         struct rdma_addr_client *client;
53         void *context;
54         void (*callback)(int status, struct sockaddr *src_addr,
55                          struct rdma_dev_addr *addr, void *context);
56         unsigned long timeout;
57         int status;
58 };
59
60 static void process_req(struct work_struct *work);
61
62 static DEFINE_MUTEX(lock);
63 static LIST_HEAD(req_list);
64 static struct delayed_work work;
65 static struct workqueue_struct *addr_wq;
66
67 void rdma_addr_register_client(struct rdma_addr_client *client)
68 {
69         atomic_set(&client->refcount, 1);
70         init_completion(&client->comp);
71 }
72 EXPORT_SYMBOL(rdma_addr_register_client);
73
74 static inline void put_client(struct rdma_addr_client *client)
75 {
76         if (atomic_dec_and_test(&client->refcount))
77                 complete(&client->comp);
78 }
79
80 void rdma_addr_unregister_client(struct rdma_addr_client *client)
81 {
82         put_client(client);
83         wait_for_completion(&client->comp);
84 }
85 EXPORT_SYMBOL(rdma_addr_unregister_client);
86
87 #ifdef __linux__
88 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev,
89                      const unsigned char *dst_dev_addr)
90 {
91         dev_addr->dev_type = dev->type;
92         memcpy(dev_addr->src_dev_addr, dev->dev_addr, MAX_ADDR_LEN);
93         memcpy(dev_addr->broadcast, dev->broadcast, MAX_ADDR_LEN);
94         if (dst_dev_addr)
95                 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, MAX_ADDR_LEN);
96         dev_addr->bound_dev_if = dev->ifindex;
97         return 0;
98 }
99 #else
100 int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct ifnet *dev,
101                      const unsigned char *dst_dev_addr)
102 {
103         if (dev->if_type == IFT_INFINIBAND)
104                 dev_addr->dev_type = ARPHRD_INFINIBAND;
105         else if (dev->if_type == IFT_ETHER)
106                 dev_addr->dev_type = ARPHRD_ETHER;
107         else
108                 dev_addr->dev_type = 0;
109         memcpy(dev_addr->src_dev_addr, IF_LLADDR(dev), dev->if_addrlen);
110         memcpy(dev_addr->broadcast, __DECONST(char *, dev->if_broadcastaddr),
111             dev->if_addrlen);
112         if (dst_dev_addr)
113                 memcpy(dev_addr->dst_dev_addr, dst_dev_addr, dev->if_addrlen);
114         dev_addr->bound_dev_if = dev->if_index;
115         return 0;
116 }
117 #endif
118 EXPORT_SYMBOL(rdma_copy_addr);
119
120 int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr)
121 {
122         struct net_device *dev;
123         int ret = -EADDRNOTAVAIL;
124
125         if (dev_addr->bound_dev_if) {
126                 dev = dev_get_by_index(&init_net, dev_addr->bound_dev_if);
127                 if (!dev)
128                         return -ENODEV;
129                 ret = rdma_copy_addr(dev_addr, dev, NULL);
130                 dev_put(dev);
131                 return ret;
132         }
133
134         switch (addr->sa_family) {
135 #ifdef INET
136         case AF_INET:
137                 dev = ip_dev_find(NULL,
138                         ((struct sockaddr_in *) addr)->sin_addr.s_addr);
139
140                 if (!dev)
141                         return ret;
142
143                 ret = rdma_copy_addr(dev_addr, dev, NULL);
144                 dev_put(dev);
145                 break;
146 #endif
147
148 #if defined(INET6)
149         case AF_INET6:
150 #ifdef __linux__
151                 read_lock(&dev_base_lock);
152                 for_each_netdev(&init_net, dev) {
153                         if (ipv6_chk_addr(&init_net,
154                                           &((struct sockaddr_in6 *) addr)->sin6_addr,
155                                           dev, 1)) {
156                                 ret = rdma_copy_addr(dev_addr, dev, NULL);
157                                 break;
158                         }
159                 }
160                 read_unlock(&dev_base_lock);
161 #else
162                 {
163                         struct sockaddr_in6 *sin6;
164                         struct ifaddr *ifa;
165                         in_port_t port;
166
167                         sin6 = (struct sockaddr_in6 *)addr;
168                         port = sin6->sin6_port;
169                         sin6->sin6_port = 0;
170                         ifa = ifa_ifwithaddr(addr);
171                         sin6->sin6_port = port;
172                         if (ifa == NULL) {
173                                 ret = -ENODEV;
174                                 break;
175                         }
176                         ret = rdma_copy_addr(dev_addr, ifa->ifa_ifp, NULL);
177                         ifa_free(ifa);
178                         break;
179                 }
180 #endif
181                 break;
182 #endif
183         }
184         return ret;
185 }
186 EXPORT_SYMBOL(rdma_translate_ip);
187
188 static void set_timeout(unsigned long time)
189 {
190         unsigned long delay;
191
192         cancel_delayed_work(&work);
193
194         delay = time - jiffies;
195         if ((long)delay <= 0)
196                 delay = 1;
197
198         queue_delayed_work(addr_wq, &work, delay);
199 }
200
201 static void queue_req(struct addr_req *req)
202 {
203         struct addr_req *temp_req;
204
205         mutex_lock(&lock);
206         list_for_each_entry_reverse(temp_req, &req_list, list) {
207                 if (time_after_eq(req->timeout, temp_req->timeout))
208                         break;
209         }
210
211         list_add(&req->list, &temp_req->list);
212
213         if (req_list.next == &req->list)
214                 set_timeout(req->timeout);
215         mutex_unlock(&lock);
216 }
217
218 #ifdef __linux__
219 static int addr4_resolve(struct sockaddr_in *src_in,
220                          struct sockaddr_in *dst_in,
221                          struct rdma_dev_addr *addr)
222 {
223         __be32 src_ip = src_in->sin_addr.s_addr;
224         __be32 dst_ip = dst_in->sin_addr.s_addr;
225         struct flowi fl;
226         struct rtable *rt;
227         struct neighbour *neigh;
228         int ret;
229
230         memset(&fl, 0, sizeof fl);
231         fl.nl_u.ip4_u.daddr = dst_ip;
232         fl.nl_u.ip4_u.saddr = src_ip;
233         fl.oif = addr->bound_dev_if;
234
235         ret = ip_route_output_key(&init_net, &rt, &fl);
236         if (ret)
237                 goto out;
238
239         src_in->sin_family = AF_INET;
240         src_in->sin_addr.s_addr = rt->rt_src;
241
242         if (rt->idev->dev->flags & IFF_LOOPBACK) {
243                 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
244                 if (!ret)
245                         memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
246                 goto put;
247         }
248
249         /* If the device does ARP internally, return 'done' */
250         if (rt->idev->dev->flags & IFF_NOARP) {
251                 rdma_copy_addr(addr, rt->idev->dev, NULL);
252                 goto put;
253         }
254
255         neigh = neigh_lookup(&arp_tbl, &rt->rt_gateway, rt->idev->dev);
256         if (!neigh || !(neigh->nud_state & NUD_VALID)) {
257                 neigh_event_send(rt->u.dst.neighbour, NULL);
258                 ret = -ENODATA;
259                 if (neigh)
260                         goto release;
261                 goto put;
262         }
263
264         ret = rdma_copy_addr(addr, neigh->dev, neigh->ha);
265 release:
266         neigh_release(neigh);
267 put:
268         ip_rt_put(rt);
269 out:
270         return ret;
271 }
272
273 #if defined(INET6)
274 static int addr6_resolve(struct sockaddr_in6 *src_in,
275                          struct sockaddr_in6 *dst_in,
276                          struct rdma_dev_addr *addr)
277 {
278         struct flowi fl;
279         struct neighbour *neigh;
280         struct dst_entry *dst;
281         int ret;
282
283         memset(&fl, 0, sizeof fl);
284         ipv6_addr_copy(&fl.fl6_dst, &dst_in->sin6_addr);
285         ipv6_addr_copy(&fl.fl6_src, &src_in->sin6_addr);
286         fl.oif = addr->bound_dev_if;
287
288         dst = ip6_route_output(&init_net, NULL, &fl);
289         if ((ret = dst->error))
290                 goto put;
291
292         if (ipv6_addr_any(&fl.fl6_src)) {
293                 ret = ipv6_dev_get_saddr(&init_net, ip6_dst_idev(dst)->dev,
294                                          &fl.fl6_dst, 0, &fl.fl6_src);
295                 if (ret)
296                         goto put;
297
298                 src_in->sin6_family = AF_INET6;
299                 ipv6_addr_copy(&src_in->sin6_addr, &fl.fl6_src);
300         }
301
302         if (dst->dev->flags & IFF_LOOPBACK) {
303                 ret = rdma_translate_ip((struct sockaddr *) dst_in, addr);
304                 if (!ret)
305                         memcpy(addr->dst_dev_addr, addr->src_dev_addr, MAX_ADDR_LEN);
306                 goto put;
307         }
308
309         /* If the device does ARP internally, return 'done' */
310         if (dst->dev->flags & IFF_NOARP) {
311                 ret = rdma_copy_addr(addr, dst->dev, NULL);
312                 goto put;
313         }
314         
315         neigh = dst->neighbour;
316         if (!neigh || !(neigh->nud_state & NUD_VALID)) {
317                 neigh_event_send(dst->neighbour, NULL);
318                 ret = -ENODATA;
319                 goto put;
320         }
321
322         ret = rdma_copy_addr(addr, dst->dev, neigh->ha);
323 put:
324         dst_release(dst);
325         return ret;
326 }
327 #else
328 static int addr6_resolve(struct sockaddr_in6 *src_in,
329                          struct sockaddr_in6 *dst_in,
330                          struct rdma_dev_addr *addr)
331 {
332         return -EADDRNOTAVAIL;
333 }
334 #endif
335
336 #else
337 #include <netinet/if_ether.h>
338
339 static int addr_resolve(struct sockaddr *src_in,
340                         struct sockaddr *dst_in,
341                         struct rdma_dev_addr *addr)
342 {
343         struct sockaddr_in *sin;
344         struct sockaddr_in6 *sin6;
345         struct ifaddr *ifa;
346         struct ifnet *ifp;
347 #if defined(INET) || defined(INET6)
348         struct llentry *lle;
349 #endif
350         struct rtentry *rte;
351         in_port_t port;
352         u_char edst[MAX_ADDR_LEN];
353         int multi;
354         int bcast;
355         int error = 0;
356
357         /*
358          * Determine whether the address is unicast, multicast, or broadcast
359          * and whether the source interface is valid.
360          */
361         multi = 0;
362         bcast = 0;
363         sin = NULL;
364         sin6 = NULL;
365         ifp = NULL;
366         rte = NULL;
367         switch (dst_in->sa_family) {
368 #ifdef INET
369         case AF_INET:
370                 sin = (struct sockaddr_in *)dst_in;
371                 if (sin->sin_addr.s_addr == INADDR_BROADCAST)
372                         bcast = 1;
373                 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr)))
374                         multi = 1;
375                 sin = (struct sockaddr_in *)src_in;
376                 if (sin->sin_addr.s_addr != INADDR_ANY) {
377                         /*
378                          * Address comparison fails if the port is set
379                          * cache it here to be restored later.
380                          */
381                         port = sin->sin_port;
382                         sin->sin_port = 0;
383                         memset(&sin->sin_zero, 0, sizeof(sin->sin_zero));
384                 } else
385                         src_in = NULL; 
386                 break;
387 #endif
388 #ifdef INET6
389         case AF_INET6:
390                 sin6 = (struct sockaddr_in6 *)dst_in;
391                 if (IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))
392                         multi = 1;
393                 sin6 = (struct sockaddr_in6 *)src_in;
394                 if (!IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr)) {
395                         port = sin6->sin6_port;
396                         sin6->sin6_port = 0;
397                 } else
398                         src_in = NULL;
399                 break;
400 #endif
401         default:
402                 return -EINVAL;
403         }
404         /*
405          * If we have a source address to use look it up first and verify
406          * that it is a local interface.
407          */
408         if (src_in) {
409                 ifa = ifa_ifwithaddr(src_in);
410                 if (sin)
411                         sin->sin_port = port;
412                 if (sin6)
413                         sin6->sin6_port = port;
414                 if (ifa == NULL)
415                         return -ENETUNREACH;
416                 ifp = ifa->ifa_ifp;
417                 ifa_free(ifa);
418                 if (bcast || multi)
419                         goto mcast;
420         }
421         /*
422          * Make sure the route exists and has a valid link.
423          */
424         rte = rtalloc1(dst_in, 1, 0);
425         if (rte == NULL || rte->rt_ifp == NULL || !RT_LINK_IS_UP(rte->rt_ifp)) {
426                 if (rte) 
427                         RTFREE_LOCKED(rte);
428                 return -EHOSTUNREACH;
429         }
430         /*
431          * If it's not multicast or broadcast and the route doesn't match the
432          * requested interface return unreachable.  Otherwise fetch the
433          * correct interface pointer and unlock the route.
434          */
435         if (multi || bcast) {
436                 if (ifp == NULL)
437                         ifp = rte->rt_ifp;
438                 RTFREE_LOCKED(rte);
439         } else if (ifp && ifp != rte->rt_ifp) {
440                 RTFREE_LOCKED(rte);
441                 return -ENETUNREACH;
442         } else {
443                 if (ifp == NULL)
444                         ifp = rte->rt_ifp;
445                 RT_UNLOCK(rte);
446         }
447 mcast:
448         if (bcast)
449                 return rdma_copy_addr(addr, ifp, ifp->if_broadcastaddr);
450         if (multi) {
451                 struct sockaddr *llsa;
452
453                 error = ifp->if_resolvemulti(ifp, &llsa, dst_in);
454                 if (error)
455                         return -error;
456                 error = rdma_copy_addr(addr, ifp,
457                     LLADDR((struct sockaddr_dl *)llsa));
458                 free(llsa, M_IFMADDR);
459                 return error;
460         }
461         /*
462          * Resolve the link local address.
463          */
464         switch (dst_in->sa_family) {
465 #ifdef INET
466         case AF_INET:
467                 error = arpresolve(ifp, rte, NULL, dst_in, edst, &lle);
468                 break;
469 #endif
470 #ifdef INET6
471         case AF_INET6:
472                 error = nd6_storelladdr(ifp, NULL, dst_in, (u_char *)edst, &lle);
473                 break;
474 #endif
475         default:
476                 /* XXX: Shouldn't happen. */
477                 error = -EINVAL;
478         }
479         RTFREE(rte);
480         if (error == 0)
481                 return rdma_copy_addr(addr, ifp, edst);
482         if (error == EWOULDBLOCK)
483                 return -ENODATA;
484         return -error;
485 }
486
487 #endif
488
489 static void process_req(struct work_struct *work)
490 {
491         struct addr_req *req, *temp_req;
492         struct sockaddr *src_in, *dst_in;
493         struct list_head done_list;
494
495         INIT_LIST_HEAD(&done_list);
496
497         mutex_lock(&lock);
498         list_for_each_entry_safe(req, temp_req, &req_list, list) {
499                 if (req->status == -ENODATA) {
500                         src_in = (struct sockaddr *) &req->src_addr;
501                         dst_in = (struct sockaddr *) &req->dst_addr;
502                         req->status = addr_resolve(src_in, dst_in, req->addr);
503                         if (req->status && time_after_eq(jiffies, req->timeout))
504                                 req->status = -ETIMEDOUT;
505                         else if (req->status == -ENODATA)
506                                 continue;
507                 }
508                 list_move_tail(&req->list, &done_list);
509         }
510
511         if (!list_empty(&req_list)) {
512                 req = list_entry(req_list.next, struct addr_req, list);
513                 set_timeout(req->timeout);
514         }
515         mutex_unlock(&lock);
516
517         list_for_each_entry_safe(req, temp_req, &done_list, list) {
518                 list_del(&req->list);
519                 req->callback(req->status, (struct sockaddr *) &req->src_addr,
520                         req->addr, req->context);
521                 put_client(req->client);
522                 kfree(req);
523         }
524 }
525
526 int rdma_resolve_ip(struct rdma_addr_client *client,
527                     struct sockaddr *src_addr, struct sockaddr *dst_addr,
528                     struct rdma_dev_addr *addr, int timeout_ms,
529                     void (*callback)(int status, struct sockaddr *src_addr,
530                                      struct rdma_dev_addr *addr, void *context),
531                     void *context)
532 {
533         struct sockaddr *src_in, *dst_in;
534         struct addr_req *req;
535         int ret = 0;
536
537         req = kzalloc(sizeof *req, GFP_KERNEL);
538         if (!req)
539                 return -ENOMEM;
540
541         src_in = (struct sockaddr *) &req->src_addr;
542         dst_in = (struct sockaddr *) &req->dst_addr;
543
544         if (src_addr) {
545                 if (src_addr->sa_family != dst_addr->sa_family) {
546                         ret = -EINVAL;
547                         goto err;
548                 }
549
550                 memcpy(src_in, src_addr, ip_addr_size(src_addr));
551         } else {
552                 src_in->sa_family = dst_addr->sa_family;
553         }
554
555         memcpy(dst_in, dst_addr, ip_addr_size(dst_addr));
556         req->addr = addr;
557         req->callback = callback;
558         req->context = context;
559         req->client = client;
560         atomic_inc(&client->refcount);
561
562         req->status = addr_resolve(src_in, dst_in, addr);
563         switch (req->status) {
564         case 0:
565                 req->timeout = jiffies;
566                 queue_req(req);
567                 break;
568         case -ENODATA:
569                 req->timeout = msecs_to_jiffies(timeout_ms) + jiffies;
570                 queue_req(req);
571                 break;
572         default:
573                 ret = req->status;
574                 atomic_dec(&client->refcount);
575                 goto err;
576         }
577         return ret;
578 err:
579         kfree(req);
580         return ret;
581 }
582 EXPORT_SYMBOL(rdma_resolve_ip);
583
584 void rdma_addr_cancel(struct rdma_dev_addr *addr)
585 {
586         struct addr_req *req, *temp_req;
587
588         mutex_lock(&lock);
589         list_for_each_entry_safe(req, temp_req, &req_list, list) {
590                 if (req->addr == addr) {
591                         req->status = -ECANCELED;
592                         req->timeout = jiffies;
593                         list_move(&req->list, &req_list);
594                         set_timeout(req->timeout);
595                         break;
596                 }
597         }
598         mutex_unlock(&lock);
599 }
600 EXPORT_SYMBOL(rdma_addr_cancel);
601
602 static int netevent_callback(struct notifier_block *self, unsigned long event,
603         void *ctx)
604 {
605         if (event == NETEVENT_NEIGH_UPDATE) {
606 #ifdef __linux__
607                 struct neighbour *neigh = ctx;
608
609                 if (neigh->nud_state & NUD_VALID) {
610                         set_timeout(jiffies);
611                 }
612 #else
613                 set_timeout(jiffies);
614 #endif
615         }
616         return 0;
617 }
618
619 static struct notifier_block nb = {
620         .notifier_call = netevent_callback
621 };
622
623 static int addr_init(void)
624 {
625         INIT_DELAYED_WORK(&work, process_req);
626         addr_wq = create_singlethread_workqueue("ib_addr");
627         if (!addr_wq)
628                 return -ENOMEM;
629
630         register_netevent_notifier(&nb);
631         return 0;
632 }
633
634 static void addr_cleanup(void)
635 {
636         unregister_netevent_notifier(&nb);
637         destroy_workqueue(addr_wq);
638 }
639
640 module_init(addr_init);
641 module_exit(addr_cleanup);