2 * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include <sys/cdefs.h>
27 #include "opt_inet6.h"
29 #include <sys/param.h>
31 #include <sys/ctype.h>
33 #include <sys/filedesc.h>
36 #include <sys/malloc.h>
39 #include <sys/signalvar.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
44 #include <net/if_var.h>
45 #include <net/if_dl.h>
46 #include <net/if_types.h>
47 #include <netlink/netlink.h>
50 #include <netinet/in.h>
52 #include <compat/linux/linux.h>
53 #include <compat/linux/linux_common.h>
54 #include <compat/linux/linux_mib.h>
55 #include <compat/linux/linux_util.h>
57 _Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
58 _Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
59 "Linux struct sockaddr size");
60 _Static_assert(offsetof(struct sockaddr, sa_data) ==
61 offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
63 static bool use_real_ifnames = false;
64 SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
66 "Use FreeBSD interface names instead of generating ethN aliases");
68 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
69 LINUX_SIGHUP, /* SIGHUP */
70 LINUX_SIGINT, /* SIGINT */
71 LINUX_SIGQUIT, /* SIGQUIT */
72 LINUX_SIGILL, /* SIGILL */
73 LINUX_SIGTRAP, /* SIGTRAP */
74 LINUX_SIGABRT, /* SIGABRT */
76 LINUX_SIGFPE, /* SIGFPE */
77 LINUX_SIGKILL, /* SIGKILL */
78 LINUX_SIGBUS, /* SIGBUS */
79 LINUX_SIGSEGV, /* SIGSEGV */
80 LINUX_SIGSYS, /* SIGSYS */
81 LINUX_SIGPIPE, /* SIGPIPE */
82 LINUX_SIGALRM, /* SIGALRM */
83 LINUX_SIGTERM, /* SIGTERM */
84 LINUX_SIGURG, /* SIGURG */
85 LINUX_SIGSTOP, /* SIGSTOP */
86 LINUX_SIGTSTP, /* SIGTSTP */
87 LINUX_SIGCONT, /* SIGCONT */
88 LINUX_SIGCHLD, /* SIGCHLD */
89 LINUX_SIGTTIN, /* SIGTTIN */
90 LINUX_SIGTTOU, /* SIGTTOU */
91 LINUX_SIGIO, /* SIGIO */
92 LINUX_SIGXCPU, /* SIGXCPU */
93 LINUX_SIGXFSZ, /* SIGXFSZ */
94 LINUX_SIGVTALRM,/* SIGVTALRM */
95 LINUX_SIGPROF, /* SIGPROF */
96 LINUX_SIGWINCH, /* SIGWINCH */
98 LINUX_SIGUSR1, /* SIGUSR1 */
99 LINUX_SIGUSR2 /* SIGUSR2 */
/*
 * FreeBSD-side stand-in number for the Linux SIGPWR signal: one past the
 * value that LINUX_SIGRTMAX lands on when Linux RT signals are rebased
 * from LINUX_SIGRTMIN onto SIGRTMIN.
 */
102 #define LINUX_SIGPWREMU (SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
/*
 * Linux-to-FreeBSD translation table for the non-RT signals.  It is
 * indexed with _SIG_IDX(sig) by linux_to_bsd_signal(); the trailing
 * comment on each entry names the Linux signal that the entry translates.
 * LINUX_SIGSTKFLT is mapped to SIGBUS and LINUX_SIGPWR to the emulated
 * LINUX_SIGPWREMU number.
 */
104 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
105 SIGHUP, /* LINUX_SIGHUP */
106 SIGINT, /* LINUX_SIGINT */
107 SIGQUIT, /* LINUX_SIGQUIT */
108 SIGILL, /* LINUX_SIGILL */
109 SIGTRAP, /* LINUX_SIGTRAP */
110 SIGABRT, /* LINUX_SIGABRT */
111 SIGBUS, /* LINUX_SIGBUS */
112 SIGFPE, /* LINUX_SIGFPE */
113 SIGKILL, /* LINUX_SIGKILL */
114 SIGUSR1, /* LINUX_SIGUSR1 */
115 SIGSEGV, /* LINUX_SIGSEGV */
116 SIGUSR2, /* LINUX_SIGUSR2 */
117 SIGPIPE, /* LINUX_SIGPIPE */
118 SIGALRM, /* LINUX_SIGALRM */
119 SIGTERM, /* LINUX_SIGTERM */
120 SIGBUS, /* LINUX_SIGSTKFLT */
121 SIGCHLD, /* LINUX_SIGCHLD */
122 SIGCONT, /* LINUX_SIGCONT */
123 SIGSTOP, /* LINUX_SIGSTOP */
124 SIGTSTP, /* LINUX_SIGTSTP */
125 SIGTTIN, /* LINUX_SIGTTIN */
126 SIGTTOU, /* LINUX_SIGTTOU */
127 SIGURG, /* LINUX_SIGURG */
128 SIGXCPU, /* LINUX_SIGXCPU */
129 SIGXFSZ, /* LINUX_SIGXFSZ */
130 SIGVTALRM, /* LINUX_SIGVTALRM */
131 SIGPROF, /* LINUX_SIGPROF */
132 SIGWINCH, /* LINUX_SIGWINCH */
133 SIGIO, /* LINUX_SIGIO */
135 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
136 * to the first unused FreeBSD signal number. Since Linux supports
137 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
139 LINUX_SIGPWREMU,/* LINUX_SIGPWR */
140 SIGSYS /* LINUX_SIGSYS */
143 static struct cdev *dev_shm_cdev;
144 static struct cdevsw dev_shm_cdevsw = {
145 .d_version = D_VERSION,
150 * Map Linux RT signals to the FreeBSD RT signals.
/*
 * Rebase a Linux RT signal number onto the FreeBSD RT range: the offset
 * of sig from LINUX_SIGRTMIN is added to SIGRTMIN.  No range check is
 * done on sig in the visible code.
 */
153 linux_to_bsd_rt_signal(int sig)
156 return (SIGRTMIN + sig - LINUX_SIGRTMIN);
/*
 * Inverse of linux_to_bsd_rt_signal(): the offset of sig from SIGRTMIN
 * is added to LINUX_SIGRTMIN.  No range check is done on sig in the
 * visible code.
 */
160 bsd_to_linux_rt_signal(int sig)
163 return (sig - SIGRTMIN + LINUX_SIGRTMIN);
/*
 * Translate a Linux signal number to its FreeBSD counterpart.
 * Numbers below LINUX_SIGRTMIN are looked up in linux_to_bsd_sigtbl;
 * all others are handed to linux_to_bsd_rt_signal().
 */
167 linux_to_bsd_signal(int sig)
/* Only 1..LINUX_SIGRTMAX is a valid input; anything else trips the assertion. */
170 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
172 if (sig < LINUX_SIGRTMIN)
173 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
175 return (linux_to_bsd_rt_signal(sig));
/*
 * Translate a FreeBSD signal number to its Linux counterpart.
 * Numbers up to LINUX_SIGTBLSZ are looked up in bsd_to_linux_sigtbl,
 * the emulated LINUX_SIGPWREMU number maps back to LINUX_SIGPWR, and
 * everything else goes through bsd_to_linux_rt_signal().
 * NOTE(review): no lower-bound check on sig is visible here, unlike the
 * KASSERT in linux_to_bsd_signal(); presumably callers pass sig >= 1,
 * since a smaller value would index before the table - confirm.
 */
179 bsd_to_linux_signal(int sig)
182 if (sig <= LINUX_SIGTBLSZ)
183 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
184 if (sig == LINUX_SIGPWREMU)
185 return (LINUX_SIGPWR);
187 return (bsd_to_linux_rt_signal(sig));
191 linux_to_bsd_sigaltstack(int lsa)
195 if (lsa & LINUX_SS_DISABLE)
198 * Linux ignores the SS_ONSTACK flag for the ss
199 * parameter, while FreeBSD prohibits it.
/*
 * Convert FreeBSD sigaltstack flag bits to the Linux ones: SS_DISABLE
 * and SS_ONSTACK in bsa are carried over into lsa as LINUX_SS_DISABLE
 * and LINUX_SS_ONSTACK.
 * NOTE(review): the declaration of lsa and the return statement are not
 * part of this listing; presumably lsa starts at 0 and is returned.
 */
205 bsd_to_linux_sigaltstack(int bsa)
209 if (bsa & SS_DISABLE)
210 lsa |= LINUX_SS_DISABLE;
211 if (bsa & SS_ONSTACK)
212 lsa |= LINUX_SS_ONSTACK;
217 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
222 for (l = 1; l <= LINUX_SIGRTMAX; l++) {
223 if (LINUX_SIGISMEMBER(*lss, l)) {
224 b = linux_to_bsd_signal(l);
/*
 * Build the Linux signal set *lss from the FreeBSD signal set *bss.
 * *lss is emptied first; then every FreeBSD signal number from 1 to
 * SIGRTMAX that is a member of *bss is translated with
 * bsd_to_linux_signal() and added to *lss.
 */
232 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
236 LINUX_SIGEMPTYSET(*lss);
237 for (b = 1; b <= SIGRTMAX; b++) {
238 if (SIGISMEMBER(*bss, b)) {
239 l = bsd_to_linux_signal(b);
241 LINUX_SIGADDSET(*lss, l);
247 * Translate a FreeBSD interface name to a Linux interface name
248 * by interface name, and return the number of bytes copied to lxname.
251 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
253 struct epoch_tracker et;
258 CURVNET_SET(TD_TO_VNET(curthread));
260 ifp = ifunit(bsdname);
262 ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
269 * Translate a FreeBSD interface name to a Linux interface name
270 * by interface index, and return the number of bytes copied to lxname.
273 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
275 struct epoch_tracker et;
280 CURVNET_SET(TD_TO_VNET(curthread));
282 ifp = ifnet_byindex(idx);
284 ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
291 * Translate a FreeBSD interface name to a Linux interface name,
292 * and return the number of bytes copied to lxname, 0 if interface
293 * not found, -1 on error.
295 struct ifname_bsd_to_linux_ifp_cb_s {
303 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
305 struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
308 return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
/*
 * Write the Linux-visible name of ifp into lxname (buffer of len bytes).
 * Three cases, tried in order:
 *  - a loopback interface named lo0 is reported as lo;
 *  - a non-Ethernet interface, or any interface when
 *    linux_use_real_ifname() is true, keeps its FreeBSD name;
 *  - otherwise the name is produced by ifname_bsd_to_linux_ifp_cb()
 *    during an if_foreach() walk, using the state in arg.
 * The value returned is whatever strlcpy() or if_foreach() returned.
 */
315 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
317 struct ifname_bsd_to_linux_ifp_cb_s arg = {
327 * Linux loopback interface name is lo (not lo0),
328 * we translate lo to lo0, loX to loX.
330 if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
331 return (strlcpy(lxname, "lo", len));
333 /* Short-circuit non ethernet interfaces. */
334 if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
335 return (strlcpy(lxname, if_name(ifp), len));
337 /* Determine the (relative) unit number for ethernet interfaces. */
338 return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
342 * Translate a Linux interface name to a FreeBSD interface name,
343 * and return the associated ifnet structure.
344 * bsdname and lxname need to be at least IFNAMSIZ bytes long, but
345 * can point to the same buffer.
347 struct ifname_linux_to_ifp_cb_s {
357 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
359 struct ifname_linux_to_ifp_cb_s *cbs = arg;
364 * Allow Linux programs to use FreeBSD names. Don't presume
365 * we never have an interface named "eth", so don't make
366 * the test optional based on is_eth.
368 if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
370 if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
372 if (cbs->is_lo && IFP_IS_LOOP(ifp))
/*
 * Look up the interface that a Linux interface name refers to.
 * The name is split into a leading alphabetic prefix of len characters
 * and a trailing decimal unit parsed with strtoul().  is_lo is set when
 * the name is exactly lo, is_eth when the prefix is exactly eth.  Every
 * interface is then offered to ifname_linux_to_ifp_cb() via if_foreach().
 * NOTE(review): the early-exit statements and the final return are not
 * part of this listing; td is not used in the visible lines.
 */
384 ifname_linux_to_ifp(struct thread *td, const char *lxname)
386 struct ifname_linux_to_ifp_cb_s arg = {
/* Measure the alphabetic prefix, scanning at most LINUX_IFNAMSIZ bytes. */
396 for (len = 0; len < LINUX_IFNAMSIZ; ++len)
/* NOTE(review): a NUL is not alphabetic, so the second test looks redundant. */
397 if (!isalpha(lxname[len]) || lxname[len] == '\0')
/* An empty prefix, or one that fills the whole buffer, is special-cased. */
399 if (len == 0 || len == LINUX_IFNAMSIZ)
402 * Linux loopback interface name is lo (not lo0),
403 * we translate lo to lo0, loX to loX.
405 arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
406 arg.unit = (int)strtoul(lxname + len, &ep, 10);
407 if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
410 arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
412 if_foreach(ifname_linux_to_ifp_cb, &arg);
/*
 * Translate a Linux interface name to the FreeBSD one.
 * The lookup is done by ifname_linux_to_ifp() with the vnet of td set
 * as the current one.  When an interface is found and bsdname is not
 * NULL, the FreeBSD name is copied into bsdname (at most IFNAMSIZ bytes).
 * Returns 0 when an interface was found, EINVAL otherwise.
 */
417 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
419 struct epoch_tracker et;
422 CURVNET_SET(TD_TO_VNET(td));
424 ifp = ifname_linux_to_ifp(td, lxname);
425 if (ifp != NULL && bsdname != NULL)
426 strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
429 return (ifp != NULL ? 0 : EINVAL);
/*
 * Return the Linux interface flags for ifp: the interface flags and the
 * driver flags are OR-ed together and passed to bsd_to_linux_ifflags().
 * The driver flags are included because LINUX_IFF_RUNNING is derived
 * from IFF_DRV_RUNNING there.
 * NOTE(review): the merged value is held in an unsigned short, so any
 * flag bits that do not fit that type are dropped before translation.
 */
433 linux_ifflags(struct ifnet *ifp)
435 unsigned short flags;
439 flags = if_getflags(ifp) | if_getdrvflags(ifp);
440 return (bsd_to_linux_ifflags(flags));
/*
 * Translate FreeBSD interface flag bits in fl into the Linux LINUX_IFF_*
 * bits, one flag at a time, accumulating them in flags.
 * NOTE(review): the conditions guarding LINUX_IFF_UP, LINUX_IFF_DEBUG
 * and LINUX_IFF_NOARP, and the return statement, are not part of this
 * listing; presumably they test the like-named IFF_* bits and flags is
 * returned.
 */
444 bsd_to_linux_ifflags(int fl)
446 unsigned short flags = 0;
449 flags |= LINUX_IFF_UP;
450 if (fl & IFF_BROADCAST)
451 flags |= LINUX_IFF_BROADCAST;
453 flags |= LINUX_IFF_DEBUG;
454 if (fl & IFF_LOOPBACK)
455 flags |= LINUX_IFF_LOOPBACK;
456 if (fl & IFF_POINTOPOINT)
457 flags |= LINUX_IFF_POINTOPOINT;
/* The Linux RUNNING bit comes from the FreeBSD driver flag. */
458 if (fl & IFF_DRV_RUNNING)
459 flags |= LINUX_IFF_RUNNING;
461 flags |= LINUX_IFF_NOARP;
462 if (fl & IFF_PROMISC)
463 flags |= LINUX_IFF_PROMISC;
464 if (fl & IFF_ALLMULTI)
465 flags |= LINUX_IFF_ALLMULTI;
466 if (fl & IFF_MULTICAST)
467 flags |= LINUX_IFF_MULTICAST;
/*
 * Per-address callback handed to if_foreach_addr_type() by
 * linux_ifhwaddr().  arg is the struct l_sockaddr to fill in.
 * For a link-level address of type IFT_ETHER the output is zeroed,
 * its family is set to LINUX_ARPHRD_ETHER and LINUX_IFHWADDRLEN bytes
 * of the link-layer address are copied into sa_data.
 * NOTE(review): the return values, including the one taken for a
 * non-Ethernet address, are not part of this listing.
 */
472 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
474 struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
475 struct l_sockaddr *lsa = arg;
479 if (sdl->sdl_type != IFT_ETHER)
481 bzero(lsa, sizeof(*lsa));
482 lsa->sa_family = LINUX_ARPHRD_ETHER;
483 bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
488 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
493 if (IFP_IS_LOOP(ifp)) {
494 bzero(lsa, sizeof(*lsa));
495 lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
498 if (!IFP_IS_ETH(ifp))
500 if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
506 linux_to_bsd_domain(int domain)
510 case LINUX_AF_UNSPEC:
522 case LINUX_AF_APPLETALK:
523 return (AF_APPLETALK);
524 case LINUX_AF_NETLINK:
531 bsd_to_linux_domain(int domain)
536 return (LINUX_AF_UNSPEC);
538 return (LINUX_AF_UNIX);
540 return (LINUX_AF_INET);
542 return (LINUX_AF_INET6);
544 return (LINUX_AF_AX25);
546 return (LINUX_AF_IPX);
548 return (LINUX_AF_APPLETALK);
550 return (LINUX_AF_NETLINK);
556 * Based on the fact that:
557 * 1. Native and Linux storage of struct sockaddr
558 * and struct sockaddr_in6 are equal.
559 * 2. On Linux sa_family is the first member of all struct sockaddr.
/*
 * Produce a Linux copy of the FreeBSD socket address sa.
 * A buffer of len bytes is allocated from M_LINUX, the FreeBSD address
 * is copied into it verbatim, and its sa_family is overwritten with the
 * Linux domain obtained from bsd_to_linux_domain().
 * A len outside 2..UCHAR_MAX is special-cased before anything else.
 * NOTE(review): the remaining parameters, the condition guarding the
 * EAFNOSUPPORT return (presumably an unmappable family) and the code
 * that hands kosa back through lsa are not part of this listing.
 */
562 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
565 struct l_sockaddr *kosa;
569 if (len < 2 || len > UCHAR_MAX)
571 bdom = bsd_to_linux_domain(sa->sa_family);
573 return (EAFNOSUPPORT);
/* M_WAITOK: the allocation sleeps rather than fail. */
575 kosa = malloc(len, M_LINUX, M_WAITOK);
576 bcopy(sa, kosa, len);
577 kosa->sa_family = bdom;
583 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
587 struct l_sockaddr *kosa;
589 struct sockaddr_in6 *sin6;
593 int salen, bdom, error, hdrlen, namelen;
595 if (*len < 2 || *len > UCHAR_MAX)
603 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
604 * if it's a v4-mapped address, so reserve the proper space
607 if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
608 salen += sizeof(uint32_t);
613 kosa = malloc(salen, M_SONAME, M_WAITOK);
615 if ((error = copyin(osa, kosa, *len)))
618 bdom = linux_to_bsd_domain(kosa->sa_family);
620 error = EAFNOSUPPORT;
626 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
627 * which lacks the scope id compared with RFC2553 one. If we detect
628 * the situation, reject the address and write a message to system log.
630 * Still accept addresses for which the scope id is not used.
633 if (bdom == AF_INET6) {
634 sin6 = (struct sockaddr_in6 *)kosa;
635 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
636 (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
637 !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
638 !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
639 !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
640 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
641 sin6->sin6_scope_id = 0;
644 "obsolete pre-RFC2553 sockaddr_in6 rejected");
649 salen -= sizeof(uint32_t);
652 if (bdom == AF_INET) {
653 if (salen < sizeof(struct sockaddr_in)) {
657 salen = sizeof(struct sockaddr_in);
660 if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
661 hdrlen = offsetof(struct sockaddr_un, sun_path);
662 name = ((struct sockaddr_un *)kosa)->sun_path;
665 * Linux abstract namespace starts with a NUL byte.
666 * XXX We do not support abstract namespace yet.
668 namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
670 namelen = strnlen(name, salen - hdrlen);
671 salen = hdrlen + namelen;
672 if (salen > sizeof(struct sockaddr_un)) {
673 error = ENAMETOOLONG;
678 if (bdom == AF_NETLINK) {
679 if (salen < sizeof(struct sockaddr_nl)) {
683 salen = sizeof(struct sockaddr_nl);
686 sa = (struct sockaddr *)kosa;
687 sa->sa_family = bdom;
695 free(kosa, M_SONAME);
700 linux_dev_shm_create(void)
704 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
705 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
707 printf("%s: failed to create device node, error %d\n",
713 linux_dev_shm_destroy(void)
716 destroy_dev(dev_shm_cdev);
/*
 * Translate a FreeBSD bit field into the Linux one using a mapping table.
 * For each of the mapcnt entries in bitmap: when value masked with the
 * entry's bsd_mask equals its bsd_value, the bits covered by linux_mask
 * in the result are replaced with linux_value.
 * NOTE(review): the conditions under which a mask is replaced by the
 * value itself (presumably when the table leaves the mask at zero), the
 * initialisation of linux_ret, the use of no_value and the return are
 * not part of this listing.
 */
720 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
721 size_t mapcnt, int no_value)
723 int bsd_mask, bsd_value, linux_mask, linux_value;
730 for (i = 0; i < mapcnt; ++i) {
731 bsd_mask = bitmap[i].bsd_mask;
732 bsd_value = bitmap[i].bsd_value;
734 bsd_mask = bsd_value;
736 linux_mask = bitmap[i].linux_mask;
737 linux_value = bitmap[i].linux_value;
739 linux_mask = linux_value;
742 * If a mask larger than just the value is set, we explicitly
743 * want to make sure that only this bit we mapped within that
746 if ((value & bsd_mask) == bsd_value) {
747 linux_ret = (linux_ret & ~linux_mask) | linux_value;
/*
 * Translate a Linux bit field into the FreeBSD one using a mapping table;
 * the mirror image of bsd_to_linux_bits_().
 * For each of the mapcnt entries in bitmap: when value masked with the
 * entry's linux_mask equals its linux_value, the bits covered by bsd_mask
 * in the result are replaced with bsd_value.
 * NOTE(review): the conditions under which a mask is replaced by the
 * value itself (presumably when the table leaves the mask at zero), the
 * initialisation of bsd_ret, the use of no_value and the return are
 * not part of this listing.
 */
758 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
759 size_t mapcnt, int no_value)
761 int bsd_mask, bsd_value, linux_mask, linux_value;
768 for (i = 0; i < mapcnt; ++i) {
769 bsd_mask = bitmap[i].bsd_mask;
770 bsd_value = bitmap[i].bsd_value;
772 bsd_mask = bsd_value;
774 linux_mask = bitmap[i].linux_mask;
775 linux_value = bitmap[i].linux_value;
777 linux_mask = linux_value;
780 * If a mask larger than just the value is set, we explicitly
781 * want to make sure that only this bit we mapped within that
784 if ((value & linux_mask) == linux_value) {
785 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
796 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
803 if (lev & LINUX_POLLIN)
805 if (lev & LINUX_POLLPRI)
807 if (lev & LINUX_POLLOUT)
809 if (lev & LINUX_POLLERR)
811 if (lev & LINUX_POLLHUP)
813 if (lev & LINUX_POLLNVAL)
815 if (lev & LINUX_POLLRDNORM)
817 if (lev & LINUX_POLLRDBAND)
819 if (lev & LINUX_POLLWRBAND)
821 if (lev & LINUX_POLLWRNORM)
824 if (lev & LINUX_POLLRDHUP) {
826 * It seems that Linux silently ignores POLLRDHUP
827 * on non-socket file descriptors, unlike FreeBSD, where
828 * event bits are more strictly checked (POLLSTANDARD).
830 error = fget_unlocked(td, fd, &cap_no_rights, &fp);
833 * XXX. On FreeBSD POLLRDHUP applies only to
836 if (fp->f_type == DTYPE_SOCKET)
842 if (lev & LINUX_POLLMSG)
843 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
844 if (lev & LINUX_POLLREMOVE)
845 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
851 bsd_to_linux_poll_events(short bev, short *lev)
856 bits |= LINUX_POLLIN;
858 bits |= LINUX_POLLPRI;
859 if (bev & (POLLOUT | POLLWRNORM))
861 * POLLWRNORM is equal to POLLOUT on FreeBSD,
864 bits |= LINUX_POLLOUT;
866 bits |= LINUX_POLLERR;
868 bits |= LINUX_POLLHUP;
870 bits |= LINUX_POLLNVAL;
871 if (bev & POLLRDNORM)
872 bits |= LINUX_POLLRDNORM;
873 if (bev & POLLRDBAND)
874 bits |= LINUX_POLLRDBAND;
875 if (bev & POLLWRBAND)
876 bits |= LINUX_POLLWRBAND;
878 bits |= LINUX_POLLRDHUP;
/*
 * Report whether FreeBSD interface names should be shown to Linux
 * programs as they are, instead of generated ethN aliases.
 * The answer is the use_real_ifnames tunable; ifp is not consulted in
 * the visible code.
 */
884 linux_use_real_ifname(const struct ifnet *ifp)
887 return (use_real_ifnames);