2 * Copyright (c) 2015 Dmitry Chagin <dchagin@FreeBSD.org>
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
7 * 1. Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 #include "opt_inet6.h"
28 #include <sys/param.h>
30 #include <sys/ctype.h>
32 #include <sys/filedesc.h>
35 #include <sys/malloc.h>
38 #include <sys/signalvar.h>
39 #include <sys/socket.h>
40 #include <sys/socketvar.h>
43 #include <net/if_var.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <netlink/netlink.h>
49 #include <netinet/in.h>
51 #include <compat/linux/linux.h>
52 #include <compat/linux/linux_common.h>
53 #include <compat/linux/linux_mib.h>
54 #include <compat/linux/linux_util.h>
56 _Static_assert(LINUX_IFNAMSIZ == IFNAMSIZ, "Linux IFNAMSIZ");
57 _Static_assert(sizeof(struct sockaddr) == sizeof(struct l_sockaddr),
58 "Linux struct sockaddr size");
59 _Static_assert(offsetof(struct sockaddr, sa_data) ==
60 offsetof(struct l_sockaddr, sa_data), "Linux struct sockaddr layout");
62 static bool use_real_ifnames = false;
63 SYSCTL_BOOL(_compat_linux, OID_AUTO, use_real_ifnames, CTLFLAG_RWTUN,
65 "Use FreeBSD interface names instead of generating ethN aliases");
67 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
68 LINUX_SIGHUP, /* SIGHUP */
69 LINUX_SIGINT, /* SIGINT */
70 LINUX_SIGQUIT, /* SIGQUIT */
71 LINUX_SIGILL, /* SIGILL */
72 LINUX_SIGTRAP, /* SIGTRAP */
73 LINUX_SIGABRT, /* SIGABRT */
75 LINUX_SIGFPE, /* SIGFPE */
76 LINUX_SIGKILL, /* SIGKILL */
77 LINUX_SIGBUS, /* SIGBUS */
78 LINUX_SIGSEGV, /* SIGSEGV */
79 LINUX_SIGSYS, /* SIGSYS */
80 LINUX_SIGPIPE, /* SIGPIPE */
81 LINUX_SIGALRM, /* SIGALRM */
82 LINUX_SIGTERM, /* SIGTERM */
83 LINUX_SIGURG, /* SIGURG */
84 LINUX_SIGSTOP, /* SIGSTOP */
85 LINUX_SIGTSTP, /* SIGTSTP */
86 LINUX_SIGCONT, /* SIGCONT */
87 LINUX_SIGCHLD, /* SIGCHLD */
88 LINUX_SIGTTIN, /* SIGTTIN */
89 LINUX_SIGTTOU, /* SIGTTOU */
90 LINUX_SIGIO, /* SIGIO */
91 LINUX_SIGXCPU, /* SIGXCPU */
92 LINUX_SIGXFSZ, /* SIGXFSZ */
93 LINUX_SIGVTALRM,/* SIGVTALRM */
94 LINUX_SIGPROF, /* SIGPROF */
95 LINUX_SIGWINCH, /* SIGWINCH */
97 LINUX_SIGUSR1, /* SIGUSR1 */
98 LINUX_SIGUSR2 /* SIGUSR2 */
101 #define LINUX_SIGPWREMU (SIGRTMIN + (LINUX_SIGRTMAX - LINUX_SIGRTMIN) + 1)
103 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
104 SIGHUP, /* LINUX_SIGHUP */
105 SIGINT, /* LINUX_SIGINT */
106 SIGQUIT, /* LINUX_SIGQUIT */
107 SIGILL, /* LINUX_SIGILL */
108 SIGTRAP, /* LINUX_SIGTRAP */
109 SIGABRT, /* LINUX_SIGABRT */
110 SIGBUS, /* LINUX_SIGBUS */
111 SIGFPE, /* LINUX_SIGFPE */
112 SIGKILL, /* LINUX_SIGKILL */
113 SIGUSR1, /* LINUX_SIGUSR1 */
114 SIGSEGV, /* LINUX_SIGSEGV */
115 SIGUSR2, /* LINUX_SIGUSR2 */
116 SIGPIPE, /* LINUX_SIGPIPE */
117 SIGALRM, /* LINUX_SIGALRM */
118 SIGTERM, /* LINUX_SIGTERM */
119 SIGBUS, /* LINUX_SIGSTKFLT */
120 SIGCHLD, /* LINUX_SIGCHLD */
121 SIGCONT, /* LINUX_SIGCONT */
122 SIGSTOP, /* LINUX_SIGSTOP */
123 SIGTSTP, /* LINUX_SIGTSTP */
124 SIGTTIN, /* LINUX_SIGTTIN */
125 SIGTTOU, /* LINUX_SIGTTOU */
126 SIGURG, /* LINUX_SIGURG */
127 SIGXCPU, /* LINUX_SIGXCPU */
128 SIGXFSZ, /* LINUX_SIGXFSZ */
129 SIGVTALRM, /* LINUX_SIGVTALRM */
130 SIGPROF, /* LINUX_SIGPROF */
131 SIGWINCH, /* LINUX_SIGWINCH */
132 SIGIO, /* LINUX_SIGIO */
134 * FreeBSD does not have a SIGPWR signal, so map the Linux SIGPWR signal
135 * to the first unused FreeBSD signal number. Since Linux supports
136 * signals from 1 to 64, we are OK here as our SIGRTMIN = 65.
138 LINUX_SIGPWREMU,/* LINUX_SIGPWR */
139 SIGSYS /* LINUX_SIGSYS */
142 static struct cdev *dev_shm_cdev;
143 static struct cdevsw dev_shm_cdevsw = {
144 .d_version = D_VERSION,
149 * Map Linux RT signals to the FreeBSD RT signals.
152 linux_to_bsd_rt_signal(int sig)
155 return (SIGRTMIN + sig - LINUX_SIGRTMIN);
159 bsd_to_linux_rt_signal(int sig)
162 return (sig - SIGRTMIN + LINUX_SIGRTMIN);
166 linux_to_bsd_signal(int sig)
169 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
171 if (sig < LINUX_SIGRTMIN)
172 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
174 return (linux_to_bsd_rt_signal(sig));
178 bsd_to_linux_signal(int sig)
181 if (sig <= LINUX_SIGTBLSZ)
182 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
183 if (sig == LINUX_SIGPWREMU)
184 return (LINUX_SIGPWR);
186 return (bsd_to_linux_rt_signal(sig));
190 linux_to_bsd_sigaltstack(int lsa)
194 if (lsa & LINUX_SS_DISABLE)
197 * Linux ignores the SS_ONSTACK flag for the ss
198 * parameter, while FreeBSD prohibits it.
204 bsd_to_linux_sigaltstack(int bsa)
208 if (bsa & SS_DISABLE)
209 lsa |= LINUX_SS_DISABLE;
210 if (bsa & SS_ONSTACK)
211 lsa |= LINUX_SS_ONSTACK;
216 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
221 for (l = 1; l <= LINUX_SIGRTMAX; l++) {
222 if (LINUX_SIGISMEMBER(*lss, l)) {
223 b = linux_to_bsd_signal(l);
231 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
235 LINUX_SIGEMPTYSET(*lss);
236 for (b = 1; b <= SIGRTMAX; b++) {
237 if (SIGISMEMBER(*bss, b)) {
238 l = bsd_to_linux_signal(b);
240 LINUX_SIGADDSET(*lss, l);
246 * Translate a FreeBSD interface name to a Linux interface name
247 * by interface name, and return the number of bytes copied to lxname.
250 ifname_bsd_to_linux_name(const char *bsdname, char *lxname, size_t len)
252 struct epoch_tracker et;
256 CURVNET_ASSERT_SET();
260 ifp = ifunit(bsdname);
262 ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
268 * Translate a FreeBSD interface name to a Linux interface name
269 * by interface index, and return the number of bytes copied to lxname.
272 ifname_bsd_to_linux_idx(u_int idx, char *lxname, size_t len)
274 struct epoch_tracker et;
279 CURVNET_SET(TD_TO_VNET(curthread));
281 ifp = ifnet_byindex(idx);
283 ret = ifname_bsd_to_linux_ifp(ifp, lxname, len);
290 * Translate a FreeBSD interface name to a Linux interface name,
291 * and return the number of bytes copied to lxname, 0 if interface
292 * not found, -1 on error.
294 struct ifname_bsd_to_linux_ifp_cb_s {
302 ifname_bsd_to_linux_ifp_cb(if_t ifp, void *arg)
304 struct ifname_bsd_to_linux_ifp_cb_s *cbs = arg;
307 return (snprintf(cbs->lxname, cbs->len, "eth%d", cbs->ethno));
314 ifname_bsd_to_linux_ifp(struct ifnet *ifp, char *lxname, size_t len)
316 struct ifname_bsd_to_linux_ifp_cb_s arg = {
326 * The Linux loopback interface name is lo (not lo0);
327 * we translate lo0 to lo, loX to loX.
329 if (IFP_IS_LOOP(ifp) && strncmp(if_name(ifp), "lo0", IFNAMSIZ) == 0)
330 return (strlcpy(lxname, "lo", len));
332 /* Short-circuit non ethernet interfaces. */
333 if (!IFP_IS_ETH(ifp) || linux_use_real_ifname(ifp))
334 return (strlcpy(lxname, if_name(ifp), len));
336 /* Determine the (relative) unit number for ethernet interfaces. */
337 return (if_foreach(ifname_bsd_to_linux_ifp_cb, &arg));
341 * Translate a Linux interface name to a FreeBSD interface name,
342 * and return the associated ifnet structure.
343 * bsdname and lxname need to be at least IFNAMSIZ bytes long, but
344 * can point to the same buffer.
346 struct ifname_linux_to_ifp_cb_s {
356 ifname_linux_to_ifp_cb(if_t ifp, void *arg)
358 struct ifname_linux_to_ifp_cb_s *cbs = arg;
363 * Allow Linux programs to use FreeBSD names. Don't presume
364 * we never have an interface named "eth", so don't make
365 * the test optional based on is_eth.
367 if (strncmp(if_name(ifp), cbs->lxname, LINUX_IFNAMSIZ) == 0)
369 if (cbs->is_eth && IFP_IS_ETH(ifp) && cbs->unit == cbs->ethno)
371 if (cbs->is_lo && IFP_IS_LOOP(ifp))
383 ifname_linux_to_ifp(struct thread *td, const char *lxname)
385 struct ifname_linux_to_ifp_cb_s arg = {
395 for (len = 0; len < LINUX_IFNAMSIZ; ++len)
396 if (!isalpha(lxname[len]) || lxname[len] == '\0')
398 if (len == 0 || len == LINUX_IFNAMSIZ)
401 * Linux loopback interface name is lo (not lo0),
402 * we translate lo to lo0, loX to loX.
404 arg.is_lo = (len == 2 && strncmp(lxname, "lo", LINUX_IFNAMSIZ) == 0);
405 arg.unit = (int)strtoul(lxname + len, &ep, 10);
406 if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
409 arg.is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
411 if_foreach(ifname_linux_to_ifp_cb, &arg);
416 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
418 struct epoch_tracker et;
421 CURVNET_SET(TD_TO_VNET(td));
423 ifp = ifname_linux_to_ifp(td, lxname);
424 if (ifp != NULL && bsdname != NULL)
425 strlcpy(bsdname, if_name(ifp), IFNAMSIZ);
428 return (ifp != NULL ? 0 : EINVAL);
432 linux_ifflags(struct ifnet *ifp)
434 unsigned short flags;
438 flags = if_getflags(ifp) | if_getdrvflags(ifp);
439 return (bsd_to_linux_ifflags(flags));
443 bsd_to_linux_ifflags(int fl)
445 unsigned short flags = 0;
448 flags |= LINUX_IFF_UP;
449 if (fl & IFF_BROADCAST)
450 flags |= LINUX_IFF_BROADCAST;
452 flags |= LINUX_IFF_DEBUG;
453 if (fl & IFF_LOOPBACK)
454 flags |= LINUX_IFF_LOOPBACK;
455 if (fl & IFF_POINTOPOINT)
456 flags |= LINUX_IFF_POINTOPOINT;
457 if (fl & IFF_DRV_RUNNING)
458 flags |= LINUX_IFF_RUNNING;
460 flags |= LINUX_IFF_NOARP;
461 if (fl & IFF_PROMISC)
462 flags |= LINUX_IFF_PROMISC;
463 if (fl & IFF_ALLMULTI)
464 flags |= LINUX_IFF_ALLMULTI;
465 if (fl & IFF_MULTICAST)
466 flags |= LINUX_IFF_MULTICAST;
471 linux_ifhwaddr_cb(void *arg, struct ifaddr *ifa, u_int count)
473 struct sockaddr_dl *sdl = (struct sockaddr_dl *)ifa->ifa_addr;
474 struct l_sockaddr *lsa = arg;
478 if (sdl->sdl_type != IFT_ETHER)
480 bzero(lsa, sizeof(*lsa));
481 lsa->sa_family = LINUX_ARPHRD_ETHER;
482 bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
487 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
492 if (IFP_IS_LOOP(ifp)) {
493 bzero(lsa, sizeof(*lsa));
494 lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
497 if (!IFP_IS_ETH(ifp))
499 if (if_foreach_addr_type(ifp, AF_LINK, linux_ifhwaddr_cb, lsa) > 0)
505 linux_to_bsd_domain(int domain)
509 case LINUX_AF_UNSPEC:
521 case LINUX_AF_APPLETALK:
522 return (AF_APPLETALK);
523 case LINUX_AF_NETLINK:
530 bsd_to_linux_domain(int domain)
535 return (LINUX_AF_UNSPEC);
537 return (LINUX_AF_UNIX);
539 return (LINUX_AF_INET);
541 return (LINUX_AF_INET6);
543 return (LINUX_AF_AX25);
545 return (LINUX_AF_IPX);
547 return (LINUX_AF_APPLETALK);
549 return (LINUX_AF_NETLINK);
555 * Based on the fact that:
556 * 1. Native and Linux storage of struct sockaddr
557 * and struct sockaddr_in6 are equal.
558 * 2. On Linux sa_family is the first member of all struct sockaddr.
561 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
564 struct l_sockaddr *kosa;
568 if (len < 2 || len > UCHAR_MAX)
570 bdom = bsd_to_linux_domain(sa->sa_family);
572 return (EAFNOSUPPORT);
574 kosa = malloc(len, M_LINUX, M_WAITOK);
575 bcopy(sa, kosa, len);
576 kosa->sa_family = bdom;
582 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
586 struct l_sockaddr *kosa;
588 struct sockaddr_in6 *sin6;
592 int salen, bdom, error, hdrlen, namelen;
594 if (*len < 2 || *len > UCHAR_MAX)
602 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
603 * if it's a v4-mapped address, so reserve the proper space
606 if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
607 salen += sizeof(uint32_t);
612 kosa = malloc(salen, M_SONAME, M_WAITOK);
614 if ((error = copyin(osa, kosa, *len)))
617 bdom = linux_to_bsd_domain(kosa->sa_family);
619 error = EAFNOSUPPORT;
625 * Older Linux IPv6 code uses the obsolete RFC2133 struct sockaddr_in6,
626 * which lacks the scope id compared with the RFC2553 one. If we detect
627 * the situation, reject the address and write a message to the system log.
629 * Still accept addresses for which the scope id is not used.
632 if (bdom == AF_INET6) {
633 sin6 = (struct sockaddr_in6 *)kosa;
634 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
635 (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
636 !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
637 !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
638 !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
639 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
640 sin6->sin6_scope_id = 0;
643 "obsolete pre-RFC2553 sockaddr_in6 rejected");
648 salen -= sizeof(uint32_t);
651 if (bdom == AF_INET) {
652 if (salen < sizeof(struct sockaddr_in)) {
656 salen = sizeof(struct sockaddr_in);
659 if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
660 hdrlen = offsetof(struct sockaddr_un, sun_path);
661 name = ((struct sockaddr_un *)kosa)->sun_path;
664 * Linux abstract namespace starts with a NUL byte.
665 * XXX We do not support abstract namespace yet.
667 namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
669 namelen = strnlen(name, salen - hdrlen);
670 salen = hdrlen + namelen;
671 if (salen > sizeof(struct sockaddr_un)) {
672 error = ENAMETOOLONG;
677 if (bdom == AF_NETLINK) {
678 if (salen < sizeof(struct sockaddr_nl)) {
682 salen = sizeof(struct sockaddr_nl);
685 sa = (struct sockaddr *)kosa;
686 sa->sa_family = bdom;
694 free(kosa, M_SONAME);
699 linux_dev_shm_create(void)
703 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
704 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
706 printf("%s: failed to create device node, error %d\n",
712 linux_dev_shm_destroy(void)
715 destroy_dev(dev_shm_cdev);
719 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
720 size_t mapcnt, int no_value)
722 int bsd_mask, bsd_value, linux_mask, linux_value;
729 for (i = 0; i < mapcnt; ++i) {
730 bsd_mask = bitmap[i].bsd_mask;
731 bsd_value = bitmap[i].bsd_value;
733 bsd_mask = bsd_value;
735 linux_mask = bitmap[i].linux_mask;
736 linux_value = bitmap[i].linux_value;
738 linux_mask = linux_value;
741 * If a mask larger than just the value is set, we explicitly
742 * want to make sure that only this bit we mapped within that
745 if ((value & bsd_mask) == bsd_value) {
746 linux_ret = (linux_ret & ~linux_mask) | linux_value;
757 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
758 size_t mapcnt, int no_value)
760 int bsd_mask, bsd_value, linux_mask, linux_value;
767 for (i = 0; i < mapcnt; ++i) {
768 bsd_mask = bitmap[i].bsd_mask;
769 bsd_value = bitmap[i].bsd_value;
771 bsd_mask = bsd_value;
773 linux_mask = bitmap[i].linux_mask;
774 linux_value = bitmap[i].linux_value;
776 linux_mask = linux_value;
779 * If a mask larger than just the value is set, we explicitly
780 * want to make sure that only this bit we mapped within that
783 if ((value & linux_mask) == linux_value) {
784 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
795 linux_to_bsd_poll_events(struct thread *td, int fd, short lev,
802 if (lev & LINUX_POLLIN)
804 if (lev & LINUX_POLLPRI)
806 if (lev & LINUX_POLLOUT)
808 if (lev & LINUX_POLLERR)
810 if (lev & LINUX_POLLHUP)
812 if (lev & LINUX_POLLNVAL)
814 if (lev & LINUX_POLLRDNORM)
816 if (lev & LINUX_POLLRDBAND)
818 if (lev & LINUX_POLLWRBAND)
820 if (lev & LINUX_POLLWRNORM)
823 if (lev & LINUX_POLLRDHUP) {
825 * It seems that Linux silently ignores POLLRDHUP
826 * on non-socket file descriptors, unlike FreeBSD, where
827 * event bits are checked more strictly (POLLSTANDARD).
829 error = fget_unlocked(td, fd, &cap_no_rights, &fp);
832 * XXX. On FreeBSD POLLRDHUP applies only to
835 if (fp->f_type == DTYPE_SOCKET)
841 if (lev & LINUX_POLLMSG)
842 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLMSG, events(%d)", lev);
843 if (lev & LINUX_POLLREMOVE)
844 LINUX_RATELIMIT_MSG_OPT1("unsupported POLLREMOVE, events(%d)", lev);
850 bsd_to_linux_poll_events(short bev, short *lev)
855 bits |= LINUX_POLLIN;
857 bits |= LINUX_POLLPRI;
858 if (bev & (POLLOUT | POLLWRNORM))
860 * POLLWRNORM is equal to POLLOUT on FreeBSD,
863 bits |= LINUX_POLLOUT;
865 bits |= LINUX_POLLERR;
867 bits |= LINUX_POLLHUP;
869 bits |= LINUX_POLLNVAL;
870 if (bev & POLLRDNORM)
871 bits |= LINUX_POLLRDNORM;
872 if (bev & POLLRDBAND)
873 bits |= LINUX_POLLRDBAND;
874 if (bev & POLLWRBAND)
875 bits |= LINUX_POLLWRBAND;
877 bits |= LINUX_POLLRDHUP;
883 linux_use_real_ifname(const struct ifnet *ifp)
886 return (use_real_ifnames);