2 * SPDX-License-Identifier: BSD-2-Clause
4 * Copyright (c) 2022 Alexander V. Chernikov
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include "opt_inet6.h"
31 #include <sys/types.h>
34 #include <sys/malloc.h>
35 #include <sys/rmlock.h>
36 #include <sys/socket.h>
37 #include <sys/vnode.h>
40 #include <net/if_dl.h>
41 #include <net/route.h>
42 #include <net/route/nhop.h>
43 #include <net/route/route_ctl.h>
44 #include <netlink/netlink.h>
45 #include <netlink/netlink_ctl.h>
46 #include <netlink/netlink_linux.h>
47 #include <netlink/netlink_route.h>
49 #include <compat/linux/linux.h>
50 #include <compat/linux/linux_common.h>
51 #include <compat/linux/linux_util.h>
/*
 * Debug logging setup for this file.  DEBUG_MOD_NAME and DEBUG_MAX_LEVEL
 * are defined before <netlink/netlink_debug.h> is included, and
 * _DECLARE_DEBUG() is then invoked with LOG_INFO.
 * NOTE(review): LOG_INFO is presumably the initial verbosity of the
 * "nl_linux" module -- confirm against netlink_debug.h.
 */
53 #define DEBUG_MOD_NAME nl_linux
54 #define DEBUG_MAX_LEVEL LOG_DEBUG3
55 #include <netlink/netlink_debug.h>
56 _DECLARE_DEBUG(LOG_INFO);
/*
 * Checks that the payload length of rta, as reported by
 * NL_RTA_DATA_LEN(), is exactly sz bytes.
 */
59 valid_rta_size(const struct rtattr *rta, int sz)
61 return (NL_RTA_DATA_LEN(rta) == sz);
/*
 * Checks that rta carries a payload of exactly sizeof(uint32_t) bytes.
 */
65 valid_rta_u32(const struct rtattr *rta)
67 return (valid_rta_size(rta, sizeof(uint32_t)));
/*
 * Reads the payload of rta as a uint32_t.
 * No size validation is performed here; check with valid_rta_u32()
 * before calling.
 */
71 _rta_get_uint32(const struct rtattr *rta)
73 return (*((const uint32_t *)NL_RTA_DATA_CONST(rta)));
/*
 * Adjusts an incoming neighbor message in place: the address family in
 * the ndmsg is converted with linux_to_bsd_domain().
 */
76 static struct nlmsghdr *
77 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
/* The ndmsg is taken to start immediately after the netlink header. */
79 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
/* Only touch the family when the message is long enough to hold an ndmsg. */
81 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
82 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
/*
 * Adjusts an incoming interface-address message in place: the address
 * family in the ifaddrmsg is converted with linux_to_bsd_domain().
 */
87 static struct nlmsghdr *
88 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
/* The ifaddrmsg is taken to start immediately after the netlink header. */
90 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
/* Only touch the family when the message is long enough to hold an ifaddrmsg. */
92 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
93 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
/*
 * Adjusts an incoming route message in place.  The address family in
 * the rtmsg is converted with linux_to_bsd_domain(), the rtm_table
 * value 254 is special-cased, and the attributes following the rtmsg
 * are walked so that a 32-bit attribute payload can be rewritten.
 */
98 static struct nlmsghdr *
99 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
101 /* Tweak address families and default fib only */
102 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
103 struct nlattr *nla, *nla_head;
/*
 * NOTE(review): unlike the neigh/ifaddr helpers, no nlmsg_len check is
 * visible before rtm is written -- confirm the caller guarantees size.
 */
106 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
/* 254 is presumably the Linux main routing table id -- TODO confirm. */
108 if (rtm->rtm_table == 254)
/* Bytes left for attributes after the netlink header and aligned rtmsg. */
111 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
112 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
/* First attribute sits right after the aligned rtmsg. */
113 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
115 NLA_FOREACH(nla, nla_head, attrs_len) {
116 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
117 nla->nla_type, nla->nla_len, attrs_len);
/* The same bytes are viewed through the rtattr layout from here on. */
118 struct rtattr *rta = (struct rtattr *)nla;
/* An attribute shorter than its own header is malformed. */
119 if (rta->rta_len < sizeof(struct rtattr)) {
122 switch (rta->rta_type) {
/* The payload must be exactly 4 bytes before it is read as uint32_t. */
124 if (!valid_rta_u32(rta))
127 uint32_t fibnum = _rta_get_uint32(rta);
128 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
/* Overwrites the attribute payload in place with 0. */
130 *((uint32_t *)NL_RTA_DATA(rta)) = 0;
/*
 * Dispatches an incoming message to the per-type translation helper
 * based on hdr->nlmsg_type.  Route get/new/del messages go to
 * rtnl_route_from_linux(), neighbor gets to rtnl_neigh_from_linux() and
 * address messages to rtnl_ifaddr_from_linux(); a debug line is logged
 * for types passed through untranslated.
 */
140 static struct nlmsghdr *
141 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
143 switch (hdr->nlmsg_type) {
144 case NL_RTM_GETROUTE:
145 case NL_RTM_NEWROUTE:
146 case NL_RTM_DELROUTE:
147 return (rtnl_route_from_linux(hdr, npt));
148 case NL_RTM_GETNEIGH:
149 return (rtnl_neigh_from_linux(hdr, npt));
151 return (rtnl_ifaddr_from_linux(hdr, npt));
152 /* Silence warning for the messages where no translation is required */
158 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
/*
 * Entry point for translating a message received from a Linux process.
 * Selects the translator by netlink_family; the visible case forwards
 * to rtnl_from_linux().  Installed as .msg_from_linux in
 * linux_netlink_v1.
 */
165 static struct nlmsghdr *
166 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
167 struct nl_pstate *npt)
169 switch (netlink_family) {
171 return (rtnl_from_linux(hdr, npt));
178 /************************************************************
180 ************************************************************/
/*
 * Passes an outgoing message through unmodified: reserves room for the
 * aligned message length in the writer and copies the message verbatim
 * when the reservation succeeds.
 */
183 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
/* Reserve the aligned length; only nlmsg_len bytes are copied below. */
186 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
188 if (out_hdr != NULL) {
189 memcpy(out_hdr, hdr, hdr->nlmsg_len);
/*
 * Starts a new message in the writer via nlmsg_add(), reusing the pid,
 * sequence number, type and flags of hdr.
 * NOTE(review): the trailing 0 is presumably the initial payload
 * length -- confirm against nlmsg_add().
 */
196 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
198 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
199 hdr->nlmsg_flags, 0));
/*
 * Reserves sz bytes in the writer and fills them from the data that
 * immediately follows the netlink header of hdr (the family-specific
 * header).  Normally used through the nlmsg_copy_next_header() macro.
 */
203 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
205 void *next_hdr = nlmsg_reserve_data(nw, sz, void);
/*
 * NOTE(review): no NULL check on next_hdr is visible before this copy,
 * and NLMSG_ALIGN(sz) bytes are copied while sz was requested --
 * confirm nlmsg_reserve_data() rounds the reservation up.
 */
206 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
/*
 * Typed wrapper around _nlmsg_copy_next_header(): passes sizeof(_t) as
 * the size and casts the result to _t *.
 */
210 #define nlmsg_copy_next_header(_hdr, _ns, _t) \
211 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
/*
 * Copies one attribute verbatim into the writer: reserves nla_len bytes
 * and copies the attribute header together with its payload.
 */
214 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
216 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
218 memcpy(nla, nla_orig, nla_orig->nla_len);
225 * Translate a FreeBSD interface name to a Linux interface name.
/*
 * Emits an IFLA_IFNAME string attribute holding the Linux form of the
 * interface name carried by nla.  The source name is read from the
 * bytes right after the attribute header and converted with
 * ifname_bsd_to_linux_name(); a result <= 0 is treated as a failed
 * conversion.
 */
228 nlmsg_translate_ifname_nla(struct nlattr *nla, struct nl_writer *nw)
/* Scratch buffer for the translated name. */
230 char ifname[LINUX_IFNAMSIZ];
232 if (ifname_bsd_to_linux_name((char *)(nla + 1), ifname,
233 sizeof(ifname)) <= 0)
235 return (nlattr_add_string(nw, IFLA_IFNAME, ifname));
/*
 * Sentinel meaning "attribute not translated"; nlmsg_copy_all_nla()
 * then copies the attribute verbatim.  Undefined again after
 * nlmsg_copy_all_nla().
 */
238 #define LINUX_NLA_UNHANDLED -1
240 * Translate a FreeBSD attribute to a Linux attribute.
241 * Returns LINUX_NLA_UNHANDLED when the attribute is not processed
242 * and the caller must take care of it, otherwise the result is returned.
/*
 * Chooses a per-attribute translation based on the message type and
 * then the attribute type.  The visible case hands the attribute to
 * nlmsg_translate_ifname_nla(); anything not matched yields
 * LINUX_NLA_UNHANDLED.
 */
245 nlmsg_translate_all_nla(struct nlmsghdr *hdr, struct nlattr *nla,
246 struct nl_writer *nw)
249 switch (hdr->nlmsg_type) {
253 switch (nla->nla_type) {
255 return (nlmsg_translate_ifname_nla(nla, nw));
/* Not translated here: the caller decides what to do with it. */
262 return (LINUX_NLA_UNHANDLED);
/*
 * Walks every attribute that follows the family-specific header of hdr
 * and writes it to nw.  Each attribute is first offered to
 * nlmsg_translate_all_nla(); attributes it reports as
 * LINUX_NLA_UNHANDLED are copied verbatim with nlmsg_copy_nla().
 *
 * raw_hdrlen is the unaligned size of the family-specific header.
 */
266 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
271 int hdrlen = NETLINK_ALIGN(raw_hdrlen);
/* Bytes left for attributes after the netlink and family headers. */
272 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
273 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
275 NLA_FOREACH(nla, nla_head, attrs_len) {
276 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
/* An attribute shorter than its own header is malformed. */
277 if (nla->nla_len < sizeof(struct nlattr)) {
280 ret = nlmsg_translate_all_nla(hdr, nla, nw);
/* No translation applied: copy the attribute as-is. */
281 if (ret == LINUX_NLA_UNHANDLED)
282 ret = nlmsg_copy_nla(nla, nw);
/* The sentinel is private to the attribute-copy helpers above. */
288 #undef LINUX_NLA_UNHANDLED
/*
 * Builds a flag word from if_flags by examining one bit at a time.
 * Used by rtnl_newlink_to_linux() to rewrite ifi_flags.
 * Only bits 0..30 are examined; bit 31 is never tested.
 */
291 rtnl_if_flags_to_linux(unsigned int if_flags)
293 unsigned int result = 0;
295 for (int i = 0; i < 31; i++) {
296 unsigned int flag = 1 << i;
/* Bits that are not set in the input need no handling. */
297 if (!(flag & if_flags))
304 case IFF_POINTOPOINT:
305 case IFF_DRV_RUNNING:
312 case IFF_DRV_OACTIVE:
324 /* No Linux analogue */
/*
 * Rewrites an outgoing link message into nw: copies the netlink header
 * and the ifinfomsg, converts the address family, interface type and
 * interface flags, copies the attributes, and appends IFLA_QDISC and
 * IFLA_TXQLEN attributes with fixed values.
 */
334 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
335 struct nl_writer *nw)
337 if (!nlmsg_copy_header(hdr, nw))
340 struct ifinfomsg *ifinfo;
/* ifinfo points at the copy inside nw, so the edits below affect the output. */
341 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
343 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
344 /* Convert interface type */
345 switch (ifinfo->ifi_type) {
347 ifinfo->ifi_type = LINUX_ARPHRD_ETHER;
350 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
352 /* Copy attributes unchanged */
353 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
356 /* make ip(8) happy */
/* Fixed placeholder values, not read from the interface. */
357 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
360 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
364 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
/*
 * Rewrites an outgoing interface-address message into nw: copies the
 * netlink header and the ifaddrmsg, converts the address family with
 * bsd_to_linux_domain(), then copies the attributes.
 */
369 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
370 struct nl_writer *nw)
372 if (!nlmsg_copy_header(hdr, nw))
375 struct ifaddrmsg *ifamsg;
/* ifamsg points at the copy inside nw, so the edit below affects the output. */
376 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
378 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
379 /* XXX: fake ifa_flags? */
381 /* Copy attributes unchanged */
382 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
386 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
/*
 * Rewrites an outgoing neighbor message into nw: copies the netlink
 * header and the ndmsg, converts the address family with
 * bsd_to_linux_domain(), then copies the attributes.
 */
391 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
392 struct nl_writer *nw)
394 if (!nlmsg_copy_header(hdr, nw))
/* ndm points at the copy inside nw, so the edit below affects the output. */
398 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
400 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
402 /* Copy attributes unchanged */
403 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
407 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
/*
 * Rewrites an outgoing route message into nw: copies the netlink header
 * and the rtmsg, converts the address family, then walks the attributes
 * itself.  One attribute type is re-emitted as an NL_RTA_TABLE u32
 * built from its payload; other attributes are copied verbatim.
 */
412 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
413 struct nl_writer *nw)
415 if (!nlmsg_copy_header(hdr, nw))
/* rtm points at the copy inside nw, so the edit below affects the output. */
419 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
420 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
424 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
/* Bytes left for attributes after the netlink header and aligned rtmsg. */
425 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
426 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
428 NLA_FOREACH(nla, nla_head, attrs_len) {
/* The same bytes are viewed through the rtattr layout from here on. */
429 struct rtattr *rta = (struct rtattr *)nla;
430 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
/* An attribute shorter than its own header is malformed. */
431 if (rta->rta_len < sizeof(struct rtattr)) {
435 switch (rta->rta_type) {
/* NOTE(review): confirm the payload size is validated before this read. */
439 fibnum = _rta_get_uint32(rta);
442 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
/* Emit a fresh table attribute instead of copying the original one. */
443 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
/* Copied as-is. */
448 if (!nlmsg_copy_nla(nla, nw))
455 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
/*
 * Dispatches an outgoing message to the per-type rewriting helper based
 * on hdr->nlmsg_type.  Link, address, route and neighbor messages each
 * have a dedicated helper; any other type is logged and passed through
 * handle_default_out() unmodified.
 */
460 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
462 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
464 switch (hdr->nlmsg_type) {
468 return (rtnl_newlink_to_linux(hdr, nlp, nw));
471 return (rtnl_newaddr_to_linux(hdr, nlp, nw));
/* New and delete route messages share one helper. */
472 case NL_RTM_NEWROUTE:
473 case NL_RTM_DELROUTE:
474 return (rtnl_newroute_to_linux(hdr, nlp, nw));
/* All three neighbor message types share one helper. */
475 case NL_RTM_NEWNEIGH:
476 case NL_RTM_DELNEIGH:
477 case NL_RTM_GETNEIGH:
478 return (rtnl_newneigh_to_linux(hdr, nlp, nw));
480 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
482 return (handle_default_out(hdr, nw));
/*
 * Rewrites an outgoing error/ack message into nw: copies the netlink
 * header and the nlmsgerr, converts the error code with
 * bsd_to_linux_errno(), and copies any bytes following the nlmsgerr
 * without translation.
 */
487 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
489 if (!nlmsg_copy_header(hdr, nw))
492 struct nlmsgerr *nlerr;
/* nlerr points at the copy inside nw, so the edit below affects the output. */
493 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
494 nlerr->error = bsd_to_linux_errno(nlerr->error);
/* Number of source bytes consumed so far: netlink header plus nlmsgerr. */
496 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
/* Equal lengths mean nothing follows the nlmsgerr. */
497 if (hdr->nlmsg_len == copied_len) {
503 * CAP_ACK was not set. Original request needs to be translated.
504 * XXX: implement translation of the original message
506 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
507 nlerr->msg.nlmsg_type);
508 char *dst_payload, *src_payload;
/* Remaining bytes after the nlmsgerr are copied verbatim. */
509 int copy_len = hdr->nlmsg_len - copied_len;
510 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
512 src_payload = (char *)hdr + copied_len;
514 memcpy(dst_payload, src_payload, copy_len);
/*
 * Rewrites a single outgoing message into nw.  Types below
 * NLMSG_MIN_TYPE are handled here: one of them is routed to
 * nlmsg_error_to_linux(), the rest go through handle_default_out().
 * Other messages are dispatched by netlink_family, with
 * rtnl_to_linux() as the visible family handler and
 * handle_default_out() as the fallback.
 */
521 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
522 struct nl_writer *nw)
/* Types below NLMSG_MIN_TYPE are not dispatched by family. */
524 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
525 switch (hdr->nlmsg_type) {
527 return (nlmsg_error_to_linux(hdr, nlp, nw));
531 return (handle_default_out(hdr, nw));
533 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
535 return (handle_default_out(hdr, nw));
539 switch (netlink_family) {
541 return (rtnl_to_linux(hdr, nlp, nw));
543 return (handle_default_out(hdr, nw));
/*
 * Rewrites a contiguous buffer of outgoing messages into an mbuf chain.
 * A chain writer sized for data_length is set up, then each message in
 * buf is passed to nlmsg_to_linux().  Installed as .msgs_to_linux in
 * linux_netlink_v1.
 */
548 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
550 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
551 struct nl_writer nw = {};
/* The chain head is handed back through m by nlmsg_get_chain_writer(). */
553 struct mbuf *m = NULL;
554 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
555 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
560 /* Assume correct headers. Buffer IS mutable */
/*
 * Iterate while a full netlink header still fits in the buffer.
 * NOTE(review): the left side is size_t, so data_length is compared as
 * unsigned here -- confirm callers never pass a negative length.
 */
562 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
563 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
/* Aligned length of the current message. */
564 int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
567 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
568 RT_LOG(LOG_DEBUG, "failed to process msg type %d",
576 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
577 m ? m_length(m, NULL) : 0);
/*
 * Rewrites the messages held in an mbuf chain.  The chain is flattened
 * into a temporary buffer, which is then processed with
 * nlmsgs_to_linux().  Installed as .mbufs_to_linux in linux_netlink_v1.
 */
583 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
585 /* XXX: easiest solution, not optimized for performance */
586 int data_length = m_length(m, NULL);
/* M_NOWAIT: the allocation may fail rather than sleep. */
587 char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
589 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
/* Flatten the whole chain into buf. */
594 m_copydata(m, 0, data_length, buf);
/* m is reassigned to the result of the conversion. */
597 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
/*
 * Table of the translation entry points implemented in this file;
 * published through linux_netlink_p by linux_netlink_register().
 */
603 static struct linux_netlink_provider linux_netlink_v1 = {
604 .mbufs_to_linux = mbufs_to_linux,
605 .msgs_to_linux = nlmsgs_to_linux,
606 .msg_from_linux = nlmsg_from_linux,
/*
 * Publishes this file's provider table by pointing linux_netlink_p at
 * linux_netlink_v1.
 */
610 linux_netlink_register(void)
612 linux_netlink_p = &linux_netlink_v1;
/*
 * Withdraws the provider table by resetting linux_netlink_p to NULL.
 */
616 linux_netlink_deregister(void)
618 linux_netlink_p = NULL;