From e0c15f45abd4bd5165e11b557a8c90d0faf5cfeb Mon Sep 17 00:00:00 2001 From: Kristof Provost Date: Mon, 18 Jan 2021 16:55:53 -0500 Subject: [PATCH] MFC r368237: if: Fix panic when destroying vnet and epair simultaneously When destroying a vnet and an epair (with one end in the vnet) we often panicked. This was the result of the destruction of the epair, which destroys both ends simultaneously, happening while vnet_if_return() was moving the struct ifnet to its home vnet. This can result in a freed ifnet being re-added to the home vnet V_ifnet list. That in turn panics the next time the ifnet is used. Prevent this race by ensuring that vnet_if_return() cannot run at the same time as if_detach() or epair_clone_destroy(). PR: 238870, 234985, 244703, 250870 Sponsored by: Modirum MDPay Approved by: so --- sys/net/if.c | 147 ++++++++++++++++++++++++++++++++--------------- sys/net/if_var.h | 24 ++------ 2 files changed, 104 insertions(+), 67 deletions(-) diff --git a/sys/net/if.c b/sys/net/if.c index 4ec03f9f12e..1e423949712 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -273,6 +273,8 @@ static int if_getgroupmembers(struct ifgroupreq *); static void if_delgroups(struct ifnet *); static void if_attach_internal(struct ifnet *, int, struct if_clone *); static int if_detach_internal(struct ifnet *, int, struct if_clone **); +static void if_link_ifnet(struct ifnet *); +static bool if_unlink_ifnet(struct ifnet *, bool); #ifdef VIMAGE static void if_vmove(struct ifnet *, struct vnet *); #endif @@ -304,18 +306,17 @@ VNET_DEFINE(struct ifnet **, ifindex_table); /* * The global network interface list (V_ifnet) and related state (such as - * if_index, if_indexlim, and ifindex_table) are protected by an sxlock and - * an rwlock. Either may be acquired shared to stablize the list, but both - * must be acquired writable to modify the list. This model allows us to - * both stablize the interface list during interrupt thread processing, but - * also to stablize it over long-running ioctls, without introducing priority - * inversions and deadlocks. + * if_index, if_indexlim, and ifindex_table) are protected by an sxlock. + * This may be acquired to stabilise the list, or we may rely on NET_EPOCH. */ struct rwlock ifnet_rwlock; RW_SYSINIT_FLAGS(ifnet_rw, &ifnet_rwlock, "ifnet_rw", RW_RECURSE); struct sx ifnet_sxlock; SX_SYSINIT_FLAGS(ifnet_sx, &ifnet_sxlock, "ifnet_sx", SX_RECURSE); +struct sx ifnet_detach_sxlock; +SX_SYSINIT(ifnet_detach, &ifnet_detach_sxlock, "ifnet_detach_sx"); + /* * The allocation of network interfaces is a rather non-atomic affair; we * need to select an index before we are ready to expose the interface for @@ -475,17 +476,87 @@ vnet_if_uninit(const void *unused __unused) } VNET_SYSUNINIT(vnet_if_uninit, SI_SUB_INIT_IF, SI_ORDER_FIRST, vnet_if_uninit, NULL); +#endif +static void +if_link_ifnet(struct ifnet *ifp) +{ + + IFNET_WLOCK(); + CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); +#ifdef VIMAGE + curvnet->vnet_ifcnt++; +#endif + IFNET_WUNLOCK(); +} + +static bool +if_unlink_ifnet(struct ifnet *ifp, bool vmove) +{ + struct ifnet *iter; + int found = 0; + + IFNET_WLOCK(); + CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) + if (iter == ifp) { + CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); + if (!vmove) + ifp->if_flags |= IFF_DYING; + found = 1; + break; + } +#ifdef VIMAGE + curvnet->vnet_ifcnt--; +#endif + IFNET_WUNLOCK(); + + return (found); +} + +#ifdef VIMAGE static void vnet_if_return(const void *unused __unused) { struct ifnet *ifp, *nifp; + struct ifnet **pending; + int found, i; + + i = 0; + + /* + * We need to protect our access to the V_ifnet tailq. Ordinarily we'd + * enter NET_EPOCH, but that's not possible, because if_vmove() calls + * if_detach_internal(), which waits for NET_EPOCH callbacks to + * complete. We can't do that from within NET_EPOCH. + * + * However, we can also use the IFNET_xLOCK, which is the V_ifnet + * read/write lock. We cannot hold the lock as we call if_vmove() + * though, as that presents LOR w.r.t ifnet_sx, in_multi_sx and iflib + * ctx lock. + */ + IFNET_WLOCK(); + + pending = malloc(sizeof(struct ifnet *) * curvnet->vnet_ifcnt, + M_IFNET, M_WAITOK | M_ZERO); /* Return all inherited interfaces to their parent vnets. */ CK_STAILQ_FOREACH_SAFE(ifp, &V_ifnet, if_link, nifp) { - if (ifp->if_home_vnet != ifp->if_vnet) - if_vmove(ifp, ifp->if_home_vnet); + if (ifp->if_home_vnet != ifp->if_vnet) { + found = if_unlink_ifnet(ifp, true); + MPASS(found); + + pending[i++] = ifp; + } + } + IFNET_WUNLOCK(); + + for (int j = 0; j < i; j++) { + sx_xlock(&ifnet_detach_sxlock); + if_vmove(pending[j], pending[j]->if_home_vnet); + sx_xunlock(&ifnet_detach_sxlock); } + + free(pending, M_IFNET); } VNET_SYSUNINIT(vnet_if_return, SI_SUB_VNET_DONE, SI_ORDER_ANY, vnet_if_return, NULL); @@ -893,12 +964,7 @@ if_attach_internal(struct ifnet *ifp, int vmove, struct if_clone *ifc) } #endif - IFNET_WLOCK(); - CK_STAILQ_INSERT_TAIL(&V_ifnet, ifp, if_link); -#ifdef VIMAGE - curvnet->vnet_ifcnt++; -#endif - IFNET_WUNLOCK(); + if_link_ifnet(ifp); if (domain_init_status >= 2) if_attachdomain1(ifp); @@ -1036,9 +1102,15 @@ if_purgemaddrs(struct ifnet *ifp) void if_detach(struct ifnet *ifp) { + bool found; CURVNET_SET_QUIET(ifp->if_vnet); - if_detach_internal(ifp, 0, NULL); + found = if_unlink_ifnet(ifp, false); + if (found) { + sx_slock(&ifnet_detach_sxlock); + if_detach_internal(ifp, 0, NULL); + sx_sunlock(&ifnet_detach_sxlock); + } CURVNET_RESTORE(); } @@ -1058,47 +1130,17 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) struct ifaddr *ifa; int i; struct domain *dp; - struct ifnet *iter; - int found = 0; #ifdef VIMAGE int shutdown; shutdown = (ifp->if_vnet->vnet_state > SI_SUB_VNET && ifp->if_vnet->vnet_state < SI_SUB_VNET_DONE) ? 1 : 0; #endif - IFNET_WLOCK(); - CK_STAILQ_FOREACH(iter, &V_ifnet, if_link) - if (iter == ifp) { - CK_STAILQ_REMOVE(&V_ifnet, ifp, ifnet, if_link); - if (!vmove) - ifp->if_flags |= IFF_DYING; - found = 1; - break; - } - IFNET_WUNLOCK(); - if (!found) { - /* - * While we would want to panic here, we cannot - * guarantee that the interface is indeed still on - * the list given we don't hold locks all the way. - */ - return (ENOENT); -#if 0 - if (vmove) - panic("%s: ifp=%p not on the ifnet tailq %p", - __func__, ifp, &V_ifnet); - else - return; /* XXX this should panic as well? */ -#endif - } /* * At this point we know the interface still was on the ifnet list * and we removed it so we are in a stable state. */ -#ifdef VIMAGE - curvnet->vnet_ifcnt--; -#endif epoch_wait_preempt(net_epoch_preempt); /* @@ -1315,6 +1357,7 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) struct prison *pr; struct ifnet *difp; int shutdown; + bool found; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1351,6 +1394,9 @@ if_vmove_loan(struct thread *td, struct ifnet *ifp, char *ifname, int jid) } CURVNET_RESTORE(); + found = if_unlink_ifnet(ifp, true); + MPASS(found); + /* Move the interface into the child jail/vnet. */ if_vmove(ifp, pr->pr_vnet); @@ -1367,7 +1413,8 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) struct prison *pr; struct vnet *vnet_dst; struct ifnet *ifp; - int shutdown; + int shutdown; + bool found; /* Try to find the prison within our visibility. */ sx_slock(&allprison_lock); @@ -1405,6 +1452,8 @@ if_vmove_reclaim(struct thread *td, char *ifname, int jid) } /* Get interface back from child jail/vnet. */ + found = if_unlink_ifnet(ifp, true); + MPASS(found); if_vmove(ifp, vnet_dst); CURVNET_RESTORE(); @@ -3102,8 +3151,12 @@ ifioctl(struct socket *so, u_long cmd, caddr_t data, struct thread *td) goto out_noref; case SIOCIFDESTROY: error = priv_check(td, PRIV_NET_IFDESTROY); - if (error == 0) + + if (error == 0) { + sx_slock(&ifnet_detach_sxlock); error = if_clone_destroy(ifr->ifr_name); + sx_sunlock(&ifnet_detach_sxlock); + } goto out_noref; case SIOCIFGCLONERS: diff --git a/sys/net/if_var.h b/sys/net/if_var.h index 173d5755125..6f42a1b1eb6 100644 --- a/sys/net/if_var.h +++ b/sys/net/if_var.h @@ -569,27 +569,11 @@ struct ifmultiaddr { extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; -#define IFNET_WLOCK() do { \ - sx_xlock(&ifnet_sxlock); \ - rw_wlock(&ifnet_rwlock); \ -} while (0) - -#define IFNET_WUNLOCK() do { \ - rw_wunlock(&ifnet_rwlock); \ - sx_xunlock(&ifnet_sxlock); \ -} while (0) - -/* - * To assert the ifnet lock, you must know not only whether it's for read or - * write, but also whether it was acquired with sleep support or not. - */ -#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) +#define IFNET_WLOCK() sx_xlock(&ifnet_sxlock) +#define IFNET_WUNLOCK() sx_xunlock(&ifnet_sxlock) +#define IFNET_RLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_SLOCKED) #define IFNET_RLOCK_NOSLEEP_ASSERT() MPASS(in_epoch(net_epoch_preempt)) -#define IFNET_WLOCK_ASSERT() do { \ - sx_assert(&ifnet_sxlock, SA_XLOCKED); \ - rw_assert(&ifnet_rwlock, RA_WLOCKED); \ -} while (0) - +#define IFNET_WLOCK_ASSERT() sx_assert(&ifnet_sxlock, SA_XLOCKED) #define IFNET_RLOCK() sx_slock(&ifnet_sxlock) #define IFNET_RLOCK_NOSLEEP() struct epoch_tracker ifnet_rlock_et; epoch_enter_preempt(net_epoch_preempt, &ifnet_rlock_et) #define IFNET_RUNLOCK() sx_sunlock(&ifnet_sxlock) -- 2.45.0