From bf8cde114e8766f6ce12baa60d84a4733b8edaaa Mon Sep 17 00:00:00 2001 From: hselasky Date: Wed, 2 Oct 2019 09:22:22 +0000 Subject: [PATCH] Add support for Multi-Physical Function Switch, MPFS, in mlx5en. MPFS is a logical switch in the Mellanox device which forwards packets, based on a hardware-driven L2 address table, to one or more physical or virtual functions. A physical or virtual function is required to tell the MPFS, by using the MPFS firmware commands, which unicast MAC addresses it is requesting from the physical port's traffic. Broadcast and multicast traffic, however, is copied to all listening physical and virtual functions and does not need a rule in the MPFS switching table. Linux commit: eeb66cdb682678bfd1f02a4547e3649b38ffea7e MFC after: 3 days Sponsored by: Mellanox Technologies --- sys/conf/files | 2 + sys/dev/mlx5/driver.h | 6 + sys/dev/mlx5/mlx5_core/mlx5_main.c | 13 +- sys/dev/mlx5/mlx5_core/mlx5_mpfs.c | 125 +++++++++++++++++++ sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c | 139 +++++++++++++++++++--- sys/dev/mlx5/mpfs.h | 37 ++++++ sys/modules/mlx5/Makefile | 1 + 7 files changed, 304 insertions(+), 19 deletions(-) create mode 100644 sys/dev/mlx5/mlx5_core/mlx5_mpfs.c create mode 100644 sys/dev/mlx5/mpfs.h diff --git a/sys/conf/files b/sys/conf/files index 8d629499623..ef7c803c682 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -4727,6 +4727,8 @@ dev/mlx5/mlx5_core/mlx5_main.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_mcg.c optional mlx5 pci \ compile-with "${OFED_C}" +dev/mlx5/mlx5_core/mlx5_mpfs.c optional mlx5 pci \ + compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_mr.c optional mlx5 pci \ compile-with "${OFED_C}" dev/mlx5/mlx5_core/mlx5_pagealloc.c optional mlx5 pci \ diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index 9c1d5120c06..f4fd60b8645 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -717,6 +717,12 @@ struct mlx5_core_dev { struct mlx5_rsvd_gids reserved_gids; atomic_t 
roce_en; } roce; + + struct { + spinlock_t spinlock; /* taken by MPFS_LOCK() and MPFS_UNLOCK() around bitmap updates */ +#define MLX5_MPFS_TABLE_MAX 32 /* upper bound on the L2 table indices tracked in bitmap */ + long bitmap[BITS_TO_LONGS(MLX5_MPFS_TABLE_MAX)]; /* a set bit marks an L2 table index handed out by mlx5_mpfs_add_mac() */ + } mpfs; #ifdef CONFIG_MLX5_FPGA struct mlx5_fpga_device *fpga; #endif diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c index b622319560d..c1864765d88 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_main.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include @@ -1130,10 +1131,16 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, goto err_free_comp_eqs; } + err = mlx5_mpfs_init(dev); /* MPFS state is initialized before the FPGA device is started */ + if (err) { + mlx5_core_err(dev, "mpfs init failed %d\n", err); + goto err_fs; /* nothing MPFS-specific to undo; unwinding resumes at mlx5_cleanup_fs() */ + } + err = mlx5_fpga_device_start(dev); if (err) { dev_err(&pdev->dev, "fpga device start failed %d\n", err); - goto err_fs; + goto err_mpfs; /* FPGA start failed: also tear down the MPFS state */ } err = mlx5_register_device(dev); @@ -1151,6 +1158,9 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, err_fpga: mlx5_fpga_device_stop(dev); +err_mpfs: + mlx5_mpfs_destroy(dev); /* undoes mlx5_mpfs_init(), then falls through to err_fs */ + err_fs: mlx5_cleanup_fs(dev); @@ -1216,6 +1226,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv, mlx5_unregister_device(dev); mlx5_fpga_device_stop(dev); + mlx5_mpfs_destroy(dev); /* same position as the err_mpfs unwind step of mlx5_load_one() */ mlx5_cleanup_fs(dev); unmap_bf_area(dev); mlx5_wait_for_reclaim_vfs_pages(dev); diff --git a/sys/dev/mlx5/mlx5_core/mlx5_mpfs.c b/sys/dev/mlx5/mlx5_core/mlx5_mpfs.c new file mode 100644 index 00000000000..bff31e202c3 --- /dev/null +++ b/sys/dev/mlx5/mlx5_core/mlx5_mpfs.c @@ -0,0 +1,125 @@ +/*- + * Copyright (c) 2019, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include +#include + +#include +#include +#include +#include + +#define MPFS_LOCK(dev) spin_lock(&(dev)->mpfs.spinlock) /* guards dev->mpfs.bitmap */ +#define MPFS_UNLOCK(dev) spin_unlock(&(dev)->mpfs.spinlock) + +int +mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u32 *p_index, const u8 *mac) /* Reserve a free L2 table index, program mac into it with SET_L2_TABLE_ENTRY and hand the index back through p_index; returns 0 or an error code. */ +{ + const u32 l2table_size = MIN(1U << MLX5_CAP_GEN(dev, log_max_l2_table), + MLX5_MPFS_TABLE_MAX); /* usable entries: device capability clamped to the bitmap size */ + u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {}; + u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {}; + u8 *in_mac_addr; + u32 index; + int err; + + if (!MLX5_CAP_GEN(dev, eswitch_flow_table)) { + *p_index = 0; /* capability absent: no command is issued and index 0 is reported */ + return (0); + } + + MPFS_LOCK(dev); + index = find_first_zero_bit(dev->mpfs.bitmap, l2table_size); /* first clear bit; the checks below treat index >= l2table_size as table full */ + if (index < l2table_size) + set_bit(index, dev->mpfs.bitmap); /* reserve the index before the lock is dropped */ + MPFS_UNLOCK(dev); + + if (index >= l2table_size) + return (-ENOMEM); /* no free index was reserved; p_index is left untouched */ + + MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY); + MLX5_SET(set_l2_table_entry_in, in, table_index, index); + + in_mac_addr = 
MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address); + ether_addr_copy(&in_mac_addr[2], mac); /* the MAC is written at byte offset 2 of the mac_address field */ + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err != 0) { /* command failed: give the reserved index back */ + MPFS_LOCK(dev); + clear_bit(index, dev->mpfs.bitmap); + MPFS_UNLOCK(dev); + } else { + *p_index = index; /* p_index is written only on success */ + } + return (err); +} + +int +mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u32 index) /* Delete the L2 table entry at index with DELETE_L2_TABLE_ENTRY and free the index; returns 0 or an error code. */ +{ + u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {}; + u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {}; + int err; + + if (!MLX5_CAP_GEN(dev, eswitch_flow_table)) { + if (index != 0) /* mlx5_mpfs_add_mac() only ever reports index 0 in this mode */ + return (-EINVAL); + return (0); + } + + MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY); + MLX5_SET(delete_l2_table_entry_in, in, table_index, index); + + err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); + if (err == 0) { /* the index is freed only after the command succeeded */ + MPFS_LOCK(dev); + clear_bit(index, dev->mpfs.bitmap); /* NOTE(review): index is not range-checked against MLX5_MPFS_TABLE_MAX here */ + MPFS_UNLOCK(dev); + } + return (err); +} + +int +mlx5_mpfs_init(struct mlx5_core_dev *dev) /* Initialize the MPFS lock and mark every table index free; always returns 0. */ +{ + + spin_lock_init(&dev->mpfs.spinlock); + bitmap_zero(dev->mpfs.bitmap, MLX5_MPFS_TABLE_MAX); + return (0); +} + +void +mlx5_mpfs_destroy(struct mlx5_core_dev *dev) /* Report indices that are still allocated and destroy the MPFS lock. */ +{ + u32 num; + + num = bitmap_weight(dev->mpfs.bitmap, MLX5_MPFS_TABLE_MAX); /* indices that were added but never deleted */ + if (num != 0) + dev_err(&dev->pdev->dev, "Leaking %u MPFS MAC table entries\n", num); + + spin_lock_destroy(&dev->mpfs.spinlock); +} diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c index dfbf9ff1de9..f2a6b4b8580 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_flow_table.c @@ -29,6 +29,7 @@ #include #include +#include #define MLX5_SET_CFG(p, f, v) MLX5_SET(create_flow_group_in, p, f, v) @@ -54,6 +55,7 @@ enum { struct mlx5e_eth_addr_hash_node { LIST_ENTRY(mlx5e_eth_addr_hash_node) hlist; u8 action; + u32 mpfs_index; /* index obtained from mlx5_mpfs_add_mac(), or -1U when none was assigned */ struct mlx5e_eth_addr_info ai; }; @@ -63,29 +65,23 @@ mlx5e_hash_eth_addr(const u8 * addr) return (addr[5]); } -static void +static bool 
mlx5e_add_eth_addr_to_hash(struct mlx5e_eth_addr_hash_head *hash, - const u8 * addr) + struct mlx5e_eth_addr_hash_node *hn_new) /* hn_new is either linked into the hash (returns true) or freed (returns false) */ { struct mlx5e_eth_addr_hash_node *hn; - int ix = mlx5e_hash_eth_addr(addr); + u32 ix = mlx5e_hash_eth_addr(hn_new->ai.addr); LIST_FOREACH(hn, &hash[ix], hlist) { - if (bcmp(hn->ai.addr, addr, ETHER_ADDR_LEN) == 0) { + if (bcmp(hn->ai.addr, hn_new->ai.addr, ETHER_ADDR_LEN) == 0) { if (hn->action == MLX5E_ACTION_DEL) hn->action = MLX5E_ACTION_NONE; - return; + free(hn_new, M_MLX5EN); /* address already hashed: discard the duplicate node */ + return (false); } } - - hn = malloc(sizeof(*hn), M_MLX5EN, M_NOWAIT | M_ZERO); - if (hn == NULL) - return; - - ether_addr_copy(hn->ai.addr, addr); - hn->action = MLX5E_ACTION_ADD; - - LIST_INSERT_HEAD(&hash[ix], hn, hlist); + LIST_INSERT_HEAD(&hash[ix], hn_new, hlist); /* new address: the hash list takes over hn_new */ + return (true); } static void @@ -757,6 +753,8 @@ mlx5e_execute_action(struct mlx5e_priv *priv, case MLX5E_ACTION_DEL: mlx5e_del_eth_addr_from_flow_table(priv, &hn->ai); + if (hn->mpfs_index != -1U) /* -1U means no MPFS entry was assigned to this node */ + mlx5_mpfs_del_mac(priv->mdev, hn->mpfs_index); /* NOTE(review): return value is not checked */ mlx5e_del_eth_addr_from_hash(hn); break; @@ -765,36 +763,139 @@ mlx5e_execute_action(struct mlx5e_priv *priv, } } +static struct mlx5e_eth_addr_hash_node * +mlx5e_move_hn(struct mlx5e_eth_addr_hash_head *fh, struct mlx5e_eth_addr_hash_head *uh) /* Move the first node of fh to the head of uh; returns that node, or NULL when fh is empty. */ +{ + struct mlx5e_eth_addr_hash_node *hn; + + hn = LIST_FIRST(fh); + if (hn != NULL) { + LIST_REMOVE(hn, hlist); + LIST_INSERT_HEAD(uh, hn, hlist); + } + return (hn); +} + +static struct mlx5e_eth_addr_hash_node * +mlx5e_remove_hn(struct mlx5e_eth_addr_hash_head *fh) /* Unlink and return the first node of fh, or NULL when fh is empty. */ +{ + struct mlx5e_eth_addr_hash_node *hn; + + hn = LIST_FIRST(fh); + if (hn != NULL) + LIST_REMOVE(hn, hlist); + return (hn); +} + static void mlx5e_sync_ifp_addr(struct mlx5e_priv *priv) { + struct mlx5e_eth_addr_hash_head head_free; /* preallocated nodes that have no address yet */ + struct mlx5e_eth_addr_hash_head head_uc; /* nodes filled with unicast addresses */ + struct mlx5e_eth_addr_hash_head head_mc; /* nodes filled with multicast addresses */ + struct mlx5e_eth_addr_hash_node *hn; struct ifnet *ifp = priv->ifp; struct ifaddr *ifa; struct ifmultiaddr *ifma; + bool success 
= false; + size_t x; + size_t num; PRIV_ASSERT_LOCKED(priv); - /* XXX adding this entry might not be needed */ - mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, + LIST_INIT(&head_free); + LIST_INIT(&head_uc); + LIST_INIT(&head_mc); +retry: + num = 1; /* one node for the ifp->if_addr address copied below */ + + if_addr_rlock(ifp); + CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { + if (ifa->ifa_addr->sa_family != AF_LINK) + continue; + num++; /* count link-level interface addresses */ + } + if_addr_runlock(ifp); + + if_maddr_rlock(ifp); + CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_LINK) + continue; + num++; /* count link-level multicast addresses */ + } + if_maddr_runlock(ifp); + + /* allocate place holders */ + for (x = 0; x != num; x++) { + hn = malloc(sizeof(*hn), M_MLX5EN, M_WAITOK | M_ZERO); /* M_WAITOK allocation, done after the read locks above were dropped */ + hn->action = MLX5E_ACTION_ADD; + hn->mpfs_index = -1U; /* no MPFS table entry yet */ + LIST_INSERT_HEAD(&head_free, hn, hlist); + } + + hn = mlx5e_move_hn(&head_free, &head_uc); + if (hn == NULL) + goto cleanup; + + ether_addr_copy(hn->ai.addr, LLADDR((struct sockaddr_dl *)(ifp->if_addr->ifa_addr))); if_addr_rlock(ifp); CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { if (ifa->ifa_addr->sa_family != AF_LINK) continue; - mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, + hn = mlx5e_move_hn(&head_free, &head_uc); + if (hn == NULL) + break; /* more addresses than preallocated nodes */ + ether_addr_copy(hn->ai.addr, LLADDR((struct sockaddr_dl *)ifa->ifa_addr)); } if_addr_runlock(ifp); + if (ifa != NULL) /* loop was left through break: free everything and retry with a fresh count */ + goto cleanup; if_maddr_rlock(ifp); CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { if (ifma->ifma_addr->sa_family != AF_LINK) continue; - mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc, + hn = mlx5e_move_hn(&head_free, &head_mc); + if (hn == NULL) + break; /* more addresses than preallocated nodes */ + ether_addr_copy(hn->ai.addr, LLADDR((struct sockaddr_dl *)ifma->ifma_addr)); } if_maddr_runlock(ifp); + if (ifma != NULL) /* same early-exit check for the multicast walk */ + goto cleanup; + + /* insert L2 unicast addresses into hash list */ + + while ((hn = mlx5e_remove_hn(&head_uc)) != NULL) { + if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_uc, hn) == 0) /* false: hn was a duplicate and has been freed */ + continue; + if (hn->mpfs_index == -1U) + 
mlx5_mpfs_add_mac(priv->mdev, &hn->mpfs_index, hn->ai.addr); /* NOTE(review): return value is ignored; on failure mpfs_index stays -1U */ + } + + /* insert L2 multicast addresses into hash list */ + + while ((hn = mlx5e_remove_hn(&head_mc)) != NULL) { + if (mlx5e_add_eth_addr_to_hash(priv->eth_addr.if_mc, hn) == 0) /* multicast nodes get no MPFS entry */ + continue; + } + + success = true; + +cleanup: /* free every node that was not handed over to a hash list */ + while ((hn = mlx5e_remove_hn(&head_uc)) != NULL) + free(hn, M_MLX5EN); + while ((hn = mlx5e_remove_hn(&head_mc)) != NULL) + free(hn, M_MLX5EN); + while ((hn = mlx5e_remove_hn(&head_free)) != NULL) + free(hn, M_MLX5EN); + + if (success == false) /* ran out of preallocated nodes above: recount and try again */ + goto retry; } static void mlx5e_fill_addr_array(struct mlx5e_priv *priv, int list_type, @@ -1493,6 +1594,8 @@ mlx5e_open_flow_table(struct mlx5e_priv *priv) void mlx5e_close_flow_table(struct mlx5e_priv *priv) { + + mlx5e_handle_ifp_addr(priv); /* NOTE(review): presumably flushes the address hash and its MPFS entries before the tables are destroyed - confirm */ mlx5e_destroy_inner_rss_flow_table(priv); mlx5e_destroy_main_flow_table(priv); mlx5e_destroy_vlan_flow_table(priv); diff --git a/sys/dev/mlx5/mpfs.h b/sys/dev/mlx5/mpfs.h new file mode 100644 index 00000000000..d5d4809fc68 --- /dev/null +++ b/sys/dev/mlx5/mpfs.h @@ -0,0 +1,37 @@ +/*- + * Copyright (c) 2019, Mellanox Technologies, Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _MLX5_MPFS_H_ +#define _MLX5_MPFS_H_ + +struct mlx5_core_dev; /* forward declaration; the prototypes below only take a pointer */ +int mlx5_mpfs_add_mac(struct mlx5_core_dev *dev, u32 *p_index, const u8 *mac); /* reserve an L2 table index for mac; the index is stored in *p_index on success */ +int mlx5_mpfs_del_mac(struct mlx5_core_dev *dev, u32 index); /* delete the entry at an index previously returned by mlx5_mpfs_add_mac() */ +int mlx5_mpfs_init(struct mlx5_core_dev *dev); /* set up the MPFS lock and index bitmap; returns 0 */ +void mlx5_mpfs_destroy(struct mlx5_core_dev *dev); /* report leaked entries and destroy the MPFS lock */ + +#endif /* _MLX5_MPFS_H_ */ diff --git a/sys/modules/mlx5/Makefile b/sys/modules/mlx5/Makefile index f90db5ba92d..b6ae60a11e5 100644 --- a/sys/modules/mlx5/Makefile +++ b/sys/modules/mlx5/Makefile @@ -19,6 +19,7 @@ mlx5_health.c \ mlx5_mad.c \ mlx5_main.c \ mlx5_mcg.c \ +mlx5_mpfs.c \ mlx5_mr.c \ mlx5_pagealloc.c \ mlx5_pd.c \ -- 2.45.0