/*-
 * Copyright (c) 2013-2017, Mellanox Technologies, Ltd.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <linux/etherdevice.h>
#include <dev/mlx5/driver.h>
#include <dev/mlx5/mlx5_ifc.h>
#include <dev/mlx5/vport.h>
#include <dev/mlx5/fs.h>
#include <dev/mlx5/mpfs.h>
#include "mlx5_core.h"
#define UPLINK_VPORT 0xFFFF

#define MLX5_DEBUG_ESWITCH_MASK BIT(3)

#define esw_info(dev, format, ...)				\
	printf("mlx5_core: INFO: ""(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)

#define esw_warn(dev, format, ...)				\
	printf("mlx5_core: WARN: ""(%s): E-Switch: " format, (dev)->priv.name, ##__VA_ARGS__)

#define esw_debug(dev, format, ...)				\
	mlx5_core_dbg_mask(dev, MLX5_DEBUG_ESWITCH_MASK, format, ##__VA_ARGS__)
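
/*
 * Bookkeeping state for vport UC/MC address list entries. The enum body was
 * elided from this excerpt; the values below are assumed from how
 * addr->action is used further down (NONE/ADD/DEL).
 */
enum {
	MLX5_ACTION_NONE = 0,
	MLX5_ACTION_ADD  = 1,
	MLX5_ACTION_DEL  = 2,
};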
/* E-Switch UC L2 table hash node */
struct esw_uc_addr {
	struct l2addr_node node;
	u32                table_index;
	u32                vport;
};

/* E-Switch MC FDB table hash node */
struct esw_mc_addr { /* SRIOV only */
	struct l2addr_node     node;
	struct mlx5_flow_rule *uplink_rule; /* Forward to uplink rule */
	u32                    refcnt;
};

/* Vport UC/MC hash node */
struct vport_addr {
	struct l2addr_node     node;
	u8                     action;
	u32                    vport;
	struct mlx5_flow_rule *flow_rule; /* SRIOV only */
};

enum {
	UC_ADDR_CHANGE = BIT(0),
	MC_ADDR_CHANGE = BIT(1),
};

/* Vport context events */
#define SRIOV_VPORT_EVENTS (UC_ADDR_CHANGE | \
			    MC_ADDR_CHANGE)
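
/*
 * Ask firmware to raise a change event for the given vport whenever the
 * selected NIC vport context fields (UC/MC address lists) change, and
 * re-arm the event after it has fired.
 */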
static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
					u32 events_mask)
{
	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
	void *nic_vport_ctx;

	MLX5_SET(modify_nic_vport_context_in, in,
		 opcode, MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
	MLX5_SET(modify_nic_vport_context_in, in, field_select.change_event, 1);
	MLX5_SET(modify_nic_vport_context_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(modify_nic_vport_context_in, in, other_vport, 1);
	nic_vport_ctx = MLX5_ADDR_OF(modify_nic_vport_context_in,
				     in, nic_vport_context);

	MLX5_SET(nic_vport_context, nic_vport_ctx, arm_change_event, 1);

	if (events_mask & UC_ADDR_CHANGE)
		MLX5_SET(nic_vport_context, nic_vport_ctx,
			 event_on_uc_address_change, 1);
	if (events_mask & MC_ADDR_CHANGE)
		MLX5_SET(nic_vport_context, nic_vport_ctx,
			 event_on_mc_address_change, 1);

	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
}
/* E-Switch vport context HW commands */
static int query_esw_vport_context_cmd(struct mlx5_core_dev *mdev, u32 vport,
				       u32 *out, int outlen)
{
	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {0};

	MLX5_SET(query_esw_vport_context_in, in, opcode,
		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);

	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(query_esw_vport_context_in, in, other_vport, 1);

	return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
}
static int query_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
				 u16 *vlan, u8 *qos)
{
	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {0};
	bool cvlan_strip;
	bool cvlan_insert;
	int err;

	*vlan = 0;
	*qos = 0;

	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
		return -EOPNOTSUPP;

	err = query_esw_vport_context_cmd(dev, vport, out, sizeof(out));
	if (err)
		goto out;

	cvlan_strip = MLX5_GET(query_esw_vport_context_out, out,
			       esw_vport_context.vport_cvlan_strip);

	cvlan_insert = MLX5_GET(query_esw_vport_context_out, out,
				esw_vport_context.vport_cvlan_insert);

	if (cvlan_strip || cvlan_insert) {
		*vlan = MLX5_GET(query_esw_vport_context_out, out,
				 esw_vport_context.cvlan_id);
		*qos = MLX5_GET(query_esw_vport_context_out, out,
				esw_vport_context.cvlan_pcp);
	}

	esw_debug(dev, "Query Vport[%d] cvlan: VLAN %d qos=%d\n",
		  vport, *vlan, *qos);
out:
	return err;
}
static int modify_esw_vport_context_cmd(struct mlx5_core_dev *dev, u16 vport,
					void *in, int inlen)
{
	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};

	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
	if (vport)
		MLX5_SET(modify_esw_vport_context_in, in, other_vport, 1);

	MLX5_SET(modify_esw_vport_context_in, in, opcode,
		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);

	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
}
static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u32 vport,
				  u16 vlan, u8 qos, bool set)
{
	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};

	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
		return -EOPNOTSUPP;

	esw_debug(dev, "Set Vport[%d] VLAN %d qos %d set=%d\n",
		  vport, vlan, qos, set);

	if (set) {
		MLX5_SET(modify_esw_vport_context_in, in,
			 esw_vport_context.vport_cvlan_strip, 1);
		/* insert only if no vlan in packet */
		MLX5_SET(modify_esw_vport_context_in, in,
			 esw_vport_context.vport_cvlan_insert, 1);
		MLX5_SET(modify_esw_vport_context_in, in,
			 esw_vport_context.cvlan_pcp, qos);
		MLX5_SET(modify_esw_vport_context_in, in,
			 esw_vport_context.cvlan_id, vlan);
	}

	MLX5_SET(modify_esw_vport_context_in, in,
		 field_select.vport_cvlan_strip, 1);
	MLX5_SET(modify_esw_vport_context_in, in,
		 field_select.vport_cvlan_insert, 1);

	return modify_esw_vport_context_cmd(dev, vport, in, sizeof(in));
}
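
/* E-Switch FDB */
/*
 * Install an FDB steering rule that matches on the destination MAC and
 * forwards matching packets to @vport (UPLINK_VPORT sends them to the wire).
 */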
static struct mlx5_flow_rule *
esw_fdb_set_vport_rule(struct mlx5_eswitch *esw, u8 mac[ETH_ALEN], u32 vport)
{
	int match_header = MLX5_MATCH_OUTER_HEADERS;
	struct mlx5_flow_destination dest;
	struct mlx5_flow_rule *flow_rule = NULL;
	u32 *match_v;
	u32 *match_c;
	u8 *dmac_v;
	u8 *dmac_c;

	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!match_v || !match_c) {
		printf("mlx5_core: WARN: ""FDB: Failed to alloc match parameters\n");
		goto out;
	}
	dmac_v = MLX5_ADDR_OF(fte_match_param, match_v,
			      outer_headers.dmac_47_16);
	dmac_c = MLX5_ADDR_OF(fte_match_param, match_c,
			      outer_headers.dmac_47_16);

	ether_addr_copy(dmac_v, mac);
	/* Match criteria mask */
	memset(dmac_c, 0xff, 6);

	dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT;
	dest.vport_num = vport;

	esw_debug(esw->dev,
		  "\tFDB add rule dmac_v(%pM) dmac_c(%pM) -> vport(%d)\n",
		  dmac_v, dmac_c, vport);

	flow_rule =
		mlx5_add_flow_rule(esw->fdb_table.fdb,
				   match_header,
				   match_c,
				   match_v,
				   MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,
				   0, &dest);
	if (IS_ERR_OR_NULL(flow_rule)) {
		printf("mlx5_core: WARN: ""FDB: Failed to add flow rule: dmac_v(%pM) dmac_c(%pM) -> vport(%d), err(%ld)\n", dmac_v, dmac_c, vport, PTR_ERR(flow_rule));
		flow_rule = NULL;
	}
out:
	kfree(match_v);
	kfree(match_c);
	return flow_rule;
}
static int esw_create_fdb_table(struct mlx5_eswitch *esw)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_core_dev *dev = esw->dev;
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_flow_table *fdb;
	struct mlx5_flow_group *g;
	void *match_criteria;
	int table_size;
	u32 *flow_group_in;
	u8 *dmac;
	int err = 0;

	esw_debug(dev, "Create FDB log_max_size(%d)\n",
		  MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size));

	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
	if (!root_ns) {
		esw_warn(dev, "Failed to get FDB flow namespace\n");
		return -ENOMEM;
	}

	flow_group_in = mlx5_vzalloc(inlen);
	if (!flow_group_in)
		return -ENOMEM;
	memset(flow_group_in, 0, inlen);

	/* (-2) Since MaorG said so .. */
	table_size = BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(dev, log_max_ft_size)) - 2;

	fdb = mlx5_create_flow_table(root_ns, 0, "FDB", table_size);
	if (IS_ERR_OR_NULL(fdb)) {
		err = PTR_ERR(fdb);
		esw_warn(dev, "Failed to create FDB Table err %d\n", err);
		goto out;
	}

	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
		 MLX5_MATCH_OUTER_HEADERS);
	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
	dmac = MLX5_ADDR_OF(fte_match_param, match_criteria, outer_headers.dmac_47_16);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, table_size - 1);
	eth_broadcast_addr(dmac);

	g = mlx5_create_flow_group(fdb, flow_group_in);
	if (IS_ERR_OR_NULL(g)) {
		err = PTR_ERR(g);
		esw_warn(dev, "Failed to create flow group err(%d)\n", err);
		goto out;
	}

	esw->fdb_table.addr_grp = g;
	esw->fdb_table.fdb = fdb;
out:
	kfree(flow_group_in);
	if (err && !IS_ERR_OR_NULL(fdb))
		mlx5_destroy_flow_table(fdb);
	return err;
}
static void esw_destroy_fdb_table(struct mlx5_eswitch *esw)
{
	if (!esw->fdb_table.fdb)
		return;

	esw_debug(esw->dev, "Destroy FDB Table\n");
	mlx5_destroy_flow_group(esw->fdb_table.addr_grp);
	mlx5_destroy_flow_table(esw->fdb_table.fdb);
	esw->fdb_table.fdb = NULL;
	esw->fdb_table.addr_grp = NULL;
}
/* E-Switch vport UC/MC lists management */
typedef int (*vport_addr_action)(struct mlx5_eswitch *esw,
				 struct vport_addr *vaddr);
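
/*
 * Unicast address bookkeeping: each MAC may be claimed by only one vport.
 * A claimed MAC is programmed into the MPFS L2 table and, while SRIOV is
 * active, also gets an FDB forwarding rule towards its vport.
 */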
static int esw_add_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	struct hlist_head *hash = esw->l2_table.l2_hash;
	struct esw_uc_addr *esw_uc;
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;
	int err;

	esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
	if (esw_uc) {
		esw_warn(esw->dev,
			 "Failed to set L2 mac(%pM) for vport(%d), mac is already in use by vport(%d)\n",
			 mac, vport, esw_uc->vport);
		return -EEXIST;
	}

	esw_uc = l2addr_hash_add(hash, mac, struct esw_uc_addr, GFP_KERNEL);
	if (!esw_uc)
		return -ENOMEM;
	esw_uc->vport = vport;

	err = mlx5_mpfs_add_mac(esw->dev, &esw_uc->table_index, mac, 0, 0);
	if (err)
		goto abort;

	if (esw->fdb_table.fdb) /* SRIOV is enabled: Forward UC MAC to vport */
		vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);

	esw_debug(esw->dev, "\tADDED UC MAC: vport[%d] %pM index:%d fr(%p)\n",
		  vport, mac, esw_uc->table_index, vaddr->flow_rule);
	return err;
abort:
	l2addr_hash_del(esw_uc);
	return err;
}
static int esw_del_uc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	struct hlist_head *hash = esw->l2_table.l2_hash;
	struct esw_uc_addr *esw_uc;
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;

	esw_uc = l2addr_hash_find(hash, mac, struct esw_uc_addr);
	if (!esw_uc || esw_uc->vport != vport) {
		esw_debug(esw->dev,
			  "MAC(%pM) doesn't belong to vport (%d)\n",
			  mac, vport);
		return -EINVAL;
	}
	esw_debug(esw->dev, "\tDELETE UC MAC: vport[%d] %pM index:%d fr(%p)\n",
		  vport, mac, esw_uc->table_index, vaddr->flow_rule);

	mlx5_mpfs_del_mac(esw->dev, esw_uc->table_index);

	if (vaddr->flow_rule)
		mlx5_del_flow_rule(vaddr->flow_rule);
	vaddr->flow_rule = NULL;

	l2addr_hash_del(esw_uc);
	return 0;
}
static int esw_add_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	struct hlist_head *hash = esw->mc_table;
	struct esw_mc_addr *esw_mc;
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;

	if (!esw->fdb_table.fdb)
		return 0;

	esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
	if (esw_mc)
		goto add;

	esw_mc = l2addr_hash_add(hash, mac, struct esw_mc_addr, GFP_KERNEL);
	if (!esw_mc)
		return -ENOMEM;

	esw_mc->uplink_rule = /* Forward MC MAC to Uplink */
		esw_fdb_set_vport_rule(esw, mac, UPLINK_VPORT);
add:
	esw_mc->refcnt++;
	/* Forward MC MAC to vport */
	vaddr->flow_rule = esw_fdb_set_vport_rule(esw, mac, vport);
	esw_debug(esw->dev,
		  "\tADDED MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
		  vport, mac, vaddr->flow_rule,
		  esw_mc->refcnt, esw_mc->uplink_rule);
	return 0;
}
static int esw_del_mc_addr(struct mlx5_eswitch *esw, struct vport_addr *vaddr)
{
	struct hlist_head *hash = esw->mc_table;
	struct esw_mc_addr *esw_mc;
	u8 *mac = vaddr->node.addr;
	u32 vport = vaddr->vport;

	if (!esw->fdb_table.fdb)
		return 0;

	esw_mc = l2addr_hash_find(hash, mac, struct esw_mc_addr);
	if (!esw_mc) {
		esw_warn(esw->dev,
			 "Failed to find eswitch MC addr for MAC(%pM) vport(%d)",
			 mac, vport);
		return -EINVAL;
	}
	esw_debug(esw->dev,
		  "\tDELETE MC MAC: vport[%d] %pM fr(%p) refcnt(%d) uplinkfr(%p)\n",
		  vport, mac, vaddr->flow_rule, esw_mc->refcnt,
		  esw_mc->uplink_rule);

	if (vaddr->flow_rule)
		mlx5_del_flow_rule(vaddr->flow_rule);
	vaddr->flow_rule = NULL;

	if (--esw_mc->refcnt)
		return 0;

	if (esw_mc->uplink_rule)
		mlx5_del_flow_rule(esw_mc->uplink_rule);

	l2addr_hash_del(esw_mc);
	return 0;
}
/* Apply vport UC/MC list to HW l2 table and FDB table */
static void esw_apply_vport_addr_list(struct mlx5_eswitch *esw,
				      u32 vport_num, int list_type)
{
	struct mlx5_vport *vport = &esw->vports[vport_num];
	bool is_uc = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC;
	vport_addr_action vport_addr_add;
	vport_addr_action vport_addr_del;
	struct vport_addr *addr;
	struct l2addr_node *node;
	struct hlist_head *hash;
	struct hlist_node *tmp;
	int hi;

	vport_addr_add = is_uc ? esw_add_uc_addr :
				 esw_add_mc_addr;
	vport_addr_del = is_uc ? esw_del_uc_addr :
				 esw_del_mc_addr;

	hash = is_uc ? vport->uc_list : vport->mc_list;
	for_each_l2hash_node(node, tmp, hash, hi) {
		addr = container_of(node, struct vport_addr, node);
		switch (addr->action) {
		case MLX5_ACTION_ADD:
			vport_addr_add(esw, addr);
			addr->action = MLX5_ACTION_NONE;
			break;
		case MLX5_ACTION_DEL:
			vport_addr_del(esw, addr);
			l2addr_hash_del(addr);
			break;
		}
	}
}
/* Sync vport UC/MC list from vport context */
static void esw_update_vport_addr_list(struct mlx5_eswitch *esw,
				       u32 vport_num, int list_type)
{
	struct mlx5_vport *vport = &esw->vports[vport_num];
	bool is_uc = list_type == MLX5_NIC_VPORT_LIST_TYPE_UC;
	u8 (*mac_list)[ETH_ALEN];
	struct l2addr_node *node;
	struct vport_addr *addr;
	struct hlist_head *hash;
	struct hlist_node *tmp;
	int size;
	int err;
	int hi;
	int i;

	size = is_uc ? MLX5_MAX_UC_PER_VPORT(esw->dev) :
		       MLX5_MAX_MC_PER_VPORT(esw->dev);

	mac_list = kcalloc(size, ETH_ALEN, GFP_KERNEL);
	if (!mac_list)
		return;

	hash = is_uc ? vport->uc_list : vport->mc_list;

	for_each_l2hash_node(node, tmp, hash, hi) {
		addr = container_of(node, struct vport_addr, node);
		addr->action = MLX5_ACTION_DEL;
	}

	err = mlx5_query_nic_vport_mac_list(esw->dev, vport_num, list_type,
					    mac_list, &size);
	if (err)
		goto out;
	esw_debug(esw->dev, "vport[%d] context update %s list size (%d)\n",
		  vport_num, is_uc ? "UC" : "MC", size);

	for (i = 0; i < size; i++) {
		if (is_uc && !is_valid_ether_addr(mac_list[i]))
			continue;

		if (!is_uc && !is_multicast_ether_addr(mac_list[i]))
			continue;

		addr = l2addr_hash_find(hash, mac_list[i], struct vport_addr);
		if (addr) {
			addr->action = MLX5_ACTION_NONE;
			continue;
		}

		addr = l2addr_hash_add(hash, mac_list[i], struct vport_addr,
				       GFP_KERNEL);
		if (!addr) {
			esw_warn(esw->dev,
				 "Failed to add MAC(%pM) to vport[%d] DB\n",
				 mac_list[i], vport_num);
			continue;
		}
		addr->vport = vport_num;
		addr->action = MLX5_ACTION_ADD;
	}
out:
	kfree(mac_list);
}
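
/*
 * Deferred work, run from the eswitch workqueue when firmware signals a NIC
 * vport context change: re-read the vport UC/MC lists, apply the delta to
 * the L2/FDB tables and re-arm the change event.
 */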
static void esw_vport_change_handler(struct work_struct *work)
{
	struct mlx5_vport *vport =
		container_of(work, struct mlx5_vport, vport_change_handler);
	struct mlx5_core_dev *dev = vport->dev;
	struct mlx5_eswitch *esw = dev->priv.eswitch;
	u8 mac[ETH_ALEN];

	mlx5_query_nic_vport_mac_address(dev, vport->vport, mac);
	esw_debug(dev, "vport[%d] Context Changed: perm mac: %pM\n",
		  vport->vport, mac);

	if (vport->enabled_events & UC_ADDR_CHANGE) {
		esw_update_vport_addr_list(esw, vport->vport,
					   MLX5_NIC_VPORT_LIST_TYPE_UC);
		esw_apply_vport_addr_list(esw, vport->vport,
					  MLX5_NIC_VPORT_LIST_TYPE_UC);
	}

	if (vport->enabled_events & MC_ADDR_CHANGE) {
		esw_update_vport_addr_list(esw, vport->vport,
					   MLX5_NIC_VPORT_LIST_TYPE_MC);
		esw_apply_vport_addr_list(esw, vport->vport,
					  MLX5_NIC_VPORT_LIST_TYPE_MC);
	}

	esw_debug(esw->dev, "vport[%d] Context Changed: Done\n", vport->vport);
	if (vport->enabled)
		arm_vport_context_events_cmd(dev, vport->vport,
					     vport->enabled_events);
}
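
/*
 * Per-vport egress ACL: a two-entry table where group 0 holds the single
 * "allowed vlan" rule and group 1 holds the catch-all drop rule used when a
 * VST vlan/qos is configured.
 */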
static void esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
					struct mlx5_vport *vport)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_flow_group *vlan_grp = NULL;
	struct mlx5_flow_group *drop_grp = NULL;
	struct mlx5_core_dev *dev = esw->dev;
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_flow_table *acl;
	void *match_criteria;
	char table_name[32];
	u32 *flow_group_in;
	int table_size = 2;
	int err = 0;

	if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
		return;

	esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));

	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS);
	if (!root_ns) {
		esw_warn(dev, "Failed to get E-Switch egress flow namespace\n");
		return;
	}

	flow_group_in = mlx5_vzalloc(inlen);
	if (!flow_group_in)
		return;

	snprintf(table_name, 32, "egress_%d", vport->vport);
	acl = mlx5_create_vport_flow_table(root_ns, vport->vport, 0, table_name, table_size);
	if (IS_ERR_OR_NULL(acl)) {
		err = PTR_ERR(acl);
		esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);

	vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
	if (IS_ERR_OR_NULL(vlan_grp)) {
		err = PTR_ERR(vlan_grp);
		esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	memset(flow_group_in, 0, inlen);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
	drop_grp = mlx5_create_flow_group(acl, flow_group_in);
	if (IS_ERR_OR_NULL(drop_grp)) {
		err = PTR_ERR(drop_grp);
		esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	vport->egress.acl = acl;
	vport->egress.drop_grp = drop_grp;
	vport->egress.allowed_vlans_grp = vlan_grp;
out:
	kfree(flow_group_in);
	if (err && !IS_ERR_OR_NULL(vlan_grp))
		mlx5_destroy_flow_group(vlan_grp);
	if (err && !IS_ERR_OR_NULL(acl))
		mlx5_destroy_flow_table(acl);
}
static void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
					   struct mlx5_vport *vport)
{
	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan))
		mlx5_del_flow_rule(vport->egress.allowed_vlan);

	if (!IS_ERR_OR_NULL(vport->egress.drop_rule))
		mlx5_del_flow_rule(vport->egress.drop_rule);

	vport->egress.allowed_vlan = NULL;
	vport->egress.drop_rule = NULL;
}
static void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
					 struct mlx5_vport *vport)
{
	if (IS_ERR_OR_NULL(vport->egress.acl))
		return;

	esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);

	esw_vport_cleanup_egress_rules(esw, vport);
	mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
	mlx5_destroy_flow_group(vport->egress.drop_grp);
	mlx5_destroy_flow_table(vport->egress.acl);
	vport->egress.allowed_vlans_grp = NULL;
	vport->egress.drop_grp = NULL;
	vport->egress.acl = NULL;
}
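
/*
 * Per-vport ingress ACL: a single-entry table whose only group matches on
 * the cvlan tag; it is used to drop packets that arrive from the VF already
 * tagged while VST is configured.
 */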
static void esw_vport_enable_ingress_acl(struct mlx5_eswitch *esw,
					 struct mlx5_vport *vport)
{
	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
	struct mlx5_core_dev *dev = esw->dev;
	struct mlx5_flow_namespace *root_ns;
	struct mlx5_flow_table *acl;
	struct mlx5_flow_group *g;
	void *match_criteria;
	char table_name[32];
	u32 *flow_group_in;
	int table_size = 1;
	int err = 0;

	if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
		return;

	esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));

	root_ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS);
	if (!root_ns) {
		esw_warn(dev, "Failed to get E-Switch ingress flow namespace\n");
		return;
	}

	flow_group_in = mlx5_vzalloc(inlen);
	if (!flow_group_in)
		return;

	snprintf(table_name, 32, "ingress_%d", vport->vport);
	acl = mlx5_create_vport_flow_table(root_ns, vport->vport, 0, table_name, table_size);
	if (IS_ERR_OR_NULL(acl)) {
		err = PTR_ERR(acl);
		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow Table, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);

	g = mlx5_create_flow_group(acl, flow_group_in);
	if (IS_ERR_OR_NULL(g)) {
		err = PTR_ERR(g);
		esw_warn(dev, "Failed to create E-Switch vport[%d] ingress flow group, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	vport->ingress.acl = acl;
	vport->ingress.drop_grp = g;
out:
	kfree(flow_group_in);
	if (err && !IS_ERR_OR_NULL(acl))
		mlx5_destroy_flow_table(acl);
}
static void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
					    struct mlx5_vport *vport)
{
	if (!IS_ERR_OR_NULL(vport->ingress.drop_rule))
		mlx5_del_flow_rule(vport->ingress.drop_rule);
	vport->ingress.drop_rule = NULL;
}
static void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
					  struct mlx5_vport *vport)
{
	if (IS_ERR_OR_NULL(vport->ingress.acl))
		return;

	esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);

	esw_vport_cleanup_ingress_rules(esw, vport);
	mlx5_destroy_flow_group(vport->ingress.drop_grp);
	mlx5_destroy_flow_table(vport->ingress.acl);
	vport->ingress.acl = NULL;
	vport->ingress.drop_grp = NULL;
}
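
/*
 * (Re)install the ingress drop rule for a vport: when a VST vlan/qos is set,
 * packets received from the VF that already carry a cvlan tag are dropped.
 */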
static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
				    struct mlx5_vport *vport)
{
	struct mlx5_flow_destination dest;
	u32 *match_v;
	u32 *match_c;
	int err = 0;

	if (IS_ERR_OR_NULL(vport->ingress.acl)) {
		esw_warn(esw->dev,
			 "vport[%d] configure ingress rules failed, ingress acl is not initialized!\n",
			 vport->vport);
		return -EPERM;
	}

	esw_vport_cleanup_ingress_rules(esw, vport);

	if (!vport->vlan && !vport->qos)
		return 0;

	esw_debug(esw->dev,
		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
		  vport->vport, vport->vlan, vport->qos);

	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!match_v || !match_c) {
		err = -ENOMEM;
		esw_warn(esw->dev, "vport[%d] configure ingress rules failed, err(%d)\n",
			 vport->vport, err);
		goto out;
	}
	MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.cvlan_tag);
	MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.cvlan_tag);

	dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT;
	dest.vport_num = vport->vport;

	vport->ingress.drop_rule =
		mlx5_add_flow_rule(vport->ingress.acl,
				   MLX5_MATCH_OUTER_HEADERS,
				   match_c,
				   match_v,
				   MLX5_FLOW_CONTEXT_ACTION_DROP,
				   0, &dest);
	if (IS_ERR_OR_NULL(vport->ingress.drop_rule)) {
		err = PTR_ERR(vport->ingress.drop_rule);
		printf("mlx5_core: WARN: ""vport[%d] configure ingress rules, err(%d)\n", vport->vport, err);
		vport->ingress.drop_rule = NULL;
	}
out:
	kfree(match_v);
	kfree(match_c);
	return err;
}
static int esw_vport_egress_config(struct mlx5_eswitch *esw,
				   struct mlx5_vport *vport)
{
	struct mlx5_flow_destination dest;
	u32 *match_v;
	u32 *match_c;
	int err = 0;

	if (IS_ERR_OR_NULL(vport->egress.acl)) {
		esw_warn(esw->dev, "vport[%d] configure egress rules failed, egress acl is not initialized!\n",
			 vport->vport);
		return -EPERM;
	}

	esw_vport_cleanup_egress_rules(esw, vport);

	if (!vport->vlan && !vport->qos)
		return 0;

	esw_debug(esw->dev,
		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
		  vport->vport, vport->vlan, vport->qos);

	match_v = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	match_c = kzalloc(MLX5_ST_SZ_BYTES(fte_match_param), GFP_KERNEL);
	if (!match_v || !match_c) {
		err = -ENOMEM;
		esw_warn(esw->dev, "vport[%d] configure egress rules failed, err(%d)\n",
			 vport->vport, err);
		goto out;
	}

	/* Allowed vlan rule */
	MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.cvlan_tag);
	MLX5_SET_TO_ONES(fte_match_param, match_v, outer_headers.cvlan_tag);
	MLX5_SET_TO_ONES(fte_match_param, match_c, outer_headers.first_vid);
	MLX5_SET(fte_match_param, match_v, outer_headers.first_vid, vport->vlan);

	dest.type = MLX5_FLOW_CONTEXT_DEST_TYPE_VPORT;
	dest.vport_num = vport->vport;

	vport->egress.allowed_vlan =
		mlx5_add_flow_rule(vport->egress.acl,
				   MLX5_MATCH_OUTER_HEADERS,
				   match_c,
				   match_v,
				   MLX5_FLOW_CONTEXT_ACTION_ALLOW,
				   0, &dest);
	if (IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
		err = PTR_ERR(vport->egress.allowed_vlan);
		printf("mlx5_core: WARN: ""vport[%d] configure egress allowed vlan rule failed, err(%d)\n", vport->vport, err);
		vport->egress.allowed_vlan = NULL;
		goto out;
	}

	/* Drop others rule (star rule) */
	memset(match_c, 0, MLX5_ST_SZ_BYTES(fte_match_param));
	memset(match_v, 0, MLX5_ST_SZ_BYTES(fte_match_param));
	vport->egress.drop_rule =
		mlx5_add_flow_rule(vport->egress.acl,
				   0,
				   match_c,
				   match_v,
				   MLX5_FLOW_CONTEXT_ACTION_DROP,
				   0, &dest);
	if (IS_ERR_OR_NULL(vport->egress.drop_rule)) {
		err = PTR_ERR(vport->egress.drop_rule);
		printf("mlx5_core: WARN: ""vport[%d] configure egress drop rule failed, err(%d)\n", vport->vport, err);
		vport->egress.drop_rule = NULL;
	}
out:
	kfree(match_v);
	kfree(match_c);
	return err;
}
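
/*
 * Bring a vport under eswitch management: create its ACLs (VFs only), move
 * its admin state to AUTO, sync its address lists with the current vport
 * context and arm change events.
 */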
static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num,
			     int enable_events)
{
	struct mlx5_vport *vport = &esw->vports[vport_num];
	unsigned long flags;

	mutex_lock(&vport->state_lock);
	WARN_ON(vport->enabled);

	esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num);

	if (vport_num) { /* Only VFs need ACLs for VST and spoofchk filtering */
		esw_vport_enable_ingress_acl(esw, vport);
		esw_vport_enable_egress_acl(esw, vport);
		esw_vport_ingress_config(esw, vport);
		esw_vport_egress_config(esw, vport);
	}

	mlx5_modify_vport_admin_state(esw->dev,
				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
				      vport_num,
				      MLX5_ESW_VPORT_ADMIN_STATE_AUTO);

	/* Sync with current vport context */
	vport->enabled_events = enable_events;
	esw_vport_change_handler(&vport->vport_change_handler);

	spin_lock_irqsave(&vport->lock, flags);
	vport->enabled = true;
	spin_unlock_irqrestore(&vport->lock, flags);

	arm_vport_context_events_cmd(esw->dev, vport_num, enable_events);

	esw->enabled_vports++;
	esw_debug(esw->dev, "Enabled VPORT(%d)\n", vport_num);
	mutex_unlock(&vport->state_lock);
}
static void esw_cleanup_vport(struct mlx5_eswitch *esw, u16 vport_num)
{
	struct mlx5_vport *vport = &esw->vports[vport_num];
	struct l2addr_node *node;
	struct vport_addr *addr;
	struct hlist_node *tmp;
	int hi;

	for_each_l2hash_node(node, tmp, vport->uc_list, hi) {
		addr = container_of(node, struct vport_addr, node);
		addr->action = MLX5_ACTION_DEL;
	}
	esw_apply_vport_addr_list(esw, vport_num, MLX5_NIC_VPORT_LIST_TYPE_UC);

	for_each_l2hash_node(node, tmp, vport->mc_list, hi) {
		addr = container_of(node, struct vport_addr, node);
		addr->action = MLX5_ACTION_DEL;
	}
	esw_apply_vport_addr_list(esw, vport_num, MLX5_NIC_VPORT_LIST_TYPE_MC);
}
static void esw_disable_vport(struct mlx5_eswitch *esw, int vport_num)
{
	struct mlx5_vport *vport = &esw->vports[vport_num];
	unsigned long flags;

	mutex_lock(&vport->state_lock);
	if (!vport->enabled) {
		mutex_unlock(&vport->state_lock);
		return;
	}

	esw_debug(esw->dev, "Disabling vport(%d)\n", vport_num);
	/* Mark this vport as disabled to discard new events */
	spin_lock_irqsave(&vport->lock, flags);
	vport->enabled = false;
	vport->enabled_events = 0;
	spin_unlock_irqrestore(&vport->lock, flags);

	mlx5_modify_vport_admin_state(esw->dev,
				      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
				      vport_num,
				      MLX5_ESW_VPORT_ADMIN_STATE_DOWN);
	/* Wait for current already scheduled events to complete */
	flush_workqueue(esw->work_queue);
	/* Disable events from this vport */
	arm_vport_context_events_cmd(esw->dev, vport->vport, 0);
	/* We don't assume VFs will cleanup after themselves */
	esw_cleanup_vport(esw, vport_num);
	if (vport_num) {
		esw_vport_disable_egress_acl(esw, vport);
		esw_vport_disable_ingress_acl(esw, vport);
	}
	esw->enabled_vports--;
	mutex_unlock(&vport->state_lock);
}
/* Public E-Switch API */
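/*
 * Switch from the non-SRIOV setup (PF vport only, UC events) to full SRIOV
 * operation: take down vport 0, create the FDB, then enable the PF and all
 * VF vports with UC/MC change events.
 */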
int mlx5_eswitch_enable_sriov(struct mlx5_eswitch *esw, int nvfs)
{
	int err;
	int i;

	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
		return 0;

	if (!MLX5_CAP_GEN(esw->dev, eswitch_flow_table) ||
	    !MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ft_support)) {
		esw_warn(esw->dev, "E-Switch FDB is not supported, aborting ...\n");
		return -EOPNOTSUPP;
	}

	if (!MLX5_CAP_ESW_INGRESS_ACL(esw->dev, ft_support))
		esw_warn(esw->dev, "E-Switch ingress ACL is not supported by FW\n");

	if (!MLX5_CAP_ESW_EGRESS_ACL(esw->dev, ft_support))
		esw_warn(esw->dev, "E-Switch egress ACL is not supported by FW\n");

	esw_info(esw->dev, "E-Switch enable SRIOV: nvfs(%d)\n", nvfs);

	esw_disable_vport(esw, 0);

	err = esw_create_fdb_table(esw);
	if (err)
		goto abort;

	for (i = 0; i <= nvfs; i++)
		esw_enable_vport(esw, i, SRIOV_VPORT_EVENTS);

	esw_info(esw->dev, "SRIOV enabled: active vports(%d)\n",
		 esw->enabled_vports);

	return 0;

abort:
	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
	return err;
}
void mlx5_eswitch_disable_sriov(struct mlx5_eswitch *esw)
{
	int i;

	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
		return;

	esw_info(esw->dev, "disable SRIOV: active vports(%d)\n",
		 esw->enabled_vports);

	for (i = 0; i < esw->total_vports; i++)
		esw_disable_vport(esw, i);

	esw_destroy_fdb_table(esw);

	/* VPORT 0 (PF) must be enabled back with non-sriov configuration */
	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
}
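
/*
 * Allocate the eswitch state (L2 table bitmap, workqueue, vport array) at
 * probe time. Only vport 0 (the PF) is enabled here; VF vports are enabled
 * when SRIOV is turned on.
 */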
int mlx5_eswitch_init(struct mlx5_core_dev *dev, int total_vports)
{
	int l2_table_size = 1 << MLX5_CAP_GEN(dev, log_max_l2_table);
	struct mlx5_eswitch *esw;
	int vport_num;
	int err;

	if (!MLX5_CAP_GEN(dev, vport_group_manager) ||
	    MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
		return 0;

	esw_info(dev,
		 "Total vports %d, l2 table size(%d), per vport: max uc(%d) max mc(%d)\n",
		 total_vports, l2_table_size,
		 MLX5_MAX_UC_PER_VPORT(dev),
		 MLX5_MAX_MC_PER_VPORT(dev));

	esw = kzalloc(sizeof(*esw), GFP_KERNEL);
	if (!esw)
		return -ENOMEM;

	esw->dev = dev;

	esw->l2_table.bitmap = kcalloc(BITS_TO_LONGS(l2_table_size),
				       sizeof(uintptr_t), GFP_KERNEL);
	if (!esw->l2_table.bitmap) {
		err = -ENOMEM;
		goto abort;
	}
	esw->l2_table.size = l2_table_size;

	esw->work_queue = create_singlethread_workqueue("mlx5_esw_wq");
	if (!esw->work_queue) {
		err = -ENOMEM;
		goto abort;
	}

	esw->vports = kcalloc(total_vports, sizeof(struct mlx5_vport),
			      GFP_KERNEL);
	if (!esw->vports) {
		err = -ENOMEM;
		goto abort;
	}

	for (vport_num = 0; vport_num < total_vports; vport_num++) {
		struct mlx5_vport *vport = &esw->vports[vport_num];

		vport->vport = vport_num;
		vport->dev = dev;
		INIT_WORK(&vport->vport_change_handler,
			  esw_vport_change_handler);
		spin_lock_init(&vport->lock);
		mutex_init(&vport->state_lock);
	}

	esw->total_vports = total_vports;
	esw->enabled_vports = 0;

	dev->priv.eswitch = esw;
	esw_enable_vport(esw, 0, UC_ADDR_CHANGE);
	/* VF Vports will be enabled when SRIOV is enabled */
	return 0;
abort:
	if (esw->work_queue)
		destroy_workqueue(esw->work_queue);
	kfree(esw->l2_table.bitmap);
	kfree(esw->vports);
	kfree(esw);
	return err;
}
void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
{
	if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager) ||
	    MLX5_CAP_GEN(esw->dev, port_type) != MLX5_CAP_PORT_TYPE_ETH)
		return;

	esw_info(esw->dev, "cleanup\n");
	esw_disable_vport(esw, 0);

	esw->dev->priv.eswitch = NULL;
	destroy_workqueue(esw->work_queue);
	kfree(esw->l2_table.bitmap);
	kfree(esw->vports);
	kfree(esw);
}
void mlx5_eswitch_vport_event(struct mlx5_eswitch *esw, struct mlx5_eqe *eqe)
{
	struct mlx5_eqe_vport_change *vc_eqe = &eqe->data.vport_change;
	u16 vport_num = be16_to_cpu(vc_eqe->vport_num);
	struct mlx5_vport *vport;

	if (!esw) {
		printf("mlx5_core: WARN: ""MLX5 E-Switch: vport %d got an event while eswitch is not initialized\n", vport_num);
		return;
	}

	vport = &esw->vports[vport_num];
	spin_lock(&vport->lock);
	if (vport->enabled)
		queue_work(esw->work_queue, &vport->vport_change_handler);
	spin_unlock(&vport->lock);
}
/* Vport Administration */
#define ESW_ALLOWED(esw) \
	(esw && MLX5_CAP_GEN(esw->dev, vport_group_manager) && mlx5_core_is_pf(esw->dev))
#define LEGAL_VPORT(esw, vport) (vport >= 0 && vport < esw->total_vports)
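
/*
 * Derive a node GUID from a MAC address using the usual EUI-64 expansion:
 * the 0xFF, 0xFE bytes are inserted between the OUI and the NIC-specific
 * part of the address.
 */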
static void node_guid_gen_from_mac(u64 *node_guid, u8 mac[ETH_ALEN])
{
	((u8 *)node_guid)[7] = mac[0];
	((u8 *)node_guid)[6] = mac[1];
	((u8 *)node_guid)[5] = mac[2];
	((u8 *)node_guid)[4] = 0xff;
	((u8 *)node_guid)[3] = 0xfe;
	((u8 *)node_guid)[2] = mac[3];
	((u8 *)node_guid)[1] = mac[4];
	((u8 *)node_guid)[0] = mac[5];
}
int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
			       int vport, u8 mac[ETH_ALEN])
{
	u64 node_guid;
	int err = 0;

	if (!ESW_ALLOWED(esw))
		return -EPERM;
	if (!LEGAL_VPORT(esw, vport))
		return -EINVAL;

	err = mlx5_modify_nic_vport_mac_address(esw->dev, vport, mac);
	if (err) {
		mlx5_core_warn(esw->dev,
			       "Failed to mlx5_modify_nic_vport_mac vport(%d) err=(%d)\n",
			       vport, err);
		return err;
	}

	node_guid_gen_from_mac(&node_guid, mac);
	err = mlx5_modify_nic_vport_node_guid(esw->dev, vport, node_guid);
	if (err)
		mlx5_core_warn(esw->dev,
			       "Failed to mlx5_modify_nic_vport_node_guid vport(%d) err=(%d)\n",
			       vport, err);

	return err;
}
int mlx5_eswitch_set_vport_state(struct mlx5_eswitch *esw,
				 int vport, int link_state)
{
	if (!ESW_ALLOWED(esw))
		return -EPERM;
	if (!LEGAL_VPORT(esw, vport))
		return -EINVAL;

	return mlx5_modify_vport_admin_state(esw->dev,
					     MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
					     vport, link_state);
}
int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw,
				  int vport, struct mlx5_esw_vport_info *ivi)
{
	u16 vlan;
	u8 qos;

	if (!ESW_ALLOWED(esw))
		return -EPERM;
	if (!LEGAL_VPORT(esw, vport))
		return -EINVAL;

	memset(ivi, 0, sizeof(*ivi));
	ivi->vf = vport - 1;
	mlx5_query_nic_vport_mac_address(esw->dev, vport, ivi->mac);
	ivi->linkstate = mlx5_query_vport_admin_state(esw->dev,
						      MLX5_QUERY_VPORT_STATE_IN_OP_MOD_ESW_VPORT,
						      vport);
	query_esw_vport_cvlan(esw->dev, vport, &vlan, &qos);
	ivi->vlan = vlan;
	ivi->qos = qos;

	return 0;
}
int mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
				int vport, u16 vlan, u8 qos)
{
	struct mlx5_vport *evport;
	int err = 0;
	int set = 0;

	if (!ESW_ALLOWED(esw))
		return -EPERM;
	if (!LEGAL_VPORT(esw, vport) || (vlan > 4095) || (qos > 7))
		return -EINVAL;

	if (vlan || qos)
		set = 1;

	evport = &esw->vports[vport];

	err = modify_esw_vport_cvlan(esw->dev, vport, vlan, qos, set);
	if (err)
		return err;

	mutex_lock(&evport->state_lock);
	evport->vlan = vlan;
	evport->qos = qos;
	if (evport->enabled) {
		esw_vport_ingress_config(esw, evport);
		esw_vport_egress_config(esw, evport);
	}
	mutex_unlock(&evport->state_lock);