From d1bed300306f297725cd17bd8ce8a4f6797dc492 Mon Sep 17 00:00:00 2001 From: Hans Petter Selasky Date: Thu, 16 May 2019 16:31:39 +0000 Subject: [PATCH] MFC r347306: Implement reading PCI power status in mlx5core. Implement a watchdog as part of the health care subsystem which reads the PCI power status during startup and upon the PCI power status change event and stores it into the core device structure. This value is then exported to user-space via a read-only SYSCTL. A dmesg print has been added to inform the admin about the PCI power status. Sponsored by: Mellanox Technologies --- sys/dev/mlx5/device.h | 1 + sys/dev/mlx5/driver.h | 6 ++ sys/dev/mlx5/mlx5_core/mlx5_eq.c | 12 +++- sys/dev/mlx5/mlx5_core/mlx5_health.c | 73 ++++++++++++++++++++-- sys/dev/mlx5/mlx5_core/mlx5_main.c | 23 +++++++ sys/dev/mlx5/mlx5_ifc.h | 90 +++++++++++++++++++++++++++- 6 files changed, 197 insertions(+), 8 deletions(-) diff --git a/sys/dev/mlx5/device.h b/sys/dev/mlx5/device.h index 21149206748..9315ec788b9 100644 --- a/sys/dev/mlx5/device.h +++ b/sys/dev/mlx5/device.h @@ -1215,6 +1215,7 @@ static inline int mlx5_get_cqe_format(const struct mlx5_cqe64 *cqe) enum { MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1, + MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5, }; /* 8 regular priorities + 1 for multicast */ diff --git a/sys/dev/mlx5/driver.h b/sys/dev/mlx5/driver.h index b6ebdcb373e..95fee59b477 100644 --- a/sys/dev/mlx5/driver.h +++ b/sys/dev/mlx5/driver.h @@ -506,6 +506,7 @@ struct mlx5_core_health { int miss_counter; u32 fatal_error; struct workqueue_struct *wq_watchdog; + struct work_struct work_watchdog; /* wq spinlock to synchronize draining */ spinlock_t wq_lock; struct workqueue_struct *wq; @@ -705,6 +706,8 @@ struct mlx5_core_dev { struct sysctl_ctx_list sysctl_ctx; int msix_eqvec; + int pwr_status; + int pwr_value; struct { struct mlx5_rsvd_gids reserved_gids; @@ -955,6 +958,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health); void 
mlx5_drain_health_wq(struct mlx5_core_dev *dev); void mlx5_drain_health_recovery(struct mlx5_core_dev *dev); void mlx5_trigger_health_work(struct mlx5_core_dev *dev); +void mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev); #define mlx5_buf_alloc_node(dev, size, direct, buf, node) \ mlx5_buf_alloc(dev, size, direct, buf) @@ -1089,6 +1093,8 @@ int mlx5_vsc_write(struct mlx5_core_dev *mdev, u32 addr, const u32 *data); int mlx5_vsc_read(struct mlx5_core_dev *mdev, u32 addr, u32 *data); int mlx5_vsc_lock_addr_space(struct mlx5_core_dev *mdev, u32 addr); int mlx5_vsc_unlock_addr_space(struct mlx5_core_dev *mdev, u32 addr); +int mlx5_pci_read_power_status(struct mlx5_core_dev *mdev, + u16 *p_power, u8 *p_status); static inline u32 mlx5_mkey_to_idx(u32 mkey) { diff --git a/sys/dev/mlx5/mlx5_core/mlx5_eq.c b/sys/dev/mlx5/mlx5_core/mlx5_eq.c index f3ab960e54f..43d19f8de6f 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_eq.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_eq.c @@ -561,6 +561,11 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev) if (MLX5_CAP_GEN(dev, temp_warn_event)) async_event_mask |= (1ull << MLX5_EVENT_TYPE_TEMP_WARN_EVENT); + if (MLX5_CAP_GEN(dev, general_notification_event)) { + async_event_mask |= (1ull << + MLX5_EVENT_TYPE_CODING_GENERAL_NOTIFICATION_EVENT); + } + err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD, MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD, "mlx5_cmd_eq", &dev->priv.uuari.uars[0]); @@ -716,8 +721,8 @@ static void mlx5_port_general_notification_event(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe) { u8 port = (eqe->data.port.port >> 4) & 0xf; - u32 rqn = 0; - struct mlx5_eqe_general_notification_event *general_event = NULL; + u32 rqn; + struct mlx5_eqe_general_notification_event *general_event; switch (eqe->sub_type) { case MLX5_GEN_EVENT_SUBTYPE_DELAY_DROP_TIMEOUT: @@ -725,6 +730,9 @@ static void mlx5_port_general_notification_event(struct mlx5_core_dev *dev, rqn = be32_to_cpu(general_event->rq_user_index_delay_drop) & 0xffffff; 
break; + case MLX5_GEN_EVENT_SUBTYPE_PCI_POWER_CHANGE_EVENT: + mlx5_trigger_health_watchdog(dev); + break; default: mlx5_core_warn(dev, "general event with unrecognized subtype: port %d, sub_type %d\n", diff --git a/sys/dev/mlx5/mlx5_core/mlx5_health.c b/sys/dev/mlx5/mlx5_core/mlx5_health.c index 5ad5ddaa93f..17cd5a47e03 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_health.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_health.c @@ -41,6 +41,7 @@ enum { MLX5_DROP_NEW_HEALTH_WORK, MLX5_DROP_NEW_RECOVERY_WORK, + MLX5_DROP_NEW_WATCHDOG_WORK, }; enum { @@ -506,6 +507,66 @@ static void print_health_info(struct mlx5_core_dev *dev) printf("mlx5_core: INFO: ""raw fw_ver 0x%08x\n", fw); } +static void health_watchdog(struct work_struct *work) +{ + struct mlx5_core_dev *dev; + u16 power; + u8 status; + int err; + /* Work handler: reads the PCI power status and caches it in the core device. */ + dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog); + /* Nothing to do unless the device advertises the MCAM power status feature. */ + if (!MLX5_CAP_GEN(dev, mcam_reg) || + !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power)) + return; + + err = mlx5_pci_read_power_status(dev, &power, &status); + if (err < 0) { + mlx5_core_warn(dev, "Failed reading power status: %d\n", err); + return; + } + + dev->pwr_value = power; + /* Update the cached status and log only when the reported status changed. */ + if (dev->pwr_status != status) { + device_t bsddev = dev->pdev->dev.bsddev; + + switch (status) { + case MLX5_MPEIN_PWR_STATUS_INVALID: + dev->pwr_status = status; + device_printf(bsddev, "PCI power is not published by the PCIe slot.\n"); + break; + case MLX5_MPEIN_PWR_STATUS_SUFFICIENT: + dev->pwr_status = status; + device_printf(bsddev, "PCIe slot advertised sufficient power (%uW).\n", power); + break; + case MLX5_MPEIN_PWR_STATUS_INSUFFICIENT: + dev->pwr_status = status; + device_printf(bsddev, "WARN: Detected insufficient power on the PCIe slot (%uW).\n", power); + break; + default: + dev->pwr_status = MLX5_MPEIN_PWR_STATUS_INVALID; + device_printf(bsddev, "WARN: Unknown power state detected(%d).\n", status); + break; + } + } +} + +void +mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + unsigned long flags; + + spin_lock_irqsave(&health->wq_lock, flags); + if 
(!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags)) + queue_work(health->wq_watchdog, &health->work_watchdog); + else + dev_err(&dev->pdev->dev, + "scheduling watchdog is not permitted at this stage\n"); + spin_unlock_irqrestore(&health->wq_lock, flags); +} + static void poll_health(unsigned long data) { struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data; @@ -516,9 +577,6 @@ static void poll_health(unsigned long data) if (dev->state != MLX5_DEVICE_STATE_UP) return; - if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) - goto out; - count = ioread32be(health->health_counter); if (count == health->prev) ++health->miss_counter; @@ -540,7 +598,6 @@ static void poll_health(unsigned long data) mlx5_trigger_health_work(dev); } -out: mod_timer(&health->timer, get_next_poll_jiffies()); } @@ -552,12 +609,16 @@ void mlx5_start_health_poll(struct mlx5_core_dev *dev) health->fatal_error = MLX5_SENSOR_NO_ERR; clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); health->health = &dev->iseg->health; health->health_counter = &dev->iseg->health_counter; setup_timer(&health->timer, poll_health, (unsigned long)dev); mod_timer(&health->timer, round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL)); + + /* do initial PCI power state readout */ + mlx5_trigger_health_watchdog(dev); } void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) @@ -569,6 +630,7 @@ void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); } @@ -583,9 +645,11 @@ void mlx5_drain_health_wq(struct mlx5_core_dev *dev) spin_lock_irqsave(&health->wq_lock, flags); set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags); 
set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags); + set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags); spin_unlock_irqrestore(&health->wq_lock, flags); cancel_delayed_work_sync(&health->recover_work); cancel_work_sync(&health->work); + cancel_work_sync(&health->work_watchdog); } void mlx5_drain_health_recovery(struct mlx5_core_dev *dev) @@ -628,6 +692,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) spin_lock_init(&health->wq_lock); INIT_WORK(&health->work, health_care); + INIT_WORK(&health->work_watchdog, health_watchdog); INIT_DELAYED_WORK(&health->recover_work, health_recover); return 0; diff --git a/sys/dev/mlx5/mlx5_core/mlx5_main.c b/sys/dev/mlx5/mlx5_core/mlx5_main.c index 51b846cea1b..9bd04fed67f 100644 --- a/sys/dev/mlx5/mlx5_core/mlx5_main.c +++ b/sys/dev/mlx5/mlx5_core/mlx5_main.c @@ -197,6 +197,21 @@ static int set_dma_caps(struct pci_dev *pdev) return err; } +int mlx5_pci_read_power_status(struct mlx5_core_dev *dev, + u16 *p_power, u8 *p_status) +{ + u32 in[MLX5_ST_SZ_DW(mpein_reg)] = {}; + u32 out[MLX5_ST_SZ_DW(mpein_reg)] = {}; + int err; + /* Access the MPEIN register through mlx5_core_access_reg(). */ + err = mlx5_core_access_reg(dev, in, sizeof(in), out, sizeof(out), + MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN, 0, 0); + /* NOTE: both outputs are stored even when the access failed; callers must check the return value before using them. */ + *p_status = MLX5_GET(mpein_reg, out, pwr_status); + *p_power = MLX5_GET(mpein_reg, out, pci_power); + return err; +} + static int mlx5_pci_enable_device(struct mlx5_core_dev *dev) { struct pci_dev *pdev = dev->pdev; @@ -1273,6 +1288,14 @@ static int init_one(struct pci_dev *pdev, SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), OID_AUTO, "msix_eqvec", CTLFLAG_RDTUN, &dev->msix_eqvec, 0, "Maximum number of MSIX event queue vectors, if set"); + SYSCTL_ADD_INT(&dev->sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), + OID_AUTO, "power_status", CTLFLAG_RD, &dev->pwr_status, 0, + "0:Invalid 1:Sufficient 2:Insufficient"); + SYSCTL_ADD_INT(&dev->sysctl_ctx, + SYSCTL_CHILDREN(device_get_sysctl_tree(bsddev)), + OID_AUTO, "power_value", CTLFLAG_RD, &dev->pwr_value, 0, + "Current 
power value in Watts"); INIT_LIST_HEAD(&priv->ctx_list); spin_lock_init(&priv->ctx_lock); diff --git a/sys/dev/mlx5/mlx5_ifc.h b/sys/dev/mlx5/mlx5_ifc.h index bcb0b626946..0eca4ff1c0b 100644 --- a/sys/dev/mlx5/mlx5_ifc.h +++ b/sys/dev/mlx5/mlx5_ifc.h @@ -8640,8 +8640,9 @@ struct mlx5_ifc_pcam_reg_bits { }; struct mlx5_ifc_mcam_enhanced_features_bits { - u8 reserved_at_0[0x7f]; - + u8 reserved_at_0[0x6e]; + u8 pcie_status_and_power[0x1]; /* capability bit tested before the power status is read */ + u8 reserved_at_6f[0x10]; u8 pcie_performance_group[0x1]; }; @@ -10000,6 +10001,91 @@ struct mlx5_ifc_mpcnt_reg_bits { union mlx5_ifc_mpcnt_cntrs_grp_data_layout_bits counter_set; }; +enum { + MLX5_ACCESS_REG_SUMMARY_CTRL_ID_MPEIN = 0x9050, + MLX5_MPEIN_PWR_STATUS_INVALID = 0, + MLX5_MPEIN_PWR_STATUS_SUFFICIENT = 1, + MLX5_MPEIN_PWR_STATUS_INSUFFICIENT = 2, +}; + +struct mlx5_ifc_mpein_reg_bits { + u8 reserved_at_0[0x2]; + u8 depth[0x6]; + u8 pcie_index[0x8]; + u8 node[0x8]; + u8 reserved_at_18[0x8]; + + u8 capability_mask[0x20]; + + u8 reserved_at_40[0x8]; + u8 link_width_enabled[0x8]; + u8 link_speed_enabled[0x10]; + + u8 lane0_physical_position[0x8]; + u8 link_width_active[0x8]; + u8 link_speed_active[0x10]; + + u8 num_of_pfs[0x10]; + u8 num_of_vfs[0x10]; + + u8 bdf0[0x10]; + u8 reserved_at_b0[0x10]; + + u8 max_read_request_size[0x4]; + u8 max_payload_size[0x4]; + u8 reserved_at_c8[0x5]; + u8 pwr_status[0x3]; + u8 port_type[0x4]; + u8 reserved_at_d4[0xb]; + u8 lane_reversal[0x1]; + + u8 reserved_at_e0[0x14]; + u8 pci_power[0xc]; + + u8 reserved_at_100[0x20]; + + u8 device_status[0x10]; + u8 port_state[0x8]; + u8 reserved_at_138[0x8]; + + u8 reserved_at_140[0x10]; + u8 receiver_detect_result[0x10]; + + u8 reserved_at_160[0x20]; +}; + +struct mlx5_ifc_mpein_reg_ext_bits { + u8 reserved_at_0[0x2]; + u8 depth[0x6]; + u8 pcie_index[0x8]; + u8 node[0x8]; + u8 reserved_at_18[0x8]; + + u8 reserved_at_20[0x20]; + + u8 reserved_at_40[0x8]; + u8 link_width_enabled[0x8]; + u8 link_speed_enabled[0x10]; + + u8 lane0_physical_position[0x8]; + 
u8 link_width_active[0x8]; + u8 link_speed_active[0x10]; + + u8 num_of_pfs[0x10]; + u8 num_of_vfs[0x10]; + + u8 bdf0[0x10]; + u8 reserved_at_b0[0x10]; + + u8 max_read_request_size[0x4]; + u8 max_payload_size[0x4]; + u8 reserved_at_c8[0x5]; + u8 pwr_status[0x3]; + u8 port_type[0x4]; + u8 reserved_at_d4[0xb]; + u8 lane_reversal[0x1]; +}; + struct mlx5_ifc_mcqi_cap_bits { u8 supported_info_bitmask[0x20]; -- 2.45.0