/* sys/dev/mlx5/mlx5_core/mlx5_health.c (FreeBSD) */
1 /*-
2  * Copyright (c) 2013-2019, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/random.h>
31 #include <linux/vmalloc.h>
32 #include <linux/hardirq.h>
33 #include <linux/delay.h>
34 #include <dev/mlx5/driver.h>
35 #include <dev/mlx5/mlx5_ifc.h>
36 #include "mlx5_core.h"
37
/* Base period between two health-counter polls (jitter is added later). */
#define MLX5_HEALTH_POLL_INTERVAL       (2 * HZ)
/* Consecutive polls with a frozen health counter before declaring trouble. */
#define MAX_MISSES                      3

/* Bit positions in mlx5_core_health.flags gating new work submission. */
enum {
        MLX5_DROP_NEW_HEALTH_WORK,
        MLX5_DROP_NEW_RECOVERY_WORK,
        MLX5_DROP_NEW_WATCHDOG_WORK,
};

/* Fatal-sensor classification, ordered by the priority used in
 * check_fatal_sensors(): PCI failures first, then NIC state, then FW. */
enum  {
        MLX5_SENSOR_NO_ERR              = 0,
        MLX5_SENSOR_PCI_COMM_ERR        = 1,
        MLX5_SENSOR_PCI_ERR             = 2,
        MLX5_SENSOR_NIC_DISABLED        = 3,
        MLX5_SENSOR_NIC_SW_RESET        = 4,
        MLX5_SENSOR_FW_SYND_RFR         = 5,
};

/* Tunable: allow the driver to issue a FW-level SW reset (hw.mlx5.fw_reset_enable). */
static int mlx5_fw_reset_enable = 1;
SYSCTL_INT(_hw_mlx5, OID_AUTO, fw_reset_enable, CTLFLAG_RWTUN,
    &mlx5_fw_reset_enable, 0,
    "Enable firmware reset");

/* Tunable: rate-limit between two FW resets, in seconds (hw.mlx5.sw_reset_timeout). */
static unsigned int sw_reset_to = 1200;
SYSCTL_UINT(_hw_mlx5, OID_AUTO, sw_reset_timeout, CTLFLAG_RWTUN,
    &sw_reset_to, 0,
    "Minimum timeout in seconds between two firmware resets");
65
66
67 static int lock_sem_sw_reset(struct mlx5_core_dev *dev)
68 {
69         int ret;
70
71         /* Lock GW access */
72         ret = -mlx5_vsc_lock(dev);
73         if (ret) {
74                 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret);
75                 return ret;
76         }
77
78         ret = -mlx5_vsc_lock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET);
79         if (ret) {
80                 if (ret == -EBUSY)
81                         mlx5_core_dbg(dev, "SW reset FW semaphore already locked, another function will handle the reset\n");
82                 else
83                         mlx5_core_warn(dev, "SW reset semaphore lock return %d\n", ret);
84         }
85
86         /* Unlock GW access */
87         mlx5_vsc_unlock(dev);
88
89         return ret;
90 }
91
92 static int unlock_sem_sw_reset(struct mlx5_core_dev *dev)
93 {
94         int ret;
95
96         /* Lock GW access */
97         ret = -mlx5_vsc_lock(dev);
98         if (ret) {
99                 mlx5_core_warn(dev, "Timed out locking gateway %d\n", ret);
100                 return ret;
101         }
102
103         ret = -mlx5_vsc_unlock_addr_space(dev, MLX5_SEMAPHORE_SW_RESET);
104
105         /* Unlock GW access */
106         mlx5_vsc_unlock(dev);
107
108         return ret;
109 }
110
111 u8 mlx5_get_nic_state(struct mlx5_core_dev *dev)
112 {
113         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
114 }
115
116 void mlx5_set_nic_state(struct mlx5_core_dev *dev, u8 state)
117 {
118         u32 cur_cmdq_addr_l_sz;
119
120         cur_cmdq_addr_l_sz = ioread32be(&dev->iseg->cmdq_addr_l_sz);
121         iowrite32be((cur_cmdq_addr_l_sz & 0xFFFFF000) |
122                     state << MLX5_NIC_IFC_OFFSET,
123                     &dev->iseg->cmdq_addr_l_sz);
124 }
125
126 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
127 {
128         struct mlx5_core_health *health = &dev->priv.health;
129         struct mlx5_health_buffer __iomem *h = health->health;
130         u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
131         u8 synd = ioread8(&h->synd);
132
133         if (rfr && synd)
134                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
135         return rfr && synd;
136 }
137
/*
 * Work handler: force completion of all in-flight commands after a fatal
 * error, so waiters are not stuck forever.  Runs on health.wq_cmd (queued
 * from mlx5_enter_error_state()).
 */
static void mlx5_trigger_cmd_completions(struct work_struct *work)
{
        struct mlx5_core_dev *dev =
            container_of(work, struct mlx5_core_dev, priv.health.work_cmd_completion);
        unsigned long flags;
        u64 vector;

        /* wait for pending handlers to complete */
        synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
        spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
        /*
         * Busy command slots are the zero bits of cmd.bitmask; build a
         * vector of them, limited to the 2^log_sz slots that exist.
         */
        vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
        if (!vector)
                goto no_trig;

        /* Mark the completions as forced, not hardware-generated. */
        vector |= MLX5_TRIGGERED_CMD_COMP;
        spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);

        mlx5_core_dbg(dev, "vector 0x%jx\n", (uintmax_t)vector);
        mlx5_cmd_comp_handler(dev, vector, MLX5_CMD_MODE_EVENTS);
        return;

no_trig:
        spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
}
162
163 static bool sensor_pci_no_comm(struct mlx5_core_dev *dev)
164 {
165         struct mlx5_core_health *health = &dev->priv.health;
166         struct mlx5_health_buffer __iomem *h = health->health;
167         bool err = ioread32be(&h->fw_ver) == 0xffffffff;
168
169         return err;
170 }
171
/* Sensor: NIC interface mode reads MLX5_NIC_IFC_DISABLED. */
static bool sensor_nic_disabled(struct mlx5_core_dev *dev)
{
        return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_DISABLED;
}
176
/* Sensor: NIC interface mode reads MLX5_NIC_IFC_SW_RESET (reset in flight). */
static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev)
{
        return mlx5_get_nic_state(dev) == MLX5_NIC_IFC_SW_RESET;
}
181
182 static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
183 {
184         if (sensor_pci_no_comm(dev))
185                 return MLX5_SENSOR_PCI_COMM_ERR;
186         if (pci_channel_offline(dev->pdev))
187                 return MLX5_SENSOR_PCI_ERR;
188         if (sensor_nic_disabled(dev))
189                 return MLX5_SENSOR_NIC_DISABLED;
190         if (sensor_nic_sw_reset(dev))
191                 return MLX5_SENSOR_NIC_SW_RESET;
192         if (sensor_fw_synd_rfr(dev))
193                 return MLX5_SENSOR_FW_SYND_RFR;
194
195         return MLX5_SENSOR_NO_ERR;
196 }
197
/*
 * Issue a FW-level SW reset by writing MLX5_NIC_IFC_SW_RESET into the
 * NIC interface field, provided the tunable allows it, the FW advertises
 * support, and the current fatal error is one a reset can actually help.
 */
static void reset_fw_if_needed(struct mlx5_core_dev *dev)
{
        bool supported;
        u32 cmdq_addr, fatal_error;

        /* Administrator opt-out via hw.mlx5.fw_reset_enable. */
        if (!mlx5_fw_reset_enable)
                return;
        /* FW advertises SW-reset support in the initializing register. */
        supported = (ioread32be(&dev->iseg->initializing) >>
            MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
        if (!supported)
                return;

        /* The reset only needs to be issued by one PF. The health buffer is
         * shared between all functions, and will be cleared during a reset.
         * Check again to avoid a redundant 2nd reset. If the fatal errors was
         * PCI related a reset won't help.
         */
        fatal_error = check_fatal_sensors(dev);
        if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
            fatal_error == MLX5_SENSOR_NIC_DISABLED ||
            fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
                mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
                return;
        }

        mlx5_core_warn(dev, "Issuing FW Reset\n");
        /* Write the NIC interface field to initiate the reset, the command
         * interface address also resides here, don't overwrite it.
         */
        cmdq_addr = ioread32be(&dev->iseg->cmdq_addr_l_sz);
        iowrite32be((cmdq_addr & 0xFFFFF000) |
                    MLX5_NIC_IFC_SW_RESET << MLX5_NIC_IFC_OFFSET,
                    &dev->iseg->cmdq_addr_l_sz);
}
232
/*
 * Rate-limit FW resets: allow one only if at least sw_reset_to seconds
 * have elapsed since the previous request (or if this is the first
 * request).  Always records the current time as the new last-request
 * stamp, so denied attempts also restart the window.
 */
static bool
mlx5_health_allow_reset(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        unsigned int delta;
        bool ret;

        if (health->last_reset_req != 0) {
                /* Elapsed ticks since the last request, converted to seconds.
                 * Unsigned arithmetic handles tick-counter wraparound. */
                delta = ticks - health->last_reset_req;
                delta /= hz;
                ret = delta >= sw_reset_to;
        } else {
                /* last_reset_req == 0 means "never requested". */
                ret = true;
        }

        /*
         * In principle, ticks may be 0. Setting it to off by one (-1)
         * to prevent certain reset in next request.
         */
        health->last_reset_req = ticks ? : -1;
        if (!ret)
                mlx5_core_warn(dev, "Firmware reset elided due to "
                    "auto-reset frequency threshold.\n");
        return (ret);
}
258
#define MLX5_CRDUMP_WAIT_MS     60000
#define MLX5_FW_RESET_WAIT_MS   1000
#define MLX5_NIC_STATE_POLL_MS  5
/*
 * Transition the device into the INTERNAL_ERROR state, flush in-flight
 * commands, optionally take a crdump and issue a FW SW reset, then wait
 * for the NIC interface to report DISABLED before broadcasting the
 * SYS_ERROR event.  'force' skips the dump/reset/poll sequence.
 */
void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
{
        int end, delay_ms = MLX5_CRDUMP_WAIT_MS;
        u32 fatal_error;
        int lock = -EBUSY;

        fatal_error = check_fatal_sensors(dev);

        if (fatal_error || force) {
                /* xchg makes the INTERNAL_ERROR transition one-shot: if we
                 * lost the race, another context is handling the error. */
                if (xchg(&dev->state, MLX5_DEVICE_STATE_INTERNAL_ERROR) ==
                    MLX5_DEVICE_STATE_INTERNAL_ERROR)
                        return;
                if (!force)
                        mlx5_core_err(dev, "internal state error detected\n");

                /*
                 * Queue the command completion handler on the command
                 * work queue to avoid racing with the real command
                 * completion handler and then wait for it to
                 * complete:
                 */
                queue_work(dev->priv.health.wq_cmd, &dev->priv.health.work_cmd_completion);
                flush_workqueue(dev->priv.health.wq_cmd);
        }

        mutex_lock(&dev->intf_state_mutex);

        if (force)
                goto err_state_done;

        if (fatal_error == MLX5_SENSOR_FW_SYND_RFR &&
            mlx5_health_allow_reset(dev)) {
                /* Get cr-dump and reset FW semaphore */
                if (mlx5_core_is_pf(dev))
                        lock = lock_sem_sw_reset(dev);

                /* Execute cr-dump and SW reset */
                if (lock != -EBUSY) {
                        mlx5_fwdump(dev);
                        reset_fw_if_needed(dev);
                        delay_ms = MLX5_FW_RESET_WAIT_MS;
                }
        }

        /* Recover from SW reset */
        end = jiffies + msecs_to_jiffies(delay_ms);
        do {
                if (sensor_nic_disabled(dev))
                        break;

                msleep(MLX5_NIC_STATE_POLL_MS);
        } while (!time_after(jiffies, end));

        if (!sensor_nic_disabled(dev)) {
                dev_err(&dev->pdev->dev, "NIC IFC still %d after %ums.\n",
                        mlx5_get_nic_state(dev), delay_ms);
        }

        /* Release FW semaphore if you are the lock owner */
        if (!lock)
                unlock_sem_sw_reset(dev);

        mlx5_core_err(dev, "system error event triggered\n");

err_state_done:
        mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 1);
        mutex_unlock(&dev->intf_state_mutex);
}
330
331 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
332 {
333         u8 nic_mode = mlx5_get_nic_state(dev);
334
335         if (nic_mode == MLX5_NIC_IFC_SW_RESET) {
336                 /* The IFC mode field is 3 bits, so it will read 0x7 in two cases:
337                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
338                  *    and this is a VF), this is not recoverable by SW reset.
339                  *    Logging of this is handled elsewhere.
340                  * 2. FW reset has been issued by another function, driver can
341                  *    be reloaded to recover after the mode switches to
342                  *    MLX5_NIC_IFC_DISABLED.
343                  */
344                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
345                         mlx5_core_warn(dev, "NIC SW reset is already progress\n");
346                 else
347                         mlx5_core_warn(dev, "Communication with FW over the PCI link is down\n");
348         } else {
349                 mlx5_core_warn(dev, "NIC mode %d\n", nic_mode);
350         }
351
352         mlx5_disable_device(dev);
353 }
354
355 #define MLX5_FW_RESET_WAIT_MS   1000
356 #define MLX5_NIC_STATE_POLL_MS  5
357 static void health_recover(struct work_struct *work)
358 {
359         unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS);
360         struct mlx5_core_health *health;
361         struct delayed_work *dwork;
362         struct mlx5_core_dev *dev;
363         struct mlx5_priv *priv;
364         bool recover = true;
365         u8 nic_mode;
366
367         dwork = container_of(work, struct delayed_work, work);
368         health = container_of(dwork, struct mlx5_core_health, recover_work);
369         priv = container_of(health, struct mlx5_priv, health);
370         dev = container_of(priv, struct mlx5_core_dev, priv);
371
372         mtx_lock(&Giant);       /* XXX newbus needs this */
373
374         if (sensor_pci_no_comm(dev)) {
375                 dev_err(&dev->pdev->dev, "health recovery flow aborted, PCI reads still not working\n");
376                 recover = false;
377         }
378
379         nic_mode = mlx5_get_nic_state(dev);
380         while (nic_mode != MLX5_NIC_IFC_DISABLED &&
381                !time_after(jiffies, end)) {
382                 msleep(MLX5_NIC_STATE_POLL_MS);
383                 nic_mode = mlx5_get_nic_state(dev);
384         }
385
386         if (nic_mode != MLX5_NIC_IFC_DISABLED) {
387                 dev_err(&dev->pdev->dev, "health recovery flow aborted, unexpected NIC IFC mode %d.\n",
388                         nic_mode);
389                 recover = false;
390         }
391
392         if (recover) {
393                 dev_err(&dev->pdev->dev, "starting health recovery flow\n");
394                 mlx5_recover_device(dev);
395         }
396
397         mtx_unlock(&Giant);
398 }
399
400 /* How much time to wait until health resetting the driver (in msecs) */
401 #define MLX5_RECOVERY_DELAY_MSECS 60000
402 #define MLX5_RECOVERY_NO_DELAY 0
403 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev)
404 {
405         return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR ||
406                 dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR        ?
407                 MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY;
408 }
409
410 static void health_care(struct work_struct *work)
411 {
412         struct mlx5_core_health *health;
413         unsigned long recover_delay;
414         struct mlx5_core_dev *dev;
415         struct mlx5_priv *priv;
416         unsigned long flags;
417
418         health = container_of(work, struct mlx5_core_health, work);
419         priv = container_of(health, struct mlx5_priv, health);
420         dev = container_of(priv, struct mlx5_core_dev, priv);
421
422         mlx5_core_warn(dev, "handling bad device here\n");
423         mlx5_handle_bad_state(dev);
424         recover_delay = msecs_to_jiffies(get_recovery_delay(dev));
425
426         spin_lock_irqsave(&health->wq_lock, flags);
427         if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) {
428                 mlx5_core_warn(dev, "Scheduling recovery work with %lums delay\n",
429                                recover_delay);
430                 schedule_delayed_work(&health->recover_work, recover_delay);
431         } else {
432                 dev_err(&dev->pdev->dev,
433                         "new health works are not permitted at this stage\n");
434         }
435         spin_unlock_irqrestore(&health->wq_lock, flags);
436 }
437
438 static int get_next_poll_jiffies(void)
439 {
440         unsigned long next;
441
442         get_random_bytes(&next, sizeof(next));
443         next %= HZ;
444         next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
445
446         return next;
447 }
448
449 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
450 {
451         struct mlx5_core_health *health = &dev->priv.health;
452         unsigned long flags;
453
454         spin_lock_irqsave(&health->wq_lock, flags);
455         if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
456                 queue_work(health->wq, &health->work);
457         else
458                 dev_err(&dev->pdev->dev,
459                         "new health works are not permitted at this stage\n");
460         spin_unlock_irqrestore(&health->wq_lock, flags);
461 }
462
463 static const char *hsynd_str(u8 synd)
464 {
465         switch (synd) {
466         case MLX5_HEALTH_SYNDR_FW_ERR:
467                 return "firmware internal error";
468         case MLX5_HEALTH_SYNDR_IRISC_ERR:
469                 return "irisc not responding";
470         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
471                 return "unrecoverable hardware error";
472         case MLX5_HEALTH_SYNDR_CRC_ERR:
473                 return "firmware CRC error";
474         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
475                 return "ICM fetch PCI error";
476         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
477                 return "HW fatal error\n";
478         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
479                 return "async EQ buffer overrun";
480         case MLX5_HEALTH_SYNDR_EQ_ERR:
481                 return "EQ error";
482         case MLX5_HEALTH_SYNDR_EQ_INV:
483                 return "Invalid EQ referenced";
484         case MLX5_HEALTH_SYNDR_FFSER_ERR:
485                 return "FFSER error";
486         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
487                 return "High temprature";
488         default:
489                 return "unrecognized error";
490         }
491 }
492
/*
 * Dump the device health buffer (assert info, FW version, syndrome) to
 * the console.  Silent when no syndrome is reported.
 */
static void print_health_info(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        struct mlx5_health_buffer __iomem *h = health->health;
        char fw_str[18];
        u32 fw;
        int i;

        /* If the syndrom is 0, the device is OK and no need to print buffer */
        if (!ioread8(&h->synd))
                return;

        for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
                printf("mlx5_core: INFO: ""assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));

        printf("mlx5_core: INFO: ""assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
        printf("mlx5_core: INFO: ""assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
        snprintf(fw_str, sizeof(fw_str), "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
        printf("mlx5_core: INFO: ""fw_ver %s\n", fw_str);
        printf("mlx5_core: INFO: ""hw_id 0x%08x\n", ioread32be(&h->hw_id));
        printf("mlx5_core: INFO: ""irisc_index %d\n", ioread8(&h->irisc_index));
        printf("mlx5_core: INFO: ""synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
        printf("mlx5_core: INFO: ""ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
        /* Raw 32-bit fw_ver as read from the health buffer (not decoded). */
        fw = ioread32be(&h->fw_ver);
        printf("mlx5_core: INFO: ""raw fw_ver 0x%08x\n", fw);
}
519
/*
 * Watchdog work: read the PCIe slot power status/value via the MPEIN
 * register path and log on status transitions.  No-op when the device
 * does not advertise the pcie_status_and_power MCAM feature.
 */
static void health_watchdog(struct work_struct *work)
{
        struct mlx5_core_dev *dev;
        u16 power;
        u8 status;
        int err;

        dev = container_of(work, struct mlx5_core_dev, priv.health.work_watchdog);

        if (!MLX5_CAP_GEN(dev, mcam_reg) ||
            !MLX5_CAP_MCAM_FEATURE(dev, pcie_status_and_power))
                return;

        err = mlx5_pci_read_power_status(dev, &power, &status);
        if (err < 0) {
                mlx5_core_warn(dev, "Failed reading power status: %d\n", err);
                return;
        }

        dev->pwr_value = power;

        /* Only log when the reported status changes. */
        if (dev->pwr_status != status) {
                device_t bsddev = dev->pdev->dev.bsddev;

                /* status meanings per the messages below: 0 = not published,
                 * 1 = sufficient, 2 = insufficient; others treated as unknown
                 * (pwr_status reset to 0 so a later valid report re-logs). */
                switch (status) {
                case 0:
                        dev->pwr_status = status;
                        device_printf(bsddev, "PCI power is not published by the PCIe slot.\n");
                        break;
                case 1:
                        dev->pwr_status = status;
                        device_printf(bsddev, "PCIe slot advertised sufficient power (%uW).\n", power);
                        break;
                case 2:
                        dev->pwr_status = status;
                        device_printf(bsddev, "WARN: Detected insufficient power on the PCIe slot (%uW).\n", power);
                        break;
                default:
                        dev->pwr_status = 0;
                        device_printf(bsddev, "WARN: Unknown power state detected(%d).\n", status);
                        break;
                }
        }
}
564
565 void
566 mlx5_trigger_health_watchdog(struct mlx5_core_dev *dev)
567 {
568         struct mlx5_core_health *health = &dev->priv.health;
569         unsigned long flags;
570
571         spin_lock_irqsave(&health->wq_lock, flags);
572         if (!test_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags))
573                 queue_work(health->wq_watchdog, &health->work_watchdog);
574         else
575                 dev_err(&dev->pdev->dev,
576                         "scheduling watchdog is not permitted at this stage\n");
577         spin_unlock_irqrestore(&health->wq_lock, flags);
578 }
579
/*
 * Timer callback: periodically sample the FW health counter and the
 * fatal sensors.  A counter frozen for MAX_MISSES consecutive polls is
 * logged once; a newly detected fatal sensor latches fatal_error and
 * triggers the health work.  Reschedules itself with jitter via
 * get_next_poll_jiffies().
 */
static void poll_health(unsigned long data)
{
        struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
        struct mlx5_core_health *health = &dev->priv.health;
        u32 fatal_error;
        u32 count;

        if (dev->state != MLX5_DEVICE_STATE_UP)
                return;

        count = ioread32be(health->health_counter);
        if (count == health->prev)
                ++health->miss_counter;
        else
                health->miss_counter = 0;

        health->prev = count;
        /* == (not >=) so the message is printed exactly once per episode. */
        if (health->miss_counter == MAX_MISSES) {
                mlx5_core_err(dev, "device's health compromised - reached miss count\n");
                print_health_info(dev);
        }

        fatal_error = check_fatal_sensors(dev);

        /* Only act on the first transition into a fatal state. */
        if (fatal_error && !health->fatal_error) {
                mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
                dev->priv.health.fatal_error = fatal_error;
                print_health_info(dev);
                mlx5_trigger_health_work(dev);
        }

        mod_timer(&health->timer, get_next_poll_jiffies());
}
613
614 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
615 {
616         struct mlx5_core_health *health = &dev->priv.health;
617
618         init_timer(&health->timer);
619         health->fatal_error = MLX5_SENSOR_NO_ERR;
620         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
621         clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
622         clear_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
623         health->health = &dev->iseg->health;
624         health->health_counter = &dev->iseg->health_counter;
625
626         setup_timer(&health->timer, poll_health, (unsigned long)dev);
627         mod_timer(&health->timer,
628                   round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL));
629
630         /* do initial PCI power state readout */
631         mlx5_trigger_health_watchdog(dev);
632 }
633
/*
 * Stop the health poll timer.  When disable_health is set, first fence
 * off all new health/recovery/watchdog work (bits must be set before the
 * timer is killed so a racing poll_health() cannot queue new work).
 */
void mlx5_stop_health_poll(struct mlx5_core_dev *dev, bool disable_health)
{
        struct mlx5_core_health *health = &dev->priv.health;
        unsigned long flags;

        if (disable_health) {
                spin_lock_irqsave(&health->wq_lock, flags);
                set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
                set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
                set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
                spin_unlock_irqrestore(&health->wq_lock, flags);
        }

        del_timer_sync(&health->timer);
}
649
/*
 * Drain all health-related work: first fence off new submissions under
 * wq_lock, then synchronously cancel anything already queued or running.
 * The fence must precede the cancels so nothing can be re-queued.
 */
void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
{
        struct mlx5_core_health *health = &dev->priv.health;
        unsigned long flags;

        spin_lock_irqsave(&health->wq_lock, flags);
        set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
        set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
        set_bit(MLX5_DROP_NEW_WATCHDOG_WORK, &health->flags);
        spin_unlock_irqrestore(&health->wq_lock, flags);
        cancel_delayed_work_sync(&health->recover_work);
        cancel_work_sync(&health->work);
        cancel_work_sync(&health->work_watchdog);
}
664
665 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
666 {
667         struct mlx5_core_health *health = &dev->priv.health;
668         unsigned long flags;
669
670         spin_lock_irqsave(&health->wq_lock, flags);
671         set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
672         spin_unlock_irqrestore(&health->wq_lock, flags);
673         cancel_delayed_work_sync(&dev->priv.health.recover_work);
674 }
675
676 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
677 {
678         struct mlx5_core_health *health = &dev->priv.health;
679
680         destroy_workqueue(health->wq);
681         destroy_workqueue(health->wq_watchdog);
682         destroy_workqueue(health->wq_cmd);
683 }
684
685 int mlx5_health_init(struct mlx5_core_dev *dev)
686 {
687         struct mlx5_core_health *health;
688         char name[64];
689
690         health = &dev->priv.health;
691
692         snprintf(name, sizeof(name), "%s-rec", dev_name(&dev->pdev->dev));
693         health->wq = create_singlethread_workqueue(name);
694         if (!health->wq)
695                 goto err_recovery;
696
697         snprintf(name, sizeof(name), "%s-wdg", dev_name(&dev->pdev->dev));
698         health->wq_watchdog = create_singlethread_workqueue(name);
699         if (!health->wq_watchdog)
700                 goto err_watchdog;
701
702         snprintf(name, sizeof(name), "%s-cmd", dev_name(&dev->pdev->dev));
703         health->wq_cmd = create_singlethread_workqueue(name);
704         if (!health->wq_cmd)
705                 goto err_cmd;
706
707         spin_lock_init(&health->wq_lock);
708         INIT_WORK(&health->work, health_care);
709         INIT_WORK(&health->work_watchdog, health_watchdog);
710         INIT_WORK(&health->work_cmd_completion, mlx5_trigger_cmd_completions);
711         INIT_DELAYED_WORK(&health->recover_work, health_recover);
712
713         return 0;
714
715 err_cmd:
716         destroy_workqueue(health->wq_watchdog);
717 err_watchdog:
718         destroy_workqueue(health->wq);
719 err_recovery:
720         return -ENOMEM;
721 }