]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/dev/mlx5/mlx5_core/mlx5_health.c
MFV r333668:
[FreeBSD/FreeBSD.git] / sys / dev / mlx5 / mlx5_core / mlx5_health.c
1 /*-
2  * Copyright (c) 2013-2015, Mellanox Technologies, Ltd.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS `AS IS' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD$
26  */
27
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/random.h>
31 #include <linux/vmalloc.h>
32 #include <linux/hardirq.h>
33 #include <linux/delay.h>
34 #include <dev/mlx5/driver.h>
35 #include <dev/mlx5/mlx5_ifc.h>
36 #include "mlx5_core.h"
37
/* Base distance between two health polls, expressed in jiffies (2 * HZ). */
#define MLX5_HEALTH_POLL_INTERVAL	(2 * HZ)
/*
 * Number of consecutive polls with an unchanged health counter at which
 * poll_health() reports the device as compromised.
 */
#define MAX_MISSES			3
40
/*
 * Values of the NIC interface mode field that get_nic_mode() extracts from
 * the cmdq_addr_l_sz word of the initialization segment.
 */
enum {
	MLX5_NIC_IFC_FULL		= 0,
	/* Mode the recovery paths wait for before going on. */
	MLX5_NIC_IFC_DISABLED		= 1,
	MLX5_NIC_IFC_NO_DRAM_NIC	= 2,
	/* Written by reset_fw_if_needed() to request a firmware reset. */
	MLX5_NIC_IFC_SW_RESET		= 7,
};
47
/*
 * Bit numbers in mlx5_core_health.flags; they are tested and modified with
 * test_bit()/set_bit()/clear_bit() under health->wq_lock.
 */
enum {
	/* When set, mlx5_trigger_health_work() refuses to queue health work. */
	MLX5_DROP_NEW_HEALTH_WORK	= 0,
	/* When set, health_care() refuses to schedule recovery work. */
	MLX5_DROP_NEW_RECOVERY_WORK	= 1,
};
52
/* Result codes of check_fatal_sensors(), listed in the order they are probed. */
enum {
	/* No sensor tripped. */
	MLX5_SENSOR_NO_ERR		= 0,
	/* The health buffer's fw_ver register reads back as 0xffffffff. */
	MLX5_SENSOR_PCI_COMM_ERR	= 1,
	/* pci_channel_offline() reports the PCI channel as offline. */
	MLX5_SENSOR_PCI_ERR		= 2,
	/* NIC interface mode is MLX5_NIC_IFC_DISABLED. */
	MLX5_SENSOR_NIC_DISABLED	= 3,
	/* NIC interface mode is MLX5_NIC_IFC_SW_RESET. */
	MLX5_SENSOR_NIC_SW_RESET	= 4,
	/* Both the rfr field and the syndrome are non-zero. */
	MLX5_SENSOR_FW_SYND_RFR		= 5,
};
61
62 static int lock_sem_sw_reset(struct mlx5_core_dev *dev, int state)
63 {
64         int ret, err;
65
66         /* Lock GW access */
67         ret = mlx5_pciconf_cap9_sem(dev, LOCK);
68         if (ret) {
69                 mlx5_core_warn(dev, "Timed out locking gateway %d, %d\n", state, ret);
70                 return ret;
71         }
72
73         ret = mlx5_pciconf_set_sem_addr_space(dev, MLX5_SEMAPHORE_SW_RESET, state);
74         if (ret && state == LOCK) {
75                 if (ret == -EBUSY)
76                         mlx5_core_dbg(dev, "SW reset FW semaphore already locked, another function will handle the reset\n");
77                 else
78                         mlx5_core_warn(dev, "SW reset semaphore lock return %d\n", ret);
79         }
80
81         /* Unlock GW access */
82         err = mlx5_pciconf_cap9_sem(dev, UNLOCK);
83         if (err)
84                 mlx5_core_warn(dev, "Timed out unlocking gateway: state %d, err %d\n", state, err);
85
86         return ret;
87 }
88
89 static u8 get_nic_mode(struct mlx5_core_dev *dev)
90 {
91         return (ioread32be(&dev->iseg->cmdq_addr_l_sz) >> 8) & 7;
92 }
93
94 static bool sensor_fw_synd_rfr(struct mlx5_core_dev *dev)
95 {
96         struct mlx5_core_health *health = &dev->priv.health;
97         struct mlx5_health_buffer __iomem *h = health->health;
98         u32 rfr = ioread32be(&h->rfr) >> MLX5_RFR_OFFSET;
99         u8 synd = ioread8(&h->synd);
100
101         if (rfr && synd)
102                 mlx5_core_dbg(dev, "FW requests reset, synd: %d\n", synd);
103         return rfr && synd;
104 }
105
106 static void mlx5_trigger_cmd_completions(struct mlx5_core_dev *dev)
107 {
108         unsigned long flags;
109         u64 vector;
110
111         /* wait for pending handlers to complete */
112         synchronize_irq(dev->priv.msix_arr[MLX5_EQ_VEC_CMD].vector);
113         spin_lock_irqsave(&dev->cmd.alloc_lock, flags);
114         vector = ~dev->cmd.bitmask & ((1ul << (1 << dev->cmd.log_sz)) - 1);
115         if (!vector)
116                 goto no_trig;
117
118         vector |= MLX5_TRIGGERED_CMD_COMP;
119         spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
120
121         mlx5_core_dbg(dev, "vector 0x%jx\n", (uintmax_t)vector);
122         mlx5_cmd_comp_handler(dev, vector);
123         return;
124
125 no_trig:
126         spin_unlock_irqrestore(&dev->cmd.alloc_lock, flags);
127 }
128
129 static bool sensor_pci_no_comm(struct mlx5_core_dev *dev)
130 {
131         struct mlx5_core_health *health = &dev->priv.health;
132         struct mlx5_health_buffer __iomem *h = health->health;
133         bool err = ioread32be(&h->fw_ver) == 0xffffffff;
134
135         return err;
136 }
137
138 static bool sensor_nic_disabled(struct mlx5_core_dev *dev)
139 {
140         return get_nic_mode(dev) == MLX5_NIC_IFC_DISABLED;
141 }
142
143 static bool sensor_nic_sw_reset(struct mlx5_core_dev *dev)
144 {
145         return get_nic_mode(dev) == MLX5_NIC_IFC_SW_RESET;
146 }
147
148 static u32 check_fatal_sensors(struct mlx5_core_dev *dev)
149 {
150         if (sensor_pci_no_comm(dev))
151                 return MLX5_SENSOR_PCI_COMM_ERR;
152         if (pci_channel_offline(dev->pdev))
153                 return MLX5_SENSOR_PCI_ERR;
154         if (sensor_nic_disabled(dev))
155                 return MLX5_SENSOR_NIC_DISABLED;
156         if (sensor_nic_sw_reset(dev))
157                 return MLX5_SENSOR_NIC_SW_RESET;
158         if (sensor_fw_synd_rfr(dev))
159                 return MLX5_SENSOR_FW_SYND_RFR;
160
161         return MLX5_SENSOR_NO_ERR;
162 }
163
164 static void reset_fw_if_needed(struct mlx5_core_dev *dev)
165 {
166         bool supported = (ioread32be(&dev->iseg->initializing) >>
167                           MLX5_FW_RESET_SUPPORTED_OFFSET) & 1;
168         u32 cmdq_addr, fatal_error;
169
170         if (!supported)
171                 return;
172
173         /* The reset only needs to be issued by one PF. The health buffer is
174          * shared between all functions, and will be cleared during a reset.
175          * Check again to avoid a redundant 2nd reset. If the fatal erros was
176          * PCI related a reset won't help.
177          */
178         fatal_error = check_fatal_sensors(dev);
179         if (fatal_error == MLX5_SENSOR_PCI_COMM_ERR ||
180             fatal_error == MLX5_SENSOR_NIC_DISABLED ||
181             fatal_error == MLX5_SENSOR_NIC_SW_RESET) {
182                 mlx5_core_warn(dev, "Not issuing FW reset. Either it's already done or won't help.\n");
183                 return;
184         }
185
186         mlx5_core_warn(dev, "Issuing FW Reset\n");
187         /* Write the NIC interface field to initiate the reset, the command
188          * interface address also resides here, don't overwrite it.
189          */
190         cmdq_addr = ioread32be(&dev->iseg->cmdq_addr_l_sz);
191         iowrite32be((cmdq_addr & 0xFFFFF000) |
192                     MLX5_NIC_IFC_SW_RESET << MLX5_NIC_IFC_OFFSET,
193                     &dev->iseg->cmdq_addr_l_sz);
194 }
195
196 #define MLX5_CRDUMP_WAIT_MS     60000
197 #define MLX5_FW_RESET_WAIT_MS   1000
198 #define MLX5_NIC_STATE_POLL_MS  5
199 void mlx5_enter_error_state(struct mlx5_core_dev *dev, bool force)
200 {
201         unsigned long end, delay_ms = MLX5_CRDUMP_WAIT_MS;
202         u32 fatal_error;
203         int lock = -EBUSY;
204
205         mutex_lock(&dev->intf_state_mutex);
206         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
207                 goto unlock;
208                 return;
209         }
210
211         fatal_error = check_fatal_sensors(dev);
212
213         if (fatal_error || force) {
214                 if (!force)
215                         mlx5_core_err(dev, "internal state error detected\n");
216                 dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
217                 mlx5_trigger_cmd_completions(dev);
218         }
219
220         if (force)
221                 goto err_state_done;
222
223         if (fatal_error == MLX5_SENSOR_FW_SYND_RFR) {
224                 /* Get cr-dump and reset FW semaphore */
225                 if (mlx5_core_is_pf(dev))
226                         lock = lock_sem_sw_reset(dev, LOCK);
227
228                 /* Execute cr-dump and SW reset */
229                 if (lock != -EBUSY) {
230                         mlx5_fwdump(dev);
231                         reset_fw_if_needed(dev);
232                         delay_ms = MLX5_FW_RESET_WAIT_MS;
233                 }
234         }
235
236         /* Recover from SW reset */
237         end = jiffies + msecs_to_jiffies(delay_ms);
238         do {
239                 if (sensor_nic_disabled(dev))
240                         break;
241
242                 msleep(MLX5_NIC_STATE_POLL_MS);
243         } while (!time_after(jiffies, end));
244
245         if (!sensor_nic_disabled(dev)) {
246                 dev_err(&dev->pdev->dev, "NIC IFC still %d after %lums.\n",
247                         get_nic_mode(dev), delay_ms);
248         }
249
250         /* Release FW semaphore if you are the lock owner */
251         if (!lock)
252                 lock_sem_sw_reset(dev, UNLOCK);
253
254         mlx5_core_err(dev, "system error event triggered\n");
255
256 err_state_done:
257         mlx5_core_event(dev, MLX5_DEV_EVENT_SYS_ERROR, 0);
258 unlock:
259         mutex_unlock(&dev->intf_state_mutex);
260 }
261
262 static void mlx5_handle_bad_state(struct mlx5_core_dev *dev)
263 {
264         u8 nic_mode = get_nic_mode(dev);
265
266         if (nic_mode == MLX5_NIC_IFC_SW_RESET) {
267                 /* The IFC mode field is 3 bits, so it will read 0x7 in two cases:
268                  * 1. PCI has been disabled (ie. PCI-AER, PF driver unloaded
269                  *    and this is a VF), this is not recoverable by SW reset.
270                  *    Logging of this is handled elsewhere.
271                  * 2. FW reset has been issued by another function, driver can
272                  *    be reloaded to recover after the mode switches to
273                  *    MLX5_NIC_IFC_DISABLED.
274                  */
275                 if (dev->priv.health.fatal_error != MLX5_SENSOR_PCI_COMM_ERR)
276                         mlx5_core_warn(dev, "NIC SW reset is already progress\n");
277                 else
278                         mlx5_core_warn(dev, "Communication with FW over the PCI link is down\n");
279         } else {
280                 mlx5_core_warn(dev, "NIC mode %d\n", nic_mode);
281         }
282
283         mlx5_disable_device(dev);
284 }
285
286 #define MLX5_FW_RESET_WAIT_MS   1000
287 #define MLX5_NIC_STATE_POLL_MS  5
288 static void health_recover(struct work_struct *work)
289 {
290         unsigned long end = jiffies + msecs_to_jiffies(MLX5_FW_RESET_WAIT_MS);
291         struct mlx5_core_health *health;
292         struct delayed_work *dwork;
293         struct mlx5_core_dev *dev;
294         struct mlx5_priv *priv;
295         bool recover = true;
296         u8 nic_mode;
297
298         dwork = container_of(work, struct delayed_work, work);
299         health = container_of(dwork, struct mlx5_core_health, recover_work);
300         priv = container_of(health, struct mlx5_priv, health);
301         dev = container_of(priv, struct mlx5_core_dev, priv);
302
303         mtx_lock(&Giant);       /* XXX newbus needs this */
304
305         if (sensor_pci_no_comm(dev)) {
306                 dev_err(&dev->pdev->dev, "health recovery flow aborted, PCI reads still not working\n");
307                 recover = false;
308         }
309
310         nic_mode = get_nic_mode(dev);
311         while (nic_mode != MLX5_NIC_IFC_DISABLED &&
312                !time_after(jiffies, end)) {
313                 msleep(MLX5_NIC_STATE_POLL_MS);
314                 nic_mode = get_nic_mode(dev);
315         }
316
317         if (nic_mode != MLX5_NIC_IFC_DISABLED) {
318                 dev_err(&dev->pdev->dev, "health recovery flow aborted, unexpected NIC IFC mode %d.\n",
319                         nic_mode);
320                 recover = false;
321         }
322
323         if (recover) {
324                 dev_err(&dev->pdev->dev, "starting health recovery flow\n");
325                 mlx5_recover_device(dev);
326         }
327
328         mtx_unlock(&Giant);
329 }
330
331 /* How much time to wait until health resetting the driver (in msecs) */
332 #define MLX5_RECOVERY_DELAY_MSECS 60000
333 #define MLX5_RECOVERY_NO_DELAY 0
334 static unsigned long get_recovery_delay(struct mlx5_core_dev *dev)
335 {
336         return dev->priv.health.fatal_error == MLX5_SENSOR_PCI_ERR ||
337                 dev->priv.health.fatal_error == MLX5_SENSOR_PCI_COMM_ERR        ?
338                 MLX5_RECOVERY_DELAY_MSECS : MLX5_RECOVERY_NO_DELAY;
339 }
340
341 static void health_care(struct work_struct *work)
342 {
343         struct mlx5_core_health *health;
344         unsigned long recover_delay;
345         struct mlx5_core_dev *dev;
346         struct mlx5_priv *priv;
347         unsigned long flags;
348
349         health = container_of(work, struct mlx5_core_health, work);
350         priv = container_of(health, struct mlx5_priv, health);
351         dev = container_of(priv, struct mlx5_core_dev, priv);
352
353         mlx5_core_warn(dev, "handling bad device here\n");
354         mlx5_handle_bad_state(dev);
355         recover_delay = msecs_to_jiffies(get_recovery_delay(dev));
356
357         spin_lock_irqsave(&health->wq_lock, flags);
358         if (!test_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags)) {
359                 mlx5_core_warn(dev, "Scheduling recovery work with %lums delay\n",
360                                recover_delay);
361                 schedule_delayed_work(&health->recover_work, recover_delay);
362         } else {
363                 dev_err(&dev->pdev->dev,
364                         "new health works are not permitted at this stage\n");
365         }
366         spin_unlock_irqrestore(&health->wq_lock, flags);
367 }
368
369 static int get_next_poll_jiffies(void)
370 {
371         unsigned long next;
372
373         get_random_bytes(&next, sizeof(next));
374         next %= HZ;
375         next += jiffies + MLX5_HEALTH_POLL_INTERVAL;
376
377         return next;
378 }
379
380 void mlx5_trigger_health_work(struct mlx5_core_dev *dev)
381 {
382         struct mlx5_core_health *health = &dev->priv.health;
383         unsigned long flags;
384
385         spin_lock_irqsave(&health->wq_lock, flags);
386         if (!test_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags))
387                 queue_work(health->wq, &health->work);
388         else
389                 dev_err(&dev->pdev->dev,
390                         "new health works are not permitted at this stage\n");
391         spin_unlock_irqrestore(&health->wq_lock, flags);
392 }
393
394 static const char *hsynd_str(u8 synd)
395 {
396         switch (synd) {
397         case MLX5_HEALTH_SYNDR_FW_ERR:
398                 return "firmware internal error";
399         case MLX5_HEALTH_SYNDR_IRISC_ERR:
400                 return "irisc not responding";
401         case MLX5_HEALTH_SYNDR_HW_UNRECOVERABLE_ERR:
402                 return "unrecoverable hardware error";
403         case MLX5_HEALTH_SYNDR_CRC_ERR:
404                 return "firmware CRC error";
405         case MLX5_HEALTH_SYNDR_FETCH_PCI_ERR:
406                 return "ICM fetch PCI error";
407         case MLX5_HEALTH_SYNDR_HW_FTL_ERR:
408                 return "HW fatal error\n";
409         case MLX5_HEALTH_SYNDR_ASYNC_EQ_OVERRUN_ERR:
410                 return "async EQ buffer overrun";
411         case MLX5_HEALTH_SYNDR_EQ_ERR:
412                 return "EQ error";
413         case MLX5_HEALTH_SYNDR_EQ_INV:
414                 return "Invalid EQ referenced";
415         case MLX5_HEALTH_SYNDR_FFSER_ERR:
416                 return "FFSER error";
417         case MLX5_HEALTH_SYNDR_HIGH_TEMP:
418                 return "High temprature";
419         default:
420                 return "unrecognized error";
421         }
422 }
423
424 static void print_health_info(struct mlx5_core_dev *dev)
425 {
426         struct mlx5_core_health *health = &dev->priv.health;
427         struct mlx5_health_buffer __iomem *h = health->health;
428         char fw_str[18];
429         u32 fw;
430         int i;
431
432         /* If the syndrom is 0, the device is OK and no need to print buffer */
433         if (!ioread8(&h->synd))
434                 return;
435
436         for (i = 0; i < ARRAY_SIZE(h->assert_var); i++)
437                 printf("mlx5_core: INFO: ""assert_var[%d] 0x%08x\n", i, ioread32be(h->assert_var + i));
438
439         printf("mlx5_core: INFO: ""assert_exit_ptr 0x%08x\n", ioread32be(&h->assert_exit_ptr));
440         printf("mlx5_core: INFO: ""assert_callra 0x%08x\n", ioread32be(&h->assert_callra));
441         snprintf(fw_str, sizeof(fw_str), "%d.%d.%d", fw_rev_maj(dev), fw_rev_min(dev), fw_rev_sub(dev));
442         printf("mlx5_core: INFO: ""fw_ver %s\n", fw_str);
443         printf("mlx5_core: INFO: ""hw_id 0x%08x\n", ioread32be(&h->hw_id));
444         printf("mlx5_core: INFO: ""irisc_index %d\n", ioread8(&h->irisc_index));
445         printf("mlx5_core: INFO: ""synd 0x%x: %s\n", ioread8(&h->synd), hsynd_str(ioread8(&h->synd)));
446         printf("mlx5_core: INFO: ""ext_synd 0x%04x\n", ioread16be(&h->ext_synd));
447         fw = ioread32be(&h->fw_ver);
448         printf("mlx5_core: INFO: ""raw fw_ver 0x%08x\n", fw);
449 }
450
451 static void poll_health(unsigned long data)
452 {
453         struct mlx5_core_dev *dev = (struct mlx5_core_dev *)data;
454         struct mlx5_core_health *health = &dev->priv.health;
455         u32 fatal_error;
456         u32 count;
457
458         if (dev->state != MLX5_DEVICE_STATE_UP)
459                 return;
460
461         if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
462                 goto out;
463
464         count = ioread32be(health->health_counter);
465         if (count == health->prev)
466                 ++health->miss_counter;
467         else
468                 health->miss_counter = 0;
469
470         health->prev = count;
471         if (health->miss_counter == MAX_MISSES) {
472                 mlx5_core_err(dev, "device's health compromised - reached miss count\n");
473                 print_health_info(dev);
474         }
475
476         fatal_error = check_fatal_sensors(dev);
477
478         if (fatal_error && !health->fatal_error) {
479                 mlx5_core_err(dev, "Fatal error %u detected\n", fatal_error);
480                 dev->priv.health.fatal_error = fatal_error;
481                 print_health_info(dev);
482                 mlx5_trigger_health_work(dev);
483         }
484
485 out:
486         mod_timer(&health->timer, get_next_poll_jiffies());
487 }
488
489 void mlx5_start_health_poll(struct mlx5_core_dev *dev)
490 {
491         struct mlx5_core_health *health = &dev->priv.health;
492
493         init_timer(&health->timer);
494         health->fatal_error = MLX5_SENSOR_NO_ERR;
495         clear_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
496         clear_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
497         health->health = &dev->iseg->health;
498         health->health_counter = &dev->iseg->health_counter;
499
500         setup_timer(&health->timer, poll_health, (unsigned long)dev);
501         mod_timer(&health->timer,
502                   round_jiffies(jiffies + MLX5_HEALTH_POLL_INTERVAL));
503 }
504
505 void mlx5_stop_health_poll(struct mlx5_core_dev *dev)
506 {
507         struct mlx5_core_health *health = &dev->priv.health;
508
509         del_timer_sync(&health->timer);
510 }
511
512 void mlx5_drain_health_wq(struct mlx5_core_dev *dev)
513 {
514         struct mlx5_core_health *health = &dev->priv.health;
515         unsigned long flags;
516
517         spin_lock_irqsave(&health->wq_lock, flags);
518         set_bit(MLX5_DROP_NEW_HEALTH_WORK, &health->flags);
519         set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
520         spin_unlock_irqrestore(&health->wq_lock, flags);
521         cancel_delayed_work_sync(&health->recover_work);
522         cancel_work_sync(&health->work);
523 }
524
525 void mlx5_drain_health_recovery(struct mlx5_core_dev *dev)
526 {
527         struct mlx5_core_health *health = &dev->priv.health;
528         unsigned long flags;
529
530         spin_lock_irqsave(&health->wq_lock, flags);
531         set_bit(MLX5_DROP_NEW_RECOVERY_WORK, &health->flags);
532         spin_unlock_irqrestore(&health->wq_lock, flags);
533         cancel_delayed_work_sync(&dev->priv.health.recover_work);
534 }
535
536 void mlx5_health_cleanup(struct mlx5_core_dev *dev)
537 {
538         struct mlx5_core_health *health = &dev->priv.health;
539
540         destroy_workqueue(health->wq);
541 }
542
543 #define HEALTH_NAME "mlx5_health"
544 int mlx5_health_init(struct mlx5_core_dev *dev)
545 {
546         struct mlx5_core_health *health;
547         char *name;
548         int len;
549
550         health = &dev->priv.health;
551         len = strlen(HEALTH_NAME) + strlen(dev_name(&dev->pdev->dev));
552         name = kmalloc(len + 1, GFP_KERNEL);
553         if (!name)
554                 return -ENOMEM;
555
556         snprintf(name, len, "%s:%s", HEALTH_NAME, dev_name(&dev->pdev->dev));
557         health->wq = create_singlethread_workqueue(name);
558         kfree(name);
559         if (!health->wq)
560                 return -ENOMEM;
561
562         spin_lock_init(&health->wq_lock);
563         INIT_WORK(&health->work, health_care);
564         INIT_DELAYED_WORK(&health->recover_work, health_recover);
565
566         return 0;
567 }