4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6 * You can obtain a copy of the license from the top-level file
7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8 * You may not use this file except in compliance with the license.
14 * Copyright (c) 2016, Intel Corporation.
15 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>
18 #include <libnvpair.h>
25 #include <sys/sysevent/eventdefs.h>
26 #include <sys/sysevent/dev.h>
27 #include <sys/fm/protocol.h>
28 #include <sys/fm/fs/zfs.h>
32 #include "zfs_agents.h"
34 #include "../zed_log.h"
40 static pthread_mutex_t agent_lock = PTHREAD_MUTEX_INITIALIZER; /* held while posting to or consuming from agent_events */
41 static pthread_cond_t agent_cond = PTHREAD_COND_INITIALIZER; /* signaled on post and at teardown; the consumer thread waits on it */
42 static list_t agent_events; /* list of pending events */
43 static int agent_exiting; /* nonzero ends the consumer thread's wait loop */
/* One queued event; this file uses its ae_class, ae_subclass, ae_nvl and ae_node members. */
45 typedef struct agent_event {
52 pthread_t g_agents_tid; /* thread created by zfs_agent_init() to run zfs_agent_consumer_thread() */
54 libzfs_handle_t *g_zfs_hdl; /* libzfs handle handed to zpool_iter() */
56 /* guid search data: lookup key and results for the devid search over all pools */
57 typedef enum device_type {
58 DEVICE_TYPE_L2ARC, /* l2arc device; first enumerator, so also the value of a zeroed guid_search */
59 DEVICE_TYPE_SPARE, /* spare device */
60 DEVICE_TYPE_PRIMARY /* any primary pool storage device */
63 typedef struct guid_search {
64 uint64_t gs_pool_guid; /* read from ZPOOL_CONFIG_POOL_GUID once a vdev has matched */
65 uint64_t gs_vdev_guid; /* read from ZPOOL_CONFIG_GUID on a devid match; 0 means no match */
67 device_type_t gs_vdev_type; /* which vdev array the match was reached through */
68 uint64_t gs_vdev_expandtime; /* vdev expansion time */
72 * Walks the vdev tree recursively looking for a matching devid.
73 * Returns B_TRUE as soon as a matching device is found, B_FALSE otherwise.
76 zfs_agent_iter_vdev(zpool_handle_t *zhp, nvlist_t *nvl, void *arg)
/*
 * arg is the guid_search_t that carries the devid to look for (gs_devid)
 * and receives the results.  The children, spares and l2cache arrays of
 * nvl are searched recursively before nvl's own ZPOOL_CONFIG_DEVID is
 * compared.  When a match is reached through one of those arrays,
 * gs_vdev_type is set to the device type corresponding to that array:
 * children -> DEVICE_TYPE_PRIMARY, spares -> DEVICE_TYPE_SPARE,
 * l2cache -> DEVICE_TYPE_L2ARC.
 */
78 guid_search_t *gsp = arg;
84 * First iterate over any children.
86 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_CHILDREN,
87 &child, &children) == 0) {
88 for (c = 0; c < children; c++) {
89 if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
90 gsp->gs_vdev_type = DEVICE_TYPE_PRIMARY;
96 * Iterate over any spares and cache devices
98 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_SPARES,
99 &child, &children) == 0) {
100 for (c = 0; c < children; c++) {
101 if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
102 gsp->gs_vdev_type = DEVICE_TYPE_SPARE; /* matched under ZPOOL_CONFIG_SPARES */
107 if (nvlist_lookup_nvlist_array(nvl, ZPOOL_CONFIG_L2CACHE,
108 &child, &children) == 0) {
109 for (c = 0; c < children; c++) {
110 if (zfs_agent_iter_vdev(zhp, child[c], gsp)) {
111 gsp->gs_vdev_type = DEVICE_TYPE_L2ARC; /* matched under ZPOOL_CONFIG_L2CACHE */
117 * On a devid match, grab the vdev guid and expansion time, if any.
119 if (gsp->gs_devid != NULL &&
120 (nvlist_lookup_string(nvl, ZPOOL_CONFIG_DEVID, &path) == 0) &&
121 (strcmp(gsp->gs_devid, path) == 0)) {
122 (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_GUID,
124 (void) nvlist_lookup_uint64(nvl, ZPOOL_CONFIG_EXPANSION_TIME,
125 &gsp->gs_vdev_expandtime);
133 zfs_agent_iter_pool(zpool_handle_t *zhp, void *arg)
135 guid_search_t *gsp = arg;
/*
 * Per-pool callback passed to zpool_iter(): searches this pool's vdev tree
 * for the devid in *gsp and, once a vdev guid has been recorded, also
 * records the pool guid from the pool's config.
 */
136 nvlist_t *config, *nvl;
139 * For each vdev in this pool, look for a match by devid
141 if ((config = zpool_get_config(zhp, NULL)) != NULL) {
142 if (nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE,
144 (void) zfs_agent_iter_vdev(zhp, nvl, gsp); /* result is read back from gsp->gs_vdev_guid below */
148 * if a match was found then grab the pool guid
150 if (gsp->gs_vdev_guid) {
151 (void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID,
156 return (gsp->gs_vdev_guid != 0); /* 1 once a vdev guid has been recorded in *gsp, else 0 */
160 zfs_agent_post_event(const char *class, const char *subclass, nvlist_t *nvl)
/*
 * Queues an event for the consumer thread: nvl is duplicated into a newly
 * allocated agent_event_t, which is appended to agent_events under
 * agent_lock, and agent_cond is then signaled.  An EC_DEV_REMOVE/ESC_DISK
 * event that carries a vdev guid or a device identifier is first rewritten
 * as a "resource.fs.zfs.removed" event with pool/vdev guid and vdev type
 * added to the duplicated payload.
 */
162 agent_event_t *event;
164 if (subclass == NULL)
167 event = malloc(sizeof (agent_event_t));
168 if (event == NULL || nvlist_dup(nvl, &event->ae_nvl, 0) != 0) { /* allocation or nvlist copy failed */
174 if (strcmp(class, "sysevent.fs.zfs.vdev_check") == 0) {
176 subclass = ESC_ZFS_VDEV_CHECK;
180 * On Linux, we don't get the expected FM_RESOURCE_REMOVED ereport
181 * from the vdev_disk layer after a hot unplug. Fortunately we do
182 * get an EC_DEV_REMOVE from our disk monitor and it is a suitable
183 * proxy so we remap it here for the benefit of the diagnosis engine.
185 if ((strcmp(class, EC_DEV_REMOVE) == 0) &&
186 (strcmp(subclass, ESC_DISK) == 0) &&
187 (nvlist_exists(nvl, ZFS_EV_VDEV_GUID) ||
188 nvlist_exists(nvl, DEV_IDENTIFIER))) {
189 nvlist_t *payload = event->ae_nvl; /* the event's own copy; additions below do not touch the caller's nvl */
192 uint64_t pool_guid = 0, vdev_guid = 0;
193 guid_search_t search = { 0 };
194 device_type_t devtype = DEVICE_TYPE_PRIMARY;
196 class = "resource.fs.zfs.removed";
199 (void) nvlist_add_string(payload, FM_CLASS, class);
200 (void) nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &pool_guid);
201 (void) nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &vdev_guid);
203 (void) gettimeofday(&tv, NULL);
206 (void) nvlist_add_int64_array(payload, FM_EREPORT_TIME, tod, 2);
209 * For multipath, spare and l2arc devices ZFS_EV_VDEV_GUID or
210 * ZFS_EV_POOL_GUID may be missing so find them.
212 (void) nvlist_lookup_string(nvl, DEV_IDENTIFIER,
214 (void) zpool_iter(g_zfs_hdl, zfs_agent_iter_pool, &search); /* zfs_agent_iter_pool() fills in search */
215 pool_guid = search.gs_pool_guid; /* search results replace the guids read from nvl above */
216 vdev_guid = search.gs_vdev_guid;
217 devtype = search.gs_vdev_type;
220 * We want to avoid reporting "remove" events coming from
221 * libudev for VDEVs which were expanded recently (10s) and
222 * avoid activating spares in response to partitions being
223 * deleted and created in rapid succession.
225 if (search.gs_vdev_expandtime != 0 &&
226 search.gs_vdev_expandtime + 10 > tv.tv_sec) { /* tv was filled by gettimeofday() above */
227 zed_log_msg(LOG_INFO, "agent post event: ignoring '%s' "
228 "for recently expanded device '%s'", EC_DEV_REMOVE,
233 (void) nvlist_add_uint64(payload,
234 FM_EREPORT_PAYLOAD_ZFS_POOL_GUID, pool_guid);
235 (void) nvlist_add_uint64(payload,
236 FM_EREPORT_PAYLOAD_ZFS_VDEV_GUID, vdev_guid);
238 case DEVICE_TYPE_L2ARC:
239 (void) nvlist_add_string(payload,
240 FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE,
243 case DEVICE_TYPE_SPARE:
244 (void) nvlist_add_string(payload,
245 FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_SPARE);
247 case DEVICE_TYPE_PRIMARY:
248 (void) nvlist_add_string(payload,
249 FM_EREPORT_PAYLOAD_ZFS_VDEV_TYPE, VDEV_TYPE_DISK);
253 zed_log_msg(LOG_INFO, "agent post event: mapping '%s' to '%s'",
254 EC_DEV_REMOVE, class);
257 (void) strlcpy(event->ae_class, class, sizeof (event->ae_class));
258 (void) strlcpy(event->ae_subclass, subclass,
259 sizeof (event->ae_subclass));
261 (void) pthread_mutex_lock(&agent_lock);
262 list_insert_tail(&agent_events, event); /* appended at the tail; the consumer removes from the head */
263 (void) pthread_mutex_unlock(&agent_lock);
266 (void) pthread_cond_signal(&agent_cond); /* wake the consumer thread waiting on agent_cond */
270 zfs_agent_dispatch(const char *class, const char *subclass, nvlist_t *nvl)
/*
 * Hands one event to every consumer whose class filter matches: the
 * "zfs-diagnosis" module, the "zfs-retire" module, and zfs_slm_event().
 * The three filters are independent, so one event may reach several
 * consumers.  subclass is only forwarded to zfs_slm_event().
 */
273 * The diagnosis engine subscribes to the following events.
274 * On illumos these subscriptions reside in:
275 * /usr/lib/fm/fmd/plugins/zfs-diagnosis.conf
277 if (strstr(class, "ereport.fs.zfs.") != NULL || /* strstr: substring match, not anchored at the start of class */
278 strstr(class, "resource.fs.zfs.") != NULL ||
279 strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0 ||
280 strcmp(class, "sysevent.fs.zfs.vdev_remove_dev") == 0 ||
281 strcmp(class, "sysevent.fs.zfs.pool_destroy") == 0) {
282 fmd_module_recv(fmd_module_hdl("zfs-diagnosis"), nvl, class);
286 * The retire agent subscribes to the following events.
287 * On illumos these subscriptions reside in:
288 * /usr/lib/fm/fmd/plugins/zfs-retire.conf
290 * NOTE: faults events come directly from our diagnosis engine
291 * and will not pass through the zfs kernel module.
293 if (strcmp(class, FM_LIST_SUSPECT_CLASS) == 0 ||
294 strcmp(class, "resource.fs.zfs.removed") == 0 ||
295 strcmp(class, "resource.fs.zfs.statechange") == 0 ||
296 strcmp(class, "sysevent.fs.zfs.vdev_remove") == 0) {
297 fmd_module_recv(fmd_module_hdl("zfs-retire"), nvl, class);
301 * The SLM module only consumes disk events and vdev check events
303 * NOTE: disk events come directly from disk monitor and will
304 * not pass through the zfs kernel module.
306 if (strstr(class, "EC_dev_") != NULL ||
307 strcmp(class, EC_ZFS) == 0) {
308 (void) zfs_slm_event(class, subclass, nvl); /* return value deliberately ignored */
313 * Events are consumed and dispatched from this thread
314 * An agent can also post an event so event list lock
315 * is not held when calling an agent.
316 * One event is consumed at a time.
319 zfs_agent_consumer_thread(void *arg)
/*
 * Thread body started by zfs_agent_init().  Waits on agent_cond until
 * agent_events is non-empty or agent_exiting is set, removes the head
 * event while holding agent_lock, and dispatches it after releasing the
 * lock.
 */
322 agent_event_t *event;
324 (void) pthread_mutex_lock(&agent_lock);
326 /* wait for an event to show up */
327 while (!agent_exiting && list_is_empty(&agent_events)) /* predicate re-checked after every wakeup */
328 (void) pthread_cond_wait(&agent_cond, &agent_lock);
331 (void) pthread_mutex_unlock(&agent_lock);
332 zed_log_msg(LOG_INFO, "zfs_agent_consumer_thread: "
337 if ((event = (list_head(&agent_events))) != NULL) {
338 list_remove(&agent_events, event);
340 (void) pthread_mutex_unlock(&agent_lock); /* released before dispatch because an agent may itself post an event */
342 /* dispatch to all event subscribers */
343 zfs_agent_dispatch(event->ae_class, event->ae_subclass,
346 nvlist_free(event->ae_nvl); /* frees the copy made by nvlist_dup() in zfs_agent_post_event() */
351 (void) pthread_mutex_unlock(&agent_lock);
358 zfs_agent_init(libzfs_handle_t *zfs_hdl)
/*
 * Brings up the agents in order: the SLM module, the "zfs-diagnosis" and
 * "zfs-retire" fmd modules, the pending-event list, and finally the
 * consumer thread.  Each failure is reported through zed_log_die().
 */
364 if (zfs_slm_init() != 0)
365 zed_log_die("Failed to initialize zfs slm");
366 zed_log_msg(LOG_INFO, "Add Agent: init");
368 hdl = fmd_module_hdl("zfs-diagnosis");
369 _zfs_diagnosis_init(hdl);
370 if (!fmd_module_initialized(hdl)) /* the init call returns nothing here; success is checked on the handle */
371 zed_log_die("Failed to initialize zfs diagnosis");
373 hdl = fmd_module_hdl("zfs-retire");
374 _zfs_retire_init(hdl);
375 if (!fmd_module_initialized(hdl))
376 zed_log_die("Failed to initialize zfs retire");
378 list_create(&agent_events, sizeof (agent_event_t),
379 offsetof(struct agent_event, ae_node)); /* events are linked through their ae_node member */
381 if (pthread_create(&g_agents_tid, NULL, zfs_agent_consumer_thread,
383 list_destroy(&agent_events); /* failure path: undo list_create() before reporting */
384 zed_log_die("Failed to initialize agents");
392 agent_event_t *event;
395 (void) pthread_cond_signal(&agent_cond); /* wake the consumer thread if it is blocked in pthread_cond_wait() */
397 /* wait for the agent consumer thread (g_agents_tid) to complete */
398 (void) pthread_join(g_agents_tid, NULL);
400 /* drain any pending events */
401 while ((event = (list_head(&agent_events))) != NULL) { /* no lock taken: the consumer thread has been joined */
402 list_remove(&agent_events, event);
403 nvlist_free(event->ae_nvl); /* remaining events are discarded, not passed to zfs_agent_dispatch() */
407 list_destroy(&agent_events);
409 if ((hdl = fmd_module_hdl("zfs-retire")) != NULL) { /* modules are torn down in the reverse of their init order */
410 _zfs_retire_fini(hdl);
411 fmd_hdl_unregister(hdl);
413 if ((hdl = fmd_module_hdl("zfs-diagnosis")) != NULL) {
414 _zfs_diagnosis_fini(hdl);
415 fmd_hdl_unregister(hdl);
418 zed_log_msg(LOG_INFO, "Add Agent: fini");