4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License Version 1.0 (CDDL-1.0).
6 * You can obtain a copy of the license from the top-level file
7 * "OPENSOLARIS.LICENSE" or at <http://opensource.org/licenses/CDDL-1.0>.
8 * You may not use this file except in compliance with the license.
14 * Copyright (c) 2016, Intel Corporation.
21 #include <libnvpair.h>
28 #include <sys/sysevent/eventdefs.h>
29 #include <sys/sysevent/dev.h>
32 #include "zed_disk_event.h"
33 #include "agents/zfs_agents.h"
36 * Portions of ZED need to see disk events for disks belonging to ZFS pools.
37 * A libudev monitor is established to monitor block device actions and pass
38 * them on to internal ZED logic modules. Initially, zfs_mod.c is the only
39 * consumer and is the Linux equivalent for the illumos syseventd ZFS SLM
40 * module responsible for handling disk events for ZFS.
/*
 * udev monitor handle shared across this file: assigned in
 * zed_disk_event_init(), passed to the monitor thread as its argument,
 * and released with udev_monitor_unref() in zed_disk_event_fini().
 */
45 struct udev_monitor *g_mon;
/* NOTE(review): not referenced in the visible part of this file. */
48 #define DEV_BYID_PATH "/dev/disk/by-id/"
50 /* 64MB is minimum usable disk for ZFS */
/*
 * Threshold used by zed_udev_monitor() on the udev "ID_PART_ENTRY_SIZE"
 * property and the "size" sysfs attribute.  131072 units of 512 bytes is
 * the 64MB quoted above (assumes both values count 512-byte sectors --
 * TODO confirm).
 */
51 #define MINIMUM_SECTORS 131072
55 * Post disk event to SLM module
57 * occurs in the context of monitor thread
60 zed_udev_event(const char *class, const char *subclass, nvlist_t *nvl)
/*
 * Logs the event at LOG_INFO and then forwards it to zfs_slm_event().
 *
 * class, subclass: event class and subclass strings; logged, then passed on.
 * nvl: event payload; the name/value pairs below are looked up for logging
 *      and the list is then passed on as-is.
 */
65 zed_log_msg(LOG_INFO, "zed_disk_event:");
66 zed_log_msg(LOG_INFO, "\tclass: %s", class);
67 zed_log_msg(LOG_INFO, "\tsubclass: %s", subclass);
/* Every pair is optional: it is logged only when its lookup returns 0. */
68 if (nvlist_lookup_string(nvl, DEV_NAME, &strval) == 0)
69 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_NAME, strval);
70 if (nvlist_lookup_string(nvl, DEV_PATH, &strval) == 0)
71 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PATH, strval);
72 if (nvlist_lookup_string(nvl, DEV_IDENTIFIER, &strval) == 0)
73 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_IDENTIFIER, strval);
74 if (nvlist_lookup_string(nvl, DEV_PHYS_PATH, &strval) == 0)
75 zed_log_msg(LOG_INFO, "\t%s: %s", DEV_PHYS_PATH, strval);
/*
 * NOTE(review): the declaration of numval is not visible here.  "%llu"
 * expects unsigned long long; if numval is a uint64_t, confirm the types
 * agree on every target or add an explicit cast in the three calls below.
 */
76 if (nvlist_lookup_uint64(nvl, DEV_SIZE, &numval) == 0)
77 zed_log_msg(LOG_INFO, "\t%s: %llu", DEV_SIZE, numval);
78 if (nvlist_lookup_uint64(nvl, ZFS_EV_POOL_GUID, &numval) == 0)
79 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_POOL_GUID, numval);
80 if (nvlist_lookup_uint64(nvl, ZFS_EV_VDEV_GUID, &numval) == 0)
81 zed_log_msg(LOG_INFO, "\t%s: %llu", ZFS_EV_VDEV_GUID, numval);
/* The result of zfs_slm_event() is explicitly discarded. */
83 (void) zfs_slm_event(class, subclass, nvl);
87 * dev_event_nvlist: place event schema into an nv pair list
89 * NAME VALUE (example)
90 * -------------- --------------------------------------------------------
92 * DEV_PATH /devices/pci0000:00/0000:00:03.0/0000:04:00.0/host0/...
93 * DEV_IDENTIFIER ata-Hitachi_HTS725050A9A362_100601PCG420VLJ37DMC
94 * DEV_PHYS_PATH pci-0000:04:00.0-sas-0x4433221101000000-lun-0
96 * DEV_SIZE 500107862016
97 * ZFS_EV_POOL_GUID 17523635698032189180
98 * ZFS_EV_VDEV_GUID 14663607734290803088
101 dev_event_nvlist(struct udev_device *dev)
/*
 * Collects identifying properties of dev into an nvlist allocated here with
 * NV_UNIQUE_NAME.  Each pair is added only when its source value is
 * available, and the results of the nvlist_add_xxx() calls are discarded.
 * The caller in zed_udev_monitor() posts the event only when the returned
 * pointer is non-NULL.
 */
105 const char *value, *path;
108 if (nvlist_alloc(&nvl, NV_UNIQUE_NAME, 0) != 0)
/*
 * zfs_device_get_devid() and zfs_device_get_physical() are defined outside
 * this file; a return of 0 is treated as "strval was filled in".
 */
111 if (zfs_device_get_devid(dev, strval, sizeof (strval)) == 0)
112 (void) nvlist_add_string(nvl, DEV_IDENTIFIER, strval);
113 if (zfs_device_get_physical(dev, strval, sizeof (strval)) == 0)
114 (void) nvlist_add_string(nvl, DEV_PHYS_PATH, strval);
115 if ((path = udev_device_get_devnode(dev)) != NULL)
116 (void) nvlist_add_string(nvl, DEV_NAME, path);
117 if ((value = udev_device_get_devpath(dev)) != NULL)
118 (void) nvlist_add_string(nvl, DEV_PATH, value);
/*
 * DEV_IS_PART is a value-less boolean flag.  It is set for devtype
 * "partition", or depending on the ID_PART_ENTRY_NUMBER property (the exact
 * test applied to that property is not visible here).
 */
119 value = udev_device_get_devtype(dev);
120 if ((value != NULL && strcmp("partition", value) == 0) ||
121 (udev_device_get_property_value(dev, "ID_PART_ENTRY_NUMBER")
123 (void) nvlist_add_boolean(nvl, DEV_IS_PART);
/*
 * DEV_SIZE is the "size" sysfs attribute multiplied by DEV_BSIZE --
 * presumably a sector count converted to bytes; TODO confirm.
 */
125 if ((value = udev_device_get_sysattr_value(dev, "size")) != NULL) {
126 uint64_t numval = DEV_BSIZE;
128 numval *= strtoull(value, NULL, 10);
129 (void) nvlist_add_uint64(nvl, DEV_SIZE, numval);
133 * Grab the pool and vdev guids from blkid cache
/*
 * The guids are parsed as base-10 integers.  A result of 0, which is also
 * what strtoull() yields for text with no leading digits, is not added.
 */
135 value = udev_device_get_property_value(dev, "ID_FS_UUID");
136 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
137 (void) nvlist_add_uint64(nvl, ZFS_EV_POOL_GUID, guid);
139 value = udev_device_get_property_value(dev, "ID_FS_UUID_SUB");
140 if (value != NULL && (guid = strtoull(value, NULL, 10)) != 0)
141 (void) nvlist_add_uint64(nvl, ZFS_EV_VDEV_GUID, guid);
144 * Either a vdev guid or a devid must be present for matching
/*
 * NOTE(review): what happens when both names are absent is not visible in
 * this listing.
 */
146 if (!nvlist_exists(nvl, DEV_IDENTIFIER) &&
147 !nvlist_exists(nvl, ZFS_EV_VDEV_GUID)) {
156 * Listen for block device uevents
159 zed_udev_monitor(void *arg)
/*
 * Thread start routine passed to pthread_create() by zed_disk_event_init();
 * arg is the udev monitor to read from.  Each received device is run
 * through the filters below, and the ones that remain are converted with
 * dev_event_nvlist() and posted through zed_udev_event().
 */
161 struct udev_monitor *mon = arg;
/*
 * NOTE(review): "uduev" in the message below looks like a typo for "udev".
 * It is left unchanged here because it is runtime log output.
 */
163 zed_log_msg(LOG_INFO, "Waiting for new uduev disk events...");
166 struct udev_device *dev;
167 const char *action, *type, *part, *sectors;
168 const char *bus, *uuid;
169 const char *class, *subclass;
171 boolean_t is_zfs = B_FALSE;
173 /* allow a cancellation while blocked (recvmsg) */
174 pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL);
176 /* blocks at recvmsg until an event occurs */
/* A NULL device is logged together with errno at LOG_WARNING. */
177 if ((dev = udev_monitor_receive_device(mon)) == NULL) {
178 zed_log_msg(LOG_WARNING, "zed_udev_monitor: receive "
179 "device error %d", errno);
183 /* allow all steps to complete before a cancellation */
/* zed_disk_event_fini() stops this thread with pthread_cancel(). */
184 pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL);
187 * Strongly typed device is the prefered filter
/*
 * A non-empty ID_FS_TYPE is compared with "zfs_member".  The log-and-unref
 * lines below handle a type that is not ours; is_zfs is presumably set in
 * the matching branch, which is not visible here.
 */
189 type = udev_device_get_property_value(dev, "ID_FS_TYPE");
190 if (type != NULL && type[0] != '\0') {
191 if (strcmp(type, "zfs_member") == 0) {
194 /* not ours, so skip */
195 zed_log_msg(LOG_INFO, "zed_udev_monitor: skip "
197 udev_device_get_devnode(dev), type);
198 udev_device_unref(dev);
204 * if this is a disk and it is partitioned, then the
205 * zfs label will reside in a DEVTYPE=partition and
206 * we can skip passing this event
/* "Partitioned" here means a non-empty ID_PART_TABLE_TYPE property. */
208 type = udev_device_get_property_value(dev, "DEVTYPE");
209 part = udev_device_get_property_value(dev,
210 "ID_PART_TABLE_TYPE");
211 if (type != NULL && type[0] != '\0' &&
212 strcmp(type, "disk") == 0 &&
213 part != NULL && part[0] != '\0') {
214 /* skip and wait for partition event */
215 zed_log_msg(LOG_INFO, "zed_udev_monitor: %s waiting "
216 "for slice", udev_device_get_devnode(dev));
217 udev_device_unref(dev);
222 * ignore small partitions
/*
 * The size comes from ID_PART_ENTRY_SIZE and from the "size" sysfs
 * attribute; the condition that selects between them is not visible here
 * (presumably the attribute is the fallback).  Devices below
 * MINIMUM_SECTORS are dropped without a log message.
 */
224 sectors = udev_device_get_property_value(dev,
225 "ID_PART_ENTRY_SIZE");
227 sectors = udev_device_get_sysattr_value(dev, "size");
228 if (sectors != NULL &&
229 strtoull(sectors, NULL, 10) < MINIMUM_SECTORS) {
230 udev_device_unref(dev);
235 * If the blkid probe didn't find ZFS, then a persistent
236 * device id string is required in the message schema
237 * for matching with vdevs. Preflight here for expected
/* A non-ZFS device with neither ID_BUS nor DM_UUID is logged and dropped. */
240 bus = udev_device_get_property_value(dev, "ID_BUS");
241 uuid = udev_device_get_property_value(dev, "DM_UUID");
242 if (!is_zfs && (bus == NULL && uuid == NULL)) {
243 zed_log_msg(LOG_INFO, "zed_udev_monitor: %s no devid "
244 "source", udev_device_get_devnode(dev));
245 udev_device_unref(dev);
/*
 * Map the udev action onto an event class and subclass: "remove" gives
 * EC_DEV_REMOVE and "change" gives EC_DEV_STATUS with ESC_DEV_DLE.  The
 * assignments for "add" are not visible here.  Any other action is logged
 * at LOG_WARNING and the device is released.
 *
 * NOTE(review): action is handed to strcmp() with no NULL check in the
 * visible lines -- confirm udev_device_get_action() cannot return NULL for
 * devices received from a monitor, or add a guard.
 */
249 action = udev_device_get_action(dev);
250 if (strcmp(action, "add") == 0) {
253 } else if (strcmp(action, "remove") == 0) {
254 class = EC_DEV_REMOVE;
256 } else if (strcmp(action, "change") == 0) {
257 class = EC_DEV_STATUS;
258 subclass = ESC_DEV_DLE;
260 zed_log_msg(LOG_WARNING, "zed_udev_monitor: %s unknown",
262 udev_device_unref(dev);
267 * Special case an EC_DEV_ADD for multipath devices
269 * When a multipath device is created, udev reports the
272 * 1. "add" event of the dm device for the multipath device
274 * 2. "change" event to create the actual multipath device
275 * symlink (like /dev/mapper/mpatha). The event also
276 * passes back the relevant DM vars we care about, like
278 * 3. Another "change" event identical to #2 (that we ignore).
280 * To get the behavior we want, we treat the "change" event
281 * in #2 as a "add" event; as if "/dev/mapper/mpatha" was
282 * a new disk being added.
/*
 * Applies to an EC_DEV_STATUS event whose device has both the DM_UUID and
 * MPATH_SBIN_PATH properties.  The statements that rewrite class and
 * subclass are not visible here.
 */
284 if (strcmp(class, EC_DEV_STATUS) == 0 &&
285 udev_device_get_property_value(dev, "DM_UUID") &&
286 udev_device_get_property_value(dev, "MPATH_SBIN_PATH")) {
287 /* Fake a MP "change" event to look like a "create" */
/*
 * Post the event only when dev_event_nvlist() returned a list.  The
 * handling of nvl after the post is not visible here.  This device
 * reference is dropped once the event has been handled.
 */
292 if ((nvl = dev_event_nvlist(dev)) != NULL) {
293 zed_udev_event(class, subclass, nvl);
297 udev_device_unref(dev);
304 zed_disk_event_init()
/*
 * Creates the udev context (g_udev) and a netlink monitor (g_mon) filtered
 * to the "block" subsystem, forces the monitor socket into blocking mode,
 * and starts the zed_udev_monitor() thread (g_mon_tid).  Failures of
 * udev_new() and pthread_create() are logged at LOG_WARNING; the values
 * returned on those paths are not visible here.
 */
308 if ((g_udev = udev_new()) == NULL) {
309 zed_log_msg(LOG_WARNING, "udev_new failed (%d)", errno);
313 /* Set up a udev monitor for block devices */
/*
 * NOTE(review): g_mon is used below with no NULL check in the visible
 * lines, and the results of the filter and enable calls are ignored --
 * confirm that this is acceptable.  The devtype argument of the second
 * filter is not visible here.
 */
314 g_mon = udev_monitor_new_from_netlink(g_udev, "udev");
315 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block", "disk");
316 udev_monitor_filter_add_match_subsystem_devtype(g_mon, "block",
318 udev_monitor_enable_receiving(g_mon);
320 /* Make sure monitoring socket is blocking */
/*
 * NOTE(review): if fcntl(F_GETFL) fails and returns -1, the "& O_NONBLOCK"
 * test is non-zero and F_SETFL is then called with flags derived from -1.
 * Consider checking for -1 first.
 */
321 fd = udev_monitor_get_fd(g_mon);
322 if ((fflags = fcntl(fd, F_GETFL)) & O_NONBLOCK)
323 (void) fcntl(fd, F_SETFL, fflags & ~O_NONBLOCK);
325 /* spawn a thread to monitor events */
/* If the thread cannot be created, the monitor reference is dropped. */
326 if (pthread_create(&g_mon_tid, NULL, zed_udev_monitor, g_mon) != 0) {
327 udev_monitor_unref(g_mon);
329 zed_log_msg(LOG_WARNING, "pthread_create failed");
333 zed_log_msg(LOG_INFO, "zed_disk_event_init");
339 zed_disk_event_fini()
/*
 * Stops the monitor thread started by zed_disk_event_init() and releases
 * the udev monitor.  The thread is cancelled and then joined, so it has
 * exited before g_mon is unreferenced.  The results of pthread_cancel() and
 * pthread_join() are explicitly discarded.
 */
341 /* cancel monitor thread at recvmsg() */
342 (void) pthread_cancel(g_mon_tid);
343 (void) pthread_join(g_mon_tid, NULL);
345 /* cleanup udev resources */
346 udev_monitor_unref(g_mon);
349 zed_log_msg(LOG_INFO, "zed_disk_event_fini");
354 #include "zed_disk_event.h"
/*
 * NOTE(review): second definition of this name in the file.  Presumably it
 * is the stub built when HAVE_LIBUDEV is not defined (see the closing
 * #endif); its body is not visible here.
 */
357 zed_disk_event_init()
/*
 * NOTE(review): second definition of this name in the file.  Presumably it
 * is the stub built when HAVE_LIBUDEV is not defined (see the closing
 * #endif); its body is not visible here.
 */
363 zed_disk_event_fini()
367 #endif /* HAVE_LIBUDEV */