2 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions, and the following disclaimer,
10 * without modification.
11 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12 * substantially similar to the "NO WARRANTY" disclaimer below
13 * ("Disclaimer") and any redistribution must be conditioned upon
14 * including a substantially similar Disclaimer requirement for further
15 * binary redistribution.
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28 * POSSIBILITY OF SUCH DAMAGES.
30 * Authors: Justin T. Gibbs (Spectra Logic Corporation)
36 * We keep case files for any leaf vdev that is not in the optimal state.
37 * However, we only serialize to disk those events that need to be preserved
38 * across reboots. For now, this is just a log of soft errors which we
39 * accumulate in order to mark a device as degraded.
41 #include <sys/cdefs.h>
44 #include <sys/fs/zfs.h>
60 #include <devdctl/guid.h>
61 #include <devdctl/event.h>
62 #include <devdctl/event_factory.h>
63 #include <devdctl/exception.h>
64 #include <devdctl/consumer.h>
67 #include "vdev_iterator.h"
68 #include "zfsd_event.h"
69 #include "case_file.h"
72 #include "zfsd_exception.h"
73 #include "zpool_list.h"
75 __FBSDID("$FreeBSD$");
77 /*============================ Namespace Control =============================*/
81 using std::stringstream;
86 using DevdCtl::EventFactory;
87 using DevdCtl::EventList;
89 using DevdCtl::ParseException;
91 /*--------------------------------- CaseFile ---------------------------------*/
92 //- CaseFile Static Data -------------------------------------------------------
/*
 * Static CaseFile state: the process-wide list of open cases, the
 * directory where cases are serialized across restarts, and the grace
 * period timer value used by RegisterCallout/OnGracePeriodEnded.
 */
94 CaseFileList CaseFile::s_activeCases;
95 const string CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
98 //- CaseFile Static Public Methods ---------------------------------------------
/*
 * Locate the active case for the given pool/vdev GUID pair by scanning
 * s_activeCases; entries whose pool or vdev GUID mismatch are skipped.
 * (Listing is incomplete here — return statements are not visible.)
 */
100 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
102 for (CaseFileList::iterator curCase = s_activeCases.begin();
103 curCase != s_activeCases.end(); curCase++) {
105 if ((*curCase)->PoolGUID() != poolGUID
106 || (*curCase)->VdevGUID() != vdevGUID)
110 * We only carry one active case per-vdev.
/*
 * Locate the active case whose vdev has the given physical path.
 * More than one match indicates an internal inconsistency, so a
 * warning is logged when a second matching case is encountered.
 */
118 CaseFile::Find(const string &physPath)
120 CaseFile *result = NULL;
122 for (CaseFileList::iterator curCase = s_activeCases.begin();
123 curCase != s_activeCases.end(); curCase++) {
125 if ((*curCase)->PhysicalPath() != physPath)
128 if (result != NULL) {
129 syslog(LOG_WARNING, "Multiple casefiles found for "
131 "This is most likely a bug in zfsd",
/*
 * Re-evaluate every active case belonging to the pool identified by
 * poolGUID against the supplied event.
 */
141 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
143 CaseFileList::iterator casefile;
144 for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
/*
 * The next iterator is captured before calling ReEvaluate() —
 * presumably because ReEvaluate can close (and thus remove) the
 * current case from s_activeCases.  TODO confirm against full source.
 */
145 CaseFileList::iterator next = casefile;
147 if (poolGUID == (*casefile)->PoolGUID())
148 (*casefile)->ReEvaluate(event);
/*
 * Return the active case for vdev, creating a new one if none exists.
 * Only one case is kept per vdev, so an existing case is reused.
 */
154 CaseFile::Create(Vdev &vdev)
156 CaseFile *activeCase;
158 activeCase = Find(vdev.PoolGUID(), vdev.GUID());
159 if (activeCase == NULL)
160 activeCase = new CaseFile(vdev);
162 return (*activeCase);
/*
 * Load all previously serialized cases from s_caseFilePath.
 * scandir(3) filters directory entries through DeSerializeSelector,
 * which accepts only "pool_<guid>_vdev_<guid>.case" regular files;
 * each accepted file is handed to DeSerializeFile().
 */
166 CaseFile::DeSerialize()
168 struct dirent **caseFiles;
170 int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
171 DeSerializeSelector, /*compar*/NULL));
/* -1 means scandir itself failed; 0 means no saved cases exist. */
173 if (numCaseFiles == -1)
175 if (numCaseFiles == 0) {
180 for (int i = 0; i < numCaseFiles; i++) {
182 DeSerializeFile(caseFiles[i]->d_name);
/*
 * Iterate over every active case — presumably to Log() each one
 * (the loop body is not visible in this listing; TODO confirm).
 */
191 for (CaseFileList::iterator curCase = s_activeCases.begin();
192 curCase != s_activeCases.end(); curCase++)
/*
 * Tear down all active cases.  Each case is serialized first so its
 * state survives for the next BuildCaseFiles pass; the loop drains
 * s_activeCases from the front because destruction removes the case
 * from the list itself.
 */
200 * Serialize casefiles before deleting them so that they can be reread
201 * and revalidated during BuildCaseFiles.
202 * CaseFiles remove themselves from this list on destruction.
204 while (s_activeCases.size() != 0) {
205 CaseFile *casefile = s_activeCases.front();
206 casefile->Serialize();
212 //- CaseFile Public Methods ----------------------------------------------------
/*
 * Refresh m_vdevState and m_vdevPhysPath from the live pool
 * configuration.  Fails (early-exits; exact returns not visible in
 * this listing) when the pool can no longer be found by GUID or the
 * case's vdev is no longer part of that pool.
 */
214 CaseFile::RefreshVdevState()
216 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
217 zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
218 if (casePool == NULL)
221 Vdev vd(casePool, CaseVdev(casePool));
222 if (vd.DoesNotExist())
225 m_vdevState = vd.State();
226 m_vdevPhysPath = vd.PhysicalPath();
/*
 * Re-evaluate this case against a newly arrived device (devPath /
 * physPath, optionally an already-known Vdev).  Returns whether the
 * event was consumed.  The decision ladder visible here:
 *   1. pool/vdev gone            -> close-path, not consumed
 *   2. vdev not missing          -> ignore, not consumed
 *   3. exact GUID match          -> online the vdev, consumed
 *   4. autoreplace off           -> ignore
 *   5. no/mismatched phys path   -> ignore
 *   6. otherwise label the disk and Replace() by physical path.
 */
231 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
233 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
234 zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
236 if (pool == NULL || !RefreshVdevState()) {
238 * The pool or vdev for this case file is no longer
239 * part of the configuration. This can happen
240 * if we process a device arrival notification
241 * before seeing the ZFS configuration change
245 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured. "
247 PoolGUIDString().c_str(),
248 VdevGUIDString().c_str());
252 * Since this event was not used to close this
253 * case, do not report it as consumed.
255 return (/*consumed*/false);
/* States above CANT_OPEN mean the device is present; arrivals don't help. */
258 if (VdevState() > VDEV_STATE_CANT_OPEN) {
260 * For now, newly discovered devices only help for
261 * devices that are missing. In the future, we might
262 * use a newly inserted spare to replace a degraded
265 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
266 PoolGUIDString().c_str(), VdevGUIDString().c_str());
267 return (/*consumed*/false);
/* The arriving device is this very vdev: bring it back online. */
271 && vdev->PoolGUID() == m_poolGUID
272 && vdev->GUID() == m_vdevGUID) {
274 zpool_vdev_online(pool, vdev->GUIDString().c_str(),
275 ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
277 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s). State now %s.\n",
278 zpool_get_name(pool), vdev->GUIDString().c_str(),
280 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
283 * Check the vdev state post the online action to see
284 * if we can retire this case.
288 return (/*consumed*/true);
292 * If the auto-replace policy is enabled, and we have physical
293 * path information, try a physical path replacement.
295 if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
297 "CaseFile(%s:%s:%s): AutoReplace not set. "
298 "Ignoring device insertion.\n",
299 PoolGUIDString().c_str(),
300 VdevGUIDString().c_str(),
301 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
302 return (/*consumed*/false);
305 if (PhysicalPath().empty()) {
307 "CaseFile(%s:%s:%s): No physical path information. "
308 "Ignoring device insertion.\n",
309 PoolGUIDString().c_str(),
310 VdevGUIDString().c_str(),
311 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
312 return (/*consumed*/false);
315 if (physPath != PhysicalPath()) {
317 "CaseFile(%s:%s:%s): Physical path mismatch. "
318 "Ignoring device insertion.\n",
319 PoolGUIDString().c_str(),
320 VdevGUIDString().c_str(),
321 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
322 return (/*consumed*/false);
325 /* Write a label on the newly inserted disk. */
326 if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
328 "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
329 zpool_get_name(pool), VdevGUIDString().c_str(),
330 libzfs_error_action(g_zfsHandle),
331 libzfs_error_description(g_zfsHandle));
332 return (/*consumed*/false);
335 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
336 PoolGUIDString().c_str(), VdevGUIDString().c_str(),
338 return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
/*
 * Re-evaluate this case against a ZFS event.  Dispatches on the
 * event's "type" (vdev_remove, pool_destroy, config_sync) and then
 * its "class" (removed, statechange, io/checksum ereports).  Returns
 * whether the event was consumed or the case was closed as solved.
 */
342 CaseFile::ReEvaluate(const ZfsEvent &event)
344 bool consumed(false);
346 if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
348 * The Vdev we represent has been removed from the
349 * configuration. This case is no longer of value.
353 return (/*consumed*/true);
354 } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
355 /* This Pool has been destroyed. Discard the case */
358 return (/*consumed*/true);
359 } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
/* After a config sync, an unhealthy vdev may be eligible for a spare. */
361 if (VdevState() < VDEV_STATE_HEALTHY)
362 consumed = ActivateSpare();
366 if (event.Value("class") == "resource.fs.zfs.removed") {
367 bool spare_activated;
369 if (!RefreshVdevState()) {
371 * The pool or vdev for this case file is no longer
372 * part of the configuration. This can happen
373 * if we process a device arrival notification
374 * before seeing the ZFS configuration change
378 "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
379 "unconfigured. Closing\n",
380 PoolGUIDString().c_str(),
381 VdevGUIDString().c_str());
383 * Close the case now so we won't waste cycles in the
389 * Since this event was not used to close this
390 * case, do not report it as consumed.
392 return (/*consumed*/false);
396 * Discard any tentative I/O error events for
397 * this case. They were most likely caused by the
398 * hot-unplug of this device.
400 PurgeTentativeEvents();
402 /* Try to activate spares if they are available */
403 spare_activated = ActivateSpare();
406 * Rescan the drives in the system to see if a recent
407 * drive arrival can be used to solve this case.
409 ZfsDaemon::RequestSystemRescan();
412 * Consume the event if we successfully activated a spare.
413 * Otherwise, leave it in the unconsumed events list so that the
414 * future addition of a spare to this pool might be able to
417 consumed = spare_activated;
418 } else if (event.Value("class") == "resource.fs.zfs.statechange") {
421 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
422 * activate a hotspare. Otherwise, ignore the event
424 if (VdevState() == VDEV_STATE_FAULTED ||
425 VdevState() == VDEV_STATE_DEGRADED ||
426 VdevState() == VDEV_STATE_CANT_OPEN)
427 (void) ActivateSpare();
/*
 * I/O and checksum ereports are only tentative: they are queued and a
 * grace-period callout is armed, so a burst caused by a hot-unplug can
 * be purged before the case acts on it.
 */
430 else if (event.Value("class") == "ereport.fs.zfs.io" ||
431 event.Value("class") == "ereport.fs.zfs.checksum") {
433 m_tentativeEvents.push_front(event.DeepCopy());
434 RegisterCallout(event);
438 bool closed(CloseIfSolved());
440 return (consumed || closed);
/*
 * Try to replace this case's vdev with an available hot spare.
 * Walks the pool's ZPOOL_CONFIG_SPARES list for the first spare that
 * is healthy and not already in use, then delegates the attach to
 * Replace(..., isspare=true).  Returns false-paths on any lookup
 * failure (exact early returns not visible in this listing).
 */
445 CaseFile::ActivateSpare() {
446 nvlist_t *config, *nvroot;
448 char *devPath, *vdev_type;
449 const char *poolname;
453 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
454 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
456 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
457 "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
460 poolname = zpool_get_name(zhp);
461 config = zpool_get_config(zhp, NULL);
462 if (config == NULL) {
463 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
464 "config for pool %s", poolname);
467 error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
469 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
470 "tree for pool %s", poolname);
474 nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
477 /* The pool has no spares configured */
478 syslog(LOG_INFO, "CaseFile::ActivateSpare: "
479 "No spares available for pool %s", poolname);
/* Scan for the first spare that is neither in use nor unhealthy. */
482 for (i = 0; i < nspares; i++) {
483 uint64_t *nvlist_array;
487 if (nvlist_lookup_uint64_array(spares[i],
488 ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
489 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
490 "find vdev stats for pool %s, spare %d",
/* Vdev stats are stored as a packed uint64 array; view as vdev_stat_t. */
494 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
496 if ((vs->vs_aux != VDEV_AUX_SPARED)
497 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
498 /* We found a usable spare */
504 /* No available spares were found */
508 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
510 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
511 "the path of pool %s, spare %d. Error %d",
516 error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
518 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
519 "the vdev type of pool %s, spare %d. Error %d",
524 return (Replace(vdev_type, devPath, /*isspare*/true));
/*
 * Arm (or shorten) the tentative-event grace period timer based on the
 * event's timestamp: countdown = s_removeGracePeriod - (now - timestamp).
 * The timer is only reset if it is not pending or the new countdown is
 * shorter than the time already remaining.
 */
528 CaseFile::RegisterCallout(const Event &event)
530 timeval now, countdown, elapsed, timestamp, zero, remaining;
532 gettimeofday(&now, 0);
533 timestamp = event.GetTimestamp();
534 timersub(&now, &timestamp, &elapsed);
535 timersub(&s_removeGracePeriod, &elapsed, &countdown);
537 * If countdown is <= zero, Reset the timer to the
538 * smallest positive time value instead
541 if (timercmp(&countdown, &zero, <=)) {
542 timerclear(&countdown);
/* 1us: the smallest strictly-positive timeval, so the callout still fires. */
543 countdown.tv_usec = 1;
546 remaining = m_tentativeTimer.TimeRemaining();
548 if (!m_tentativeTimer.IsPending()
549 || timercmp(&countdown, &remaining, <))
550 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
/*
 * Close this case if there is nothing left to track: when the event
 * lists are empty, the decision is made purely on the vdev state.
 * Only HEALTHY vdevs lead to closing; all other states keep the case
 * open (see per-state comments).  (Listing is incomplete — the actual
 * close/return statements are not visible.)
 */
555 CaseFile::CloseIfSolved()
558 && m_tentativeEvents.empty()) {
561 * We currently do not track or take actions on
562 * devices in the degraded or faulted state.
563 * Once we have support for spare pools, we'll
564 * retain these cases so that any spares added in
565 * the future can be applied to them.
567 switch (VdevState()) {
568 case VDEV_STATE_HEALTHY:
569 /* No need to keep cases for healthy vdevs */
572 case VDEV_STATE_REMOVED:
573 case VDEV_STATE_CANT_OPEN:
575 * Keep open. We may solve it with a newly inserted
578 case VDEV_STATE_FAULTED:
579 case VDEV_STATE_DEGRADED:
581 * Keep open. We may solve it with the future
582 * addition of a spare to the pool
584 case VDEV_STATE_UNKNOWN:
585 case VDEV_STATE_CLOSED:
586 case VDEV_STATE_OFFLINE:
588 * Keep open? This may not be the correct behavior,
589 * but it's what we've always done
595 * Re-serialize the case in order to remove any
596 * previous event data.
/*
 * Log this case to syslog: identifying GUIDs/path, current vdev state,
 * and every queued tentative and confirmed event.
 */
607 syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
608 VdevGUIDString().c_str(), PhysicalPath().c_str());
609 syslog(LOG_INFO, "\tVdev State = %s\n",
610 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
611 if (m_tentativeEvents.size() != 0) {
612 syslog(LOG_INFO, "\t=== Tentative Events ===\n");
613 for (EventList::iterator event(m_tentativeEvents.begin());
614 event != m_tentativeEvents.end(); event++)
615 (*event)->Log(LOG_INFO);
617 if (m_events.size() != 0) {
618 syslog(LOG_INFO, "\t=== Events ===\n");
619 for (EventList::iterator event(m_events.begin());
620 event != m_events.end(); event++)
621 (*event)->Log(LOG_INFO);
625 //- CaseFile Static Protected Methods ------------------------------------------
/*
 * Static timer-callback trampoline: recover the CaseFile from the
 * opaque callout argument and forward to the member implementation.
 */
627 CaseFile::OnGracePeriodEnded(void *arg)
629 CaseFile &casefile(*static_cast<CaseFile *>(arg));
631 casefile.OnGracePeriodEnded();
/*
 * scandir(3) selector: accept only regular files whose names parse as
 * "pool_<guid>_vdev_<guid>.case" (both GUIDs must scan successfully).
 */
635 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
640 if (dirEntry->d_type == DT_REG
641 && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
642 &poolGUID, &vdevGUID) == 2)
/*
 * Reconstitute one case from a serialized file.  The pool/vdev GUIDs
 * are parsed from the filename; an existing in-memory case for the
 * same pair is reused, otherwise a new CaseFile is built from the live
 * pool configuration.  Stale files (vdev degraded/faulted, pool or
 * vdev gone, or unparseable content) are unlinked.
 */
648 CaseFile::DeSerializeFile(const char *fileName)
650 string fullName(s_caseFilePath + '/' + fileName);
651 CaseFile *existingCaseFile(NULL);
652 CaseFile *caseFile(NULL);
659 sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
660 &poolGUID, &vdevGUID);
661 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
662 if (existingCaseFile != NULL) {
664 * If the vdev is already degraded or faulted,
665 * there's no point in keeping the state around
666 * that we use to put a drive into the degraded
667 * state. However, if the vdev is simply missing,
668 * preserve the case data in the hopes that it will
671 caseFile = existingCaseFile;
672 vdev_state curState(caseFile->VdevState());
673 if (curState > VDEV_STATE_CANT_OPEN
674 && curState < VDEV_STATE_HEALTHY) {
679 ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
681 || (vdevConf = VdevIterator(zpl.front())
682 .Find(vdevGUID)) == NULL) {
684 * Either the pool no longer exists
685 * or this vdev is no longer a member of
/* Stale case file: its subject no longer exists, so discard it. */
688 unlink(fullName.c_str());
693 * Any vdev we find that does not have a case file
694 * must be in the healthy state and thus worthy of
695 * continued SERD data tracking.
697 caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
700 ifstream caseStream(fullName.c_str());
702 throw ZfsdException("CaseFile::DeSerialize: Unable to "
703 "read %s.\n", fileName);
705 caseFile->DeSerialize(caseStream);
706 } catch (const ParseException &exp) {
/* Only delete cases created here; a pre-existing case stays alive. */
709 if (caseFile != existingCaseFile)
713 * Since we can't parse the file, unlink it so we don't
714 * trip over it again.
717 } catch (const ZfsdException &zfsException) {
720 if (caseFile != existingCaseFile)
/*
 * Construct a case for the given vdev, caching its GUIDs, state, and
 * physical path, pre-formatting GUID strings for logging, and
 * registering the new case on s_activeCases.
 */
726 CaseFile::CaseFile(const Vdev &vdev)
727 : m_poolGUID(vdev.PoolGUID()),
728 m_vdevGUID(vdev.GUID()),
729 m_vdevState(vdev.State()),
730 m_vdevPhysPath(vdev.PhysicalPath())
732 stringstream guidString;
734 guidString << m_vdevGUID;
735 m_vdevGUIDString = guidString.str();
/*
 * NOTE(review): the same stringstream is reused for the pool GUID.
 * A reset (e.g. guidString.str("")) is not visible in this listing;
 * without one, m_poolGUIDString would contain both GUIDs — confirm
 * against the full source.
 */
737 guidString << m_poolGUID;
738 m_poolGUIDString = guidString.str();
740 s_activeCases.push_back(this);
742 syslog(LOG_INFO, "Creating new CaseFile:\n");
/*
 * Destructor: drop queued tentative events, cancel the grace-period
 * timer, and unregister this case from the global active list.
 */
746 CaseFile::~CaseFile()
749 PurgeTentativeEvents();
750 m_tentativeTimer.Stop();
751 s_activeCases.remove(this);
/*
 * Discard all confirmed events.  The loop presumably deletes each
 * heap-allocated Event (bodies created via DeepCopy/CreateEvent);
 * the delete and list clear are not visible in this listing.
 */
755 CaseFile::PurgeEvents()
757 for (EventList::iterator event(m_events.begin());
758 event != m_events.end(); event++)
/*
 * Discard all tentative events and empty the tentative list.  The
 * loop body (presumably deleting each heap-allocated Event) is not
 * visible in this listing.
 */
765 CaseFile::PurgeTentativeEvents()
767 for (EventList::iterator event(m_tentativeEvents.begin());
768 event != m_tentativeEvents.end(); event++)
771 m_tentativeEvents.clear();
/*
 * Write each event in the list to fd as "<prefix><event string>"
 * (one record per event).  The prefix distinguishes tentative events
 * from confirmed ones on deserialization.
 */
775 CaseFile::SerializeEvList(const EventList events, int fd,
776 const char* prefix) const
780 for (EventList::const_iterator curEvent = events.begin();
781 curEvent != events.end(); curEvent++) {
782 const string &eventString((*curEvent)->GetEventString());
784 // TODO: replace many write(2) calls with a single writev(2)
786 write(fd, prefix, strlen(prefix));
787 write(fd, eventString.c_str(), eventString.length());
/*
 * Persist this case to "<s_caseFilePath>/pool_<guid>_vdev_<guid>.case".
 * A case with no events is represented by the absence of a file, so
 * the file is unlinked instead of written in that situation.
 */
792 CaseFile::Serialize()
794 stringstream saveFile;
796 saveFile << setfill('0')
797 << s_caseFilePath << "/"
798 << "pool_" << PoolGUIDString()
799 << "_vdev_" << VdevGUIDString()
802 if (m_events.empty() && m_tentativeEvents.empty()) {
803 unlink(saveFile.str().c_str());
807 int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
809 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
810 saveFile.str().c_str());
/* Confirmed events first, then tentative events tagged "tentative ". */
813 SerializeEvList(m_events, fd);
814 SerializeEvList(m_tentativeEvents, fd, "tentative ");
/*
 * Rebuild this case's event lists from a serialized stream, one event
 * per line.  A leading "tentative " tag routes the event into
 * m_tentativeEvents (and re-arms the grace-period callout); untagged
 * lines go to m_events.  Events are reconstructed through the daemon's
 * EventFactory.
 */
819 * XXX: This method assumes that events may not contain embedded newlines. If
820 * ever events can contain embedded newlines, then CaseFile must switch
821 * serialization formats
824 CaseFile::DeSerialize(ifstream &caseStream)
827 const EventFactory &factory(ZfsDaemon::Get().GetFactory());
/* noskipws: keep whitespace significant; ws: skip any leading blanks once. */
829 caseStream >> std::noskipws >> std::ws;
830 while (caseStream.good()) {
833 * read the beginning of a line and check it for
834 * "tentative". If found, discard "tentative".
838 EventList* destEvents;
839 const string tentFlag("tentative ");
841 std::stringbuf lineBuf;
843 caseStream.get(lineBuf);
844 caseStream.ignore(); /*discard the newline character*/
845 line = lineBuf.str();
846 if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
847 /* Discard "tentative" */
848 line.erase(0, tentFlag.size());
849 destEvents = &m_tentativeEvents;
851 destEvents = &m_events;
853 Event *event(Event::CreateEvent(factory, line));
855 destEvents->push_back(event);
856 RegisterCallout(*event);
/*
 * Close (retire) this case: log the closure, clear the serialized
 * state by re-serializing with no event data, and delete the case.
 * (Listing is incomplete — the actual purge/serialize/delete calls
 * are not visible.)
 */
865 * This case is no longer relevant. Clean up our
866 * serialization file, and delete the case.
868 syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
869 PoolGUIDString().c_str(), VdevGUIDString().c_str(),
870 zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
873 * Serialization of a Case with no event data, clears the
874 * Serialization data for that event.
/*
 * Grace-period expiry handler: promote all tentative events to the
 * confirmed list, then decide whether the accumulated errors warrant
 * faulting (I/O errors) or degrading (checksum errors) the vdev.
 * A fault condition takes priority over a degrade condition.
 */
883 CaseFile::OnGracePeriodEnded()
885 bool should_fault, should_degrade;
886 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
887 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
/* splice(): moves tentative events into m_events without copying. */
889 m_events.splice(m_events.begin(), m_tentativeEvents);
890 should_fault = ShouldFault();
891 should_degrade = ShouldDegrade();
893 if (should_fault || should_degrade) {
895 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
897 * Either the pool no longer exists
898 * or this vdev is no longer a member of
907 /* A fault condition has priority over a degrade condition */
909 /* Fault the vdev and close the case. */
910 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
911 VDEV_AUX_ERR_EXCEEDED) == 0) {
912 syslog(LOG_INFO, "Faulting vdev(%s/%s)",
913 PoolGUIDString().c_str(),
914 VdevGUIDString().c_str())
919 syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
920 PoolGUIDString().c_str(),
921 VdevGUIDString().c_str(),
922 libzfs_error_action(g_zfsHandle),
923 libzfs_error_description(g_zfsHandle));
/*
 * NOTE(review): should_degrade was computed above, but this branch
 * re-calls ShouldDegrade().  Equivalent today (m_events has not
 * changed since), but inconsistent — confirm against full source.
 */
926 else if (ShouldDegrade()) {
927 /* Degrade the vdev and close the case. */
928 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
929 VDEV_AUX_ERR_EXCEEDED) == 0) {
930 syslog(LOG_INFO, "Degrading vdev(%s/%s)",
931 PoolGUIDString().c_str(),
932 VdevGUIDString().c_str());
937 syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
938 PoolGUIDString().c_str(),
939 VdevGUIDString().c_str(),
940 libzfs_error_action(g_zfsHandle),
941 libzfs_error_description(g_zfsHandle));
/*
 * Determine whether this case's vdev is currently being replaced by a
 * working spare.  If the vdev's parent is a "spare" vdev, return the
 * sibling (the replacing device), preferring a resilvering or healthy
 * one; otherwise return NonexistentVdev.
 */
948 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
949 Vdev vd(zhp, CaseVdev(zhp));
950 std::list<Vdev> children;
951 std::list<Vdev>::iterator children_it;
953 Vdev parent(vd.Parent());
954 Vdev replacing(NonexistentVdev);
957 * To determine whether we are being replaced by another spare that
958 * is still working, then make sure that it is currently spared and
959 * that the spare is either resilvering or healthy. If any of these
960 * conditions fail, then we are not being replaced by a spare.
962 * If the spare is healthy, then the case file should be closed very
963 * soon after this check.
965 if (parent.DoesNotExist()
966 || parent.Name(zhp, /*verbose*/false) != "spare")
967 return (NonexistentVdev);
969 children = parent.Children();
970 children_it = children.begin();
971 for (;children_it != children.end(); children_it++) {
972 Vdev child = *children_it;
/* Skip ourselves — we want the sibling under the "spare" vdev. */
975 if (child.GUID() == VdevGUID())
978 * Accept the first child that doesn't match our GUID, or
979 * any resilvering/healthy device if one exists.
981 if (replacing.DoesNotExist() || child.IsResilvering()
982 || child.State() == VDEV_STATE_HEALTHY)
/*
 * Attach a replacement device (path, of vdev_type) for this case's
 * vdev via zpool_vdev_attach.  When isspare is set and we are already
 * spared: a working replacement spare means nothing to do; a broken
 * replacement spare becomes the device we spare instead ("spare the
 * spare").  Returns whether the attach was initiated.
 */
990 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
991 nvlist_t *nvroot, *newvd;
992 const char *poolname;
993 string oldstr(VdevGUIDString());
996 /* Figure out what pool we're working on */
997 ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
998 zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1000 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1001 "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1004 poolname = zpool_get_name(zhp);
1005 Vdev vd(zhp, CaseVdev(zhp));
1006 Vdev replaced(BeingReplacedBy(zhp));
1008 if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1009 /* If we are already being replaced by a working spare, pass. */
1010 if (replaced.IsResilvering()
1011 || replaced.State() == VDEV_STATE_HEALTHY) {
1012 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1013 "replaced", VdevGUIDString().c_str(), path);
1014 return (/*consumed*/false);
1017 * If we have already been replaced by a spare, but that spare
1018 * is broken, we must spare the spare, not the original device.
1020 oldstr = replaced.GUIDString();
1021 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1022 "broken spare %s instead", VdevGUIDString().c_str(),
1023 path, oldstr.c_str());
1027 * Build a root vdev/leaf vdev configuration suitable for
1028 * zpool_vdev_attach. Only enough data for the kernel to find
1029 * the device (i.e. type and disk device node path) are needed.
1034 if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1035 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1036 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1037 "configuration data.", poolname, oldstr.c_str());
1039 nvlist_free(nvroot);
1042 if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1043 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1044 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1045 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1047 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1048 "configuration data.", poolname, oldstr.c_str());
1050 nvlist_free(nvroot);
1054 /* Data was copied when added to the root vdev. */
1057 retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1058 /*replace*/B_TRUE) == 0);
1060 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1061 poolname, oldstr.c_str(), path);
1063 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1064 poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1065 libzfs_error_description(g_zfsHandle));
1066 nvlist_free(nvroot);
/* Does the argument event refer to a checksum error? */
1073 IsChecksumEvent(const Event* const event)
1075 return ("ereport.fs.zfs.checksum" == event->Value("type"));
/* Does the argument event refer to an IO error? */
1080 IsIOEvent(const Event* const event)
1082 return ("ereport.fs.zfs.io" == event->Value("type"));
/*
 * A vdev should be degraded once its confirmed checksum-error count
 * exceeds the threshold.  (The threshold constant is named
 * ZFS_DEGRADE_IO_COUNT even though it gates checksum events here.)
 */
1086 CaseFile::ShouldDegrade() const
1088 return (std::count_if(m_events.begin(), m_events.end(),
1089 IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
/*
 * A vdev should be faulted once its confirmed I/O-error count exceeds
 * the threshold.
 */
1093 CaseFile::ShouldFault() const
1095 return (std::count_if(m_events.begin(), m_events.end(),
1096 IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
/* Locate this case's vdev config within the given pool's vdev tree. */
1100 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1102 return (VdevIterator(zhp).Find(VdevGUID()));