/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file case_file.cc
 *
 * We keep case files for any leaf vdev that is not in the optimal state.
 * However, we only serialize to disk those events that need to be preserved
 * across reboots.  For now, this is just a log of soft errors which we
 * accumulate in order to mark a device as degraded.
 */
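
/*
 * Illustrative sketch of the on-disk case format (hypothetical GUIDs and
 * truncated, illustrative event strings; the exact fields come from devd).
 * Each case is one file, one serialized event per line, with tentative
 * events prefixed by "tentative " (see Serialize()/DeSerialize() below):
 *
 *   /var/db/zfsd/cases/pool_1234_vdev_5678.case:
 *     !system=ZFS subsystem=ZFS type=ereport.fs.zfs.io ...
 *     tentative !system=ZFS subsystem=ZFS type=ereport.fs.zfs.checksum ...
 */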
#include <sys/cdefs.h>
#include <sys/time.h>

#include <sys/fs/zfs.h>

#include <dirent.h>
#include <fcntl.h>
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <syslog.h>
#include <unistd.h>

#include <algorithm>
#include <fstream>
#include <functional>
#include <iomanip>
#include <memory>
#include <sstream>

#include <libzfs.h>

#include <list>
#include <map>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD$");
/*============================ Namespace Control =============================*/
using std::auto_ptr;
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;

using DevdCtl::Event;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;

/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------

CaseFileList  CaseFile::s_activeCases;
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/ };

//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if ((*curCase)->PoolGUID() != poolGUID
                 || (*curCase)->VdevGUID() != vdevGUID)
                        continue;

                /*
                 * We only carry one active case per vdev.
                 */
                return (*curCase);
        }
        return (NULL);
}

CaseFile *
CaseFile::Find(const string &physPath)
{
        CaseFile *result = NULL;

        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if ((*curCase)->PhysicalPath() != physPath)
                        continue;

                if (result != NULL) {
                        syslog(LOG_WARNING, "Multiple casefiles found for "
                            "physical path %s.  "
                            "This is most likely a bug in zfsd",
                            physPath.c_str());
                }
                result = *curCase;
        }
        return (result);
}

void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
        CaseFileList::iterator casefile;

        for (casefile = s_activeCases.begin();
             casefile != s_activeCases.end();) {
                CaseFileList::iterator next = casefile;
                next++;
                if (poolGUID == (*casefile)->PoolGUID())
                        (*casefile)->ReEvaluate(event);
                casefile = next;
        }
}

CaseFile &
CaseFile::Create(Vdev &vdev)
{
        CaseFile *activeCase;

        activeCase = Find(vdev.PoolGUID(), vdev.GUID());
        if (activeCase == NULL)
                activeCase = new CaseFile(vdev);

        return (*activeCase);
}

void
CaseFile::DeSerialize()
{
        struct dirent **caseFiles;

        int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
                         DeSerializeSelector, /*compar*/NULL));

        if (numCaseFiles == -1)
                return;
        if (numCaseFiles == 0) {
                free(caseFiles);
                return;
        }

        for (int i = 0; i < numCaseFiles; i++) {

                DeSerializeFile(caseFiles[i]->d_name);
                free(caseFiles[i]);
        }
        free(caseFiles);
}

void
CaseFile::LogAll()
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++)
                (*curCase)->Log();
}

void
CaseFile::PurgeAll()
{
        /*
         * Serialize casefiles before deleting them so that they can be
         * reread and revalidated during BuildCaseFiles.
         * CaseFiles remove themselves from this list on destruction.
         */
        while (s_activeCases.size() != 0) {
                CaseFile *casefile = s_activeCases.front();
                casefile->Serialize();
                delete casefile;
        }
}

//- CaseFile Public Methods ----------------------------------------------------
bool
CaseFile::RefreshVdevState()
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
        if (casePool == NULL)
                return (false);

        Vdev vd(casePool, CaseVdev(casePool));
        if (vd.DoesNotExist())
                return (false);

        m_vdevState    = vd.State();
        m_vdevPhysPath = vd.PhysicalPath();
        return (true);
}

bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

        if (pool == NULL || !RefreshVdevState()) {
                /*
                 * The pool or vdev for this case file is no longer
                 * part of the configuration.  This can happen
                 * if we process a device arrival notification
                 * before seeing the ZFS configuration change
                 * event.
                 */
                syslog(LOG_INFO,
                       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
                       "Closing\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str());
                Close();

                /*
                 * Since this event was not used to close this
                 * case, do not report it as consumed.
                 */
                return (/*consumed*/false);
        }

        if (VdevState() > VDEV_STATE_CANT_OPEN) {
                /*
                 * For now, newly discovered devices only help for
                 * devices that are missing.  In the future, we might
                 * use a newly inserted spare to replace a degraded
                 * or faulted device.
                 */
                syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
                    PoolGUIDString().c_str(), VdevGUIDString().c_str());
                return (/*consumed*/false);
        }

        if (vdev != NULL
         && vdev->PoolGUID() == m_poolGUID
         && vdev->GUID() == m_vdevGUID) {

                zpool_vdev_online(pool, vdev->GUIDString().c_str(),
                                  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
                                  &m_vdevState);
                syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
                       zpool_get_name(pool), vdev->GUIDString().c_str(),
                       devPath.c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

                /*
                 * Check the vdev state after the online action to see
                 * if we can retire this case.
                 */
                CloseIfSolved();

                return (/*consumed*/true);
        }

        /*
         * If the auto-replace policy is enabled and we have physical
         * path information, try a physical path replacement.
         */
        if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): AutoReplace not set.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (PhysicalPath().empty()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): No physical path information.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (physPath != PhysicalPath()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): Physical path mismatch.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        /* Write a label on the newly inserted disk. */
        if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
                syslog(LOG_ERR,
                       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
                       zpool_get_name(pool), VdevGUIDString().c_str(),
                       libzfs_error_action(g_zfsHandle),
                       libzfs_error_description(g_zfsHandle));
                return (/*consumed*/false);
        }

        syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
            PoolGUIDString().c_str(), VdevGUIDString().c_str(),
            devPath.c_str());
        return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}
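
/*
 * Summary of the event dispatch performed below, derived from the checks
 * in ReEvaluate(const ZfsEvent &):
 *
 *   misc.fs.zfs.vdev_remove,
 *   misc.fs.zfs.pool_destroy    -> close the case
 *   misc.fs.zfs.config_sync     -> refresh state; try a spare if degraded
 *   resource.fs.zfs.removed     -> purge tentative events, try a spare,
 *                                  request a system rescan
 *   resource.fs.zfs.statechange -> try a spare if FAULTED, DEGRADED, or
 *                                  CANT_OPEN
 *   ereport.fs.zfs.io,
 *   ereport.fs.zfs.checksum     -> queue as tentative and arm the grace
 *                                  period timer
 */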
bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
        bool consumed(false);

        if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
                /*
                 * The vdev we represent has been removed from the
                 * configuration.  This case is no longer of value.
                 */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
                /* This pool has been destroyed.  Discard the case. */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
                RefreshVdevState();
                if (VdevState() < VDEV_STATE_HEALTHY)
                        consumed = ActivateSpare();
        }

        if (event.Value("class") == "resource.fs.zfs.removed") {
                bool spare_activated;

                if (!RefreshVdevState()) {
                        /*
                         * The pool or vdev for this case file is no longer
                         * part of the configuration.  This can happen
                         * if we process a device arrival notification
                         * before seeing the ZFS configuration change
                         * event.
                         */
                        syslog(LOG_INFO,
                               "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
                               "unconfigured.  Closing\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        /*
                         * Close the case now so we won't waste cycles in
                         * the system rescan.
                         */
                        Close();

                        /*
                         * Since this event was not used to close this
                         * case, do not report it as consumed.
                         */
                        return (/*consumed*/false);
                }

                /*
                 * Discard any tentative I/O error events for
                 * this case.  They were most likely caused by the
                 * hot-unplug of this device.
                 */
                PurgeTentativeEvents();

                /* Try to activate spares if they are available. */
                spare_activated = ActivateSpare();

                /*
                 * Rescan the drives in the system to see if a recent
                 * drive arrival can be used to solve this case.
                 */
                ZfsDaemon::RequestSystemRescan();

                /*
                 * Consume the event if we successfully activated a spare.
                 * Otherwise, leave it in the unconsumed events list so that
                 * the future addition of a spare to this pool might be able
                 * to close the case.
                 */
                consumed = spare_activated;
        } else if (event.Value("class") == "resource.fs.zfs.statechange") {
                RefreshVdevState();
                /*
                 * If this vdev is DEGRADED, FAULTED, or UNAVAIL
                 * (CANT_OPEN), try to activate a hotspare.  Otherwise,
                 * ignore the event.
                 */
                if (VdevState() == VDEV_STATE_FAULTED ||
                    VdevState() == VDEV_STATE_DEGRADED ||
                    VdevState() == VDEV_STATE_CANT_OPEN)
                        (void) ActivateSpare();
                consumed = true;
        } else if (event.Value("class") == "ereport.fs.zfs.io" ||
                   event.Value("class") == "ereport.fs.zfs.checksum") {

                m_tentativeEvents.push_front(event.DeepCopy());
                RegisterCallout(event);
                consumed = true;
        }

        bool closed(CloseIfSolved());

        return (consumed || closed);
}

bool
CaseFile::ActivateSpare()
{
        nvlist_t        *config, *nvroot;
        nvlist_t       **spares;
        char            *devPath, *vdev_type;
        const char      *poolname;
        u_int            nspares, i;
        int              error;

        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t  *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "for pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        config = zpool_get_config(zhp, NULL);
        if (config == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "config for pool %s", poolname);
                return (false);
        }
        error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
                       "tree for pool %s", poolname);
                return (false);
        }
        nspares = 0;
        nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
                                   &nspares);
        if (nspares == 0) {
                /* The pool has no spares configured. */
                syslog(LOG_INFO, "CaseFile::ActivateSpare: "
                       "No spares available for pool %s", poolname);
                return (false);
        }
        for (i = 0; i < nspares; i++) {
                uint64_t    *nvlist_array;
                vdev_stat_t *vs;
                uint_t       nstats;

                if (nvlist_lookup_uint64_array(spares[i],
                    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
                        syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
                               "find vdev stats for pool %s, spare %u",
                               poolname, i);
                        return (false);
                }
                vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

                if ((vs->vs_aux != VDEV_AUX_SPARED)
                 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
                        /* We found a usable spare. */
                        break;
                }
        }

        if (i == nspares) {
                /* No available spares were found. */
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the path of pool %s, spare %u. Error %d",
                       poolname, i, error);
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the vdev type of pool %s, spare %u. Error %d",
                       poolname, i, error);
                return (false);
        }

        return (Replace(vdev_type, devPath, /*isspare*/true));
}

void
CaseFile::RegisterCallout(const Event &event)
{
        timeval now, countdown, elapsed, timestamp, zero, remaining;

        gettimeofday(&now, 0);
        timestamp = event.GetTimestamp();
        timersub(&now, &timestamp, &elapsed);
        timersub(&s_removeGracePeriod, &elapsed, &countdown);
        /*
         * If the countdown is <= zero, reset the timer to the
         * smallest positive time value instead.
         */
        timerclear(&zero);
        if (timercmp(&countdown, &zero, <=)) {
                timerclear(&countdown);
                countdown.tv_usec = 1;
        }

        remaining = m_tentativeTimer.TimeRemaining();

        if (!m_tentativeTimer.IsPending()
         || timercmp(&countdown, &remaining, <))
                m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}
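
/*
 * Worked example of the arithmetic above (times are illustrative): with
 * s_removeGracePeriod = 60s, an I/O error event timestamped 45s ago yields
 * countdown = 60s - 45s = 15s, so the grace period timer fires 15s from
 * now.  An event already older than 60s yields a non-positive countdown,
 * which is clamped to 1us so the callout still fires almost immediately.
 * The timer is only reset when the new countdown would expire sooner than
 * the one already pending.
 */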

bool
CaseFile::CloseIfSolved()
{
        if (m_events.empty()
         && m_tentativeEvents.empty()) {

                /*
                 * We currently do not track or take actions on
                 * devices in the degraded or faulted state.
                 * Once we have support for spare pools, we'll
                 * retain these cases so that any spares added in
                 * the future can be applied to them.
                 */
                switch (VdevState()) {
                case VDEV_STATE_HEALTHY:
                        /* No need to keep cases for healthy vdevs. */
                        Close();
                        return (true);
                case VDEV_STATE_REMOVED:
                case VDEV_STATE_CANT_OPEN:
                        /*
                         * Keep open.  We may solve it with a newly inserted
                         * device.
                         */
                case VDEV_STATE_FAULTED:
                case VDEV_STATE_DEGRADED:
                        /*
                         * Keep open.  We may solve it with the future
                         * addition of a spare to the pool.
                         */
                case VDEV_STATE_UNKNOWN:
                case VDEV_STATE_CLOSED:
                case VDEV_STATE_OFFLINE:
                        /*
                         * Keep open?  This may not be the correct behavior,
                         * but it's what we've always done.
                         */
                        ;
                }

                /*
                 * Re-serialize the case in order to remove any
                 * previous event data.
                 */
                Serialize();
        }

        return (false);
}

void
CaseFile::Log()
{
        syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
               VdevGUIDString().c_str(), PhysicalPath().c_str());
        syslog(LOG_INFO, "\tVdev State = %s\n",
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
        if (m_tentativeEvents.size() != 0) {
                syslog(LOG_INFO, "\t=== Tentative Events ===\n");
                for (EventList::iterator event(m_tentativeEvents.begin());
                     event != m_tentativeEvents.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
        if (m_events.size() != 0) {
                syslog(LOG_INFO, "\t=== Events ===\n");
                for (EventList::iterator event(m_events.begin());
                     event != m_events.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
}

//- CaseFile Static Protected Methods ------------------------------------------
void
CaseFile::OnGracePeriodEnded(void *arg)
{
        CaseFile &casefile(*static_cast<CaseFile *>(arg));

        casefile.OnGracePeriodEnded();
}

int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
        uint64_t poolGUID;
        uint64_t vdevGUID;

        if (dirEntry->d_type == DT_REG
         && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                   &poolGUID, &vdevGUID) == 2)
                return (1);
        return (0);
}
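
/*
 * Example directory entries against the selector above (hypothetical
 * GUIDs):
 *
 *   "pool_14925907286474002220_vdev_6610368094508145656.case" -> selected
 *   "lost+found"                                              -> rejected
 *
 * Note that sscanf(3) does not anchor the end of the pattern, so a stray
 * suffix after ".case" would still be selected; this is tolerable because
 * only zfsd writes to s_caseFilePath.
 */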

void
CaseFile::DeSerializeFile(const char *fileName)
{
        string    fullName(s_caseFilePath + '/' + fileName);
        CaseFile *existingCaseFile(NULL);
        CaseFile *caseFile(NULL);

        try {
                uint64_t poolGUID;
                uint64_t vdevGUID;
                nvlist_t *vdevConf;

                sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                       &poolGUID, &vdevGUID);
                existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
                if (existingCaseFile != NULL) {
                        /*
                         * If the vdev is already degraded or faulted,
                         * there's no point in keeping the state around
                         * that we use to put a drive into the degraded
                         * state.  However, if the vdev is simply missing,
                         * preserve the case data in the hopes that it will
                         * return.
                         */
                        caseFile = existingCaseFile;
                        vdev_state curState(caseFile->VdevState());
                        if (curState > VDEV_STATE_CANT_OPEN
                         && curState < VDEV_STATE_HEALTHY) {
                                /* Use the full path; fileName is relative. */
                                unlink(fullName.c_str());
                                return;
                        }
                } else {
                        ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
                        if (zpl.empty()
                         || (vdevConf = VdevIterator(zpl.front())
                                                    .Find(vdevGUID)) == NULL) {
                                /*
                                 * Either the pool no longer exists
                                 * or this vdev is no longer a member of
                                 * the pool.
                                 */
                                unlink(fullName.c_str());
                                return;
                        }

                        /*
                         * Any vdev we find that does not have a case file
                         * must be in the healthy state and thus worthy of
                         * continued SERD data tracking.
                         */
                        caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
                }

                ifstream caseStream(fullName.c_str());
                if (!caseStream)
                        throw ZfsdException("CaseFile::DeSerialize: Unable to "
                                            "read %s.\n", fileName);

                caseFile->DeSerialize(caseStream);
        } catch (const ParseException &exp) {

                exp.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;

                /*
                 * Since we can't parse the file, unlink it so we don't
                 * trip over it again.
                 */
                unlink(fullName.c_str());
        } catch (const ZfsdException &zfsException) {

                zfsException.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;
        }
}

//- CaseFile Protected Methods -------------------------------------------------
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath())
{
        stringstream guidString;

        guidString << m_vdevGUID;
        m_vdevGUIDString = guidString.str();
        guidString.str("");
        guidString << m_poolGUID;
        m_poolGUIDString = guidString.str();

        s_activeCases.push_back(this);

        syslog(LOG_INFO, "Creating new CaseFile:\n");
        Log();
}

CaseFile::~CaseFile()
{
        PurgeEvents();
        PurgeTentativeEvents();
        m_tentativeTimer.Stop();
        s_activeCases.remove(this);
}

void
CaseFile::PurgeEvents()
{
        for (EventList::iterator event(m_events.begin());
             event != m_events.end(); event++)
                delete *event;

        m_events.clear();
}

void
CaseFile::PurgeTentativeEvents()
{
        for (EventList::iterator event(m_tentativeEvents.begin());
             event != m_tentativeEvents.end(); event++)
                delete *event;

        m_tentativeEvents.clear();
}

void
CaseFile::SerializeEvList(const EventList events, int fd,
                const char* prefix) const
{
        if (events.empty())
                return;
        for (EventList::const_iterator curEvent = events.begin();
             curEvent != events.end(); curEvent++) {
                const string &eventString((*curEvent)->GetEventString());

                // TODO: replace many write(2) calls with a single writev(2)
                if (prefix)
                        write(fd, prefix, strlen(prefix));
                write(fd, eventString.c_str(), eventString.length());
        }
}

void
CaseFile::Serialize()
{
        stringstream saveFile;

        saveFile << setfill('0')
                 << s_caseFilePath << "/"
                 << "pool_" << PoolGUIDString()
                 << "_vdev_" << VdevGUIDString()
                 << ".case";

        if (m_events.empty() && m_tentativeEvents.empty()) {
                unlink(saveFile.str().c_str());
                return;
        }

        int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
        if (fd == -1) {
                syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
                       saveFile.str().c_str());
                return;
        }
        SerializeEvList(m_events, fd);
        SerializeEvList(m_tentativeEvents, fd, "tentative ");
        close(fd);
}

/*
 * XXX: This method assumes that events may not contain embedded newlines.
 * If events are ever allowed to contain embedded newlines, then CaseFile
 * must switch serialization formats.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
        string        evString;
        const EventFactory &factory(ZfsDaemon::Get().GetFactory());

        caseStream >> std::noskipws >> std::ws;
        while (caseStream.good()) {
                /*
                 * Outline: read a line, strip any leading "tentative "
                 * flag to choose the destination list, then create a new
                 * event from the remainder and continue.
                 */
                EventList* destEvents;
                const string tentFlag("tentative ");
                string line;
                std::stringbuf lineBuf;

                caseStream.get(lineBuf);
                caseStream.ignore();  /* Discard the newline character. */
                line = lineBuf.str();
                if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
                        /* Discard "tentative". */
                        line.erase(0, tentFlag.size());
                        destEvents = &m_tentativeEvents;
                } else {
                        destEvents = &m_events;
                }
                Event *event(Event::CreateEvent(factory, line));
                if (event != NULL) {
                        destEvents->push_back(event);
                        RegisterCallout(*event);
                }
        }
}

void
CaseFile::Close()
{
        /*
         * This case is no longer relevant.  Clean up our
         * serialization file, and delete the case.
         */
        syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
               PoolGUIDString().c_str(), VdevGUIDString().c_str(),
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

        /*
         * Serializing a case with no event data removes its
         * serialization file.
         */
        PurgeEvents();
        Serialize();

        delete this;
}

void
CaseFile::OnGracePeriodEnded()
{
        bool should_fault, should_degrade;
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

        m_events.splice(m_events.begin(), m_tentativeEvents);
        should_fault = ShouldFault();
        should_degrade = ShouldDegrade();

        if (should_fault || should_degrade) {
                if (zhp == NULL
                 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
                        /*
                         * Either the pool no longer exists
                         * or this vdev is no longer a member of
                         * the pool.
                         */
                        Close();
                        return;
                }
        }

        /* A fault condition has priority over a degrade condition. */
        if (should_fault) {
                /* Fault the vdev and close the case. */
                if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
                                     VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Faulting vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        } else if (should_degrade) {
                /* Degrade the vdev and close the case. */
                if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
                                       VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Degrading vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        }
        Serialize();
}

Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
        Vdev vd(zhp, CaseVdev(zhp));
        std::list<Vdev> children;
        std::list<Vdev>::iterator children_it;

        Vdev parent(vd.Parent());
        Vdev replacing(NonexistentVdev);

        /*
         * To determine whether we are being replaced by another spare
         * that is still working, make sure that the device is currently
         * spared and that the replacement is either resilvering or
         * healthy.  If either condition fails, then we are not being
         * replaced by a spare.
         *
         * If the spare is healthy, then the case file should be closed
         * very soon after this check.
         */
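        /*
         * A sketch of the vdev tree shape this code walks (hypothetical
         * device names):
         *
         *   pool
         *     raidz1-0
         *       spare-0
         *         da3   <- our case vdev (parent.Name() == "spare")
         *         da7   <- candidate replacement, accepted below if it
         *                  is resilvering or healthy
         */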
        if (parent.DoesNotExist()
         || parent.Name(zhp, /*verbose*/false) != "spare")
                return (NonexistentVdev);

        children = parent.Children();
        children_it = children.begin();
        for (; children_it != children.end(); children_it++) {
                Vdev child = *children_it;

                /* Skip our vdev. */
                if (child.GUID() == VdevGUID())
                        continue;
                /*
                 * Accept the first child that doesn't match our GUID, or
                 * any resilvering/healthy device if one exists.
                 */
                if (replacing.DoesNotExist() || child.IsResilvering()
                 || child.State() == VDEV_STATE_HEALTHY)
                        replacing = child;
        }

        return (replacing);
}

bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare)
{
        nvlist_t *nvroot, *newvd;
        const char *poolname;
        string oldstr(VdevGUIDString());
        bool retval = true;

        /* Figure out what pool we're working on. */
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
                       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        Vdev vd(zhp, CaseVdev(zhp));
        Vdev replaced(BeingReplacedBy(zhp));

        if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
                /* If we are already being replaced by a working spare, pass. */
                if (replaced.IsResilvering()
                 || replaced.State() == VDEV_STATE_HEALTHY) {
                        syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
                            "replaced", VdevGUIDString().c_str(), path);
                        return (/*consumed*/false);
                }
                /*
                 * If we have already been replaced by a spare, but that spare
                 * is broken, we must spare the spare, not the original device.
                 */
                oldstr = replaced.GUIDString();
                syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
                    "broken spare %s instead", VdevGUIDString().c_str(),
                    path, oldstr.c_str());
        }

        /*
         * Build a root vdev/leaf vdev configuration suitable for
         * zpool_vdev_attach.  Only enough data for the kernel to find
         * the device (i.e. type and disk device node path) are needed.
         */
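        /*
         * Schematically, the nvlist built below looks like this (a
         * sketch, not the full kernel config):
         *
         *   nvroot: { type: "root",
         *             children: [ { type: vdev_type, path: path } ] }
         */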
        nvroot = NULL;
        newvd = NULL;

        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
         || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
                    "configuration data.", poolname, oldstr.c_str());
                if (nvroot != NULL)
                        nvlist_free(nvroot);
                return (false);
        }
        if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
         || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
         || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
         || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
                                    &newvd, 1) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
                    "configuration data.", poolname, oldstr.c_str());
                nvlist_free(newvd);
                nvlist_free(nvroot);
                /* Do not report the event as consumed; the attach failed. */
                return (false);
        }

        /* Data was copied when added to the root vdev. */
        nvlist_free(newvd);

        retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
            /*replace*/B_TRUE) == 0);
        if (retval)
                syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
                    poolname, oldstr.c_str(), path);
        else
                syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
                    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
                    libzfs_error_description(g_zfsHandle));
        nvlist_free(nvroot);

        return (retval);
}

/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
        return ("ereport.fs.zfs.checksum" == event->Value("type"));
}

/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
        return ("ereport.fs.zfs.io" == event->Value("type"));
}

bool
CaseFile::ShouldDegrade() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}

bool
CaseFile::ShouldFault() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}
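
/*
 * Example of the thresholding above: if ZFS_DEGRADE_IO_COUNT were 50 (see
 * case_file.h for the actual value), a vdev that accumulates 51 or more
 * logged checksum errors becomes a candidate for degradation, and 51 or
 * more I/O errors a candidate for faulting, once the grace period timer
 * promotes those tentative events into m_events.
 */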

nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
        return (VdevIterator(zhp).Find(VdevGUID()));
}