]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - cddl/usr.sbin/zfsd/case_file.cc
MFC r325011, r325016
[FreeBSD/FreeBSD.git] / cddl / usr.sbin / zfsd / case_file.cc
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32
33 /**
34  * \file case_file.cc
35  *
36  * We keep case files for any leaf vdev that is not in the optimal state.
37  * However, we only serialize to disk those events that need to be preserved
38  * across reboots.  For now, this is just a log of soft errors which we
39  * accumulate in order to mark a device as degraded.
40  */
41 #include <sys/cdefs.h>
42 #include <sys/time.h>
43
44 #include <sys/fs/zfs.h>
45
46 #include <dirent.h>
47 #include <iomanip>
48 #include <fstream>
49 #include <functional>
50 #include <sstream>
51 #include <syslog.h>
52 #include <unistd.h>
53
54 #include <libzfs.h>
55
56 #include <list>
57 #include <map>
58 #include <string>
59
60 #include <devdctl/guid.h>
61 #include <devdctl/event.h>
62 #include <devdctl/event_factory.h>
63 #include <devdctl/exception.h>
64 #include <devdctl/consumer.h>
65
66 #include "callout.h"
67 #include "vdev_iterator.h"
68 #include "zfsd_event.h"
69 #include "case_file.h"
70 #include "vdev.h"
71 #include "zfsd.h"
72 #include "zfsd_exception.h"
73 #include "zpool_list.h"
74
75 __FBSDID("$FreeBSD$");
76
77 /*============================ Namespace Control =============================*/
78 using std::auto_ptr;
79 using std::hex;
80 using std::ifstream;
81 using std::stringstream;
82 using std::setfill;
83 using std::setw;
84
85 using DevdCtl::Event;
86 using DevdCtl::EventFactory;
87 using DevdCtl::EventList;
88 using DevdCtl::Guid;
89 using DevdCtl::ParseException;
90
91 /*--------------------------------- CaseFile ---------------------------------*/
92 //- CaseFile Static Data -------------------------------------------------------
93
94 CaseFileList  CaseFile::s_activeCases;
95 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
97
98 //- CaseFile Static Public Methods ---------------------------------------------
99 CaseFile *
100 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
101 {
102         for (CaseFileList::iterator curCase = s_activeCases.begin();
103              curCase != s_activeCases.end(); curCase++) {
104
105                 if (((*curCase)->PoolGUID() != poolGUID
106                   && Guid::InvalidGuid() != poolGUID)
107                  || (*curCase)->VdevGUID() != vdevGUID)
108                         continue;
109
110                 /*
111                  * We only carry one active case per-vdev.
112                  */
113                 return (*curCase);
114         }
115         return (NULL);
116 }
117
118 CaseFile *
119 CaseFile::Find(const string &physPath)
120 {
121         CaseFile *result = NULL;
122
123         for (CaseFileList::iterator curCase = s_activeCases.begin();
124              curCase != s_activeCases.end(); curCase++) {
125
126                 if ((*curCase)->PhysicalPath() != physPath)
127                         continue;
128
129                 if (result != NULL) {
130                         syslog(LOG_WARNING, "Multiple casefiles found for "
131                             "physical path %s.  "
132                             "This is most likely a bug in zfsd",
133                             physPath.c_str());
134                 }
135                 result = *curCase;
136         }
137         return (result);
138 }
139
140
141 void
142 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
143 {
144         CaseFileList::iterator casefile;
145         for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
146                 CaseFileList::iterator next = casefile;
147                 next++;
148                 if (poolGUID == (*casefile)->PoolGUID())
149                         (*casefile)->ReEvaluate(event);
150                 casefile = next;
151         }
152 }
153
154 CaseFile &
155 CaseFile::Create(Vdev &vdev)
156 {
157         CaseFile *activeCase;
158
159         activeCase = Find(vdev.PoolGUID(), vdev.GUID());
160         if (activeCase == NULL)
161                 activeCase = new CaseFile(vdev);
162
163         return (*activeCase);
164 }
165
166 void
167 CaseFile::DeSerialize()
168 {
169         struct dirent **caseFiles;
170
171         int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
172                          DeSerializeSelector, /*compar*/NULL));
173
174         if (numCaseFiles == -1)
175                 return;
176         if (numCaseFiles == 0) {
177                 free(caseFiles);
178                 return;
179         }
180
181         for (int i = 0; i < numCaseFiles; i++) {
182
183                 DeSerializeFile(caseFiles[i]->d_name);
184                 free(caseFiles[i]);
185         }
186         free(caseFiles);
187 }
188
189 void
190 CaseFile::LogAll()
191 {
192         for (CaseFileList::iterator curCase = s_activeCases.begin();
193              curCase != s_activeCases.end(); curCase++)
194                 (*curCase)->Log();
195 }
196
197 void
198 CaseFile::PurgeAll()
199 {
200         /*
201          * Serialize casefiles before deleting them so that they can be reread
202          * and revalidated during BuildCaseFiles.
203          * CaseFiles remove themselves from this list on destruction.
204          */
205         while (s_activeCases.size() != 0) {
206                 CaseFile *casefile = s_activeCases.front();
207                 casefile->Serialize();
208                 delete casefile;
209         }
210
211 }
212
213 //- CaseFile Public Methods ----------------------------------------------------
214 bool
215 CaseFile::RefreshVdevState()
216 {
217         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
218         zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
219         if (casePool == NULL)
220                 return (false);
221
222         Vdev vd(casePool, CaseVdev(casePool));
223         if (vd.DoesNotExist())
224                 return (false);
225
226         m_vdevState    = vd.State();
227         m_vdevPhysPath = vd.PhysicalPath();
228         return (true);
229 }
230
231 bool
232 CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
233 {
234         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
235         zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
236
237         if (pool == NULL || !RefreshVdevState()) {
238                 /*
239                  * The pool or vdev for this case file is no longer
240                  * part of the configuration.  This can happen
241                  * if we process a device arrival notification
242                  * before seeing the ZFS configuration change
243                  * event.
244                  */
245                 syslog(LOG_INFO,
246                        "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
247                        "Closing\n",
248                        PoolGUIDString().c_str(),
249                        VdevGUIDString().c_str());
250                 Close();
251
252                 /*
253                  * Since this event was not used to close this
254                  * case, do not report it as consumed.
255                  */
256                 return (/*consumed*/false);
257         }
258
259         if (VdevState() > VDEV_STATE_CANT_OPEN) {
260                 /*
261                  * For now, newly discovered devices only help for
262                  * devices that are missing.  In the future, we might
263                  * use a newly inserted spare to replace a degraded
264                  * or faulted device.
265                  */
266                 syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
267                     PoolGUIDString().c_str(), VdevGUIDString().c_str());
268                 return (/*consumed*/false);
269         }
270
271         if (vdev != NULL
272          && ( vdev->PoolGUID() == m_poolGUID
273            || vdev->PoolGUID() == Guid::InvalidGuid())
274          && vdev->GUID() == m_vdevGUID) {
275
276                 zpool_vdev_online(pool, vdev->GUIDString().c_str(),
277                                   ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
278                                   &m_vdevState);
279                 syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
280                        zpool_get_name(pool), vdev->GUIDString().c_str(),
281                        devPath.c_str(),
282                        zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
283
284                 /*
285                  * Check the vdev state post the online action to see
286                  * if we can retire this case.
287                  */
288                 CloseIfSolved();
289
290                 return (/*consumed*/true);
291         }
292
293         /*
294          * If the auto-replace policy is enabled, and we have physical
295          * path information, try a physical path replacement.
296          */
297         if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
298                 syslog(LOG_INFO,
299                        "CaseFile(%s:%s:%s): AutoReplace not set.  "
300                        "Ignoring device insertion.\n",
301                        PoolGUIDString().c_str(),
302                        VdevGUIDString().c_str(),
303                        zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
304                 return (/*consumed*/false);
305         }
306
307         if (PhysicalPath().empty()) {
308                 syslog(LOG_INFO,
309                        "CaseFile(%s:%s:%s): No physical path information.  "
310                        "Ignoring device insertion.\n",
311                        PoolGUIDString().c_str(),
312                        VdevGUIDString().c_str(),
313                        zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
314                 return (/*consumed*/false);
315         }
316
317         if (physPath != PhysicalPath()) {
318                 syslog(LOG_INFO,
319                        "CaseFile(%s:%s:%s): Physical path mismatch.  "
320                        "Ignoring device insertion.\n",
321                        PoolGUIDString().c_str(),
322                        VdevGUIDString().c_str(),
323                        zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
324                 return (/*consumed*/false);
325         }
326
327         /* Write a label on the newly inserted disk. */
328         if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
329                 syslog(LOG_ERR,
330                        "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
331                        zpool_get_name(pool), VdevGUIDString().c_str(),
332                        libzfs_error_action(g_zfsHandle),
333                        libzfs_error_description(g_zfsHandle));
334                 return (/*consumed*/false);
335         }
336
337         syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
338             PoolGUIDString().c_str(), VdevGUIDString().c_str(),
339             devPath.c_str());
340         return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
341 }
342
343 bool
344 CaseFile::ReEvaluate(const ZfsEvent &event)
345 {
346         bool consumed(false);
347
348         if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
349                 /*
350                  * The Vdev we represent has been removed from the
351                  * configuration.  This case is no longer of value.
352                  */
353                 Close();
354
355                 return (/*consumed*/true);
356         } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
357                 /* This Pool has been destroyed.  Discard the case */
358                 Close();
359
360                 return (/*consumed*/true);
361         } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
362                 RefreshVdevState();
363                 if (VdevState() < VDEV_STATE_HEALTHY)
364                         consumed = ActivateSpare();
365         }
366
367
368         if (event.Value("class") == "resource.fs.zfs.removed") {
369                 bool spare_activated;
370
371                 if (!RefreshVdevState()) {
372                         /*
373                          * The pool or vdev for this case file is no longer
374                          * part of the configuration.  This can happen
375                          * if we process a device arrival notification
376                          * before seeing the ZFS configuration change
377                          * event.
378                          */
379                         syslog(LOG_INFO,
380                                "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
381                                "unconfigured.  Closing\n",
382                                PoolGUIDString().c_str(),
383                                VdevGUIDString().c_str());
384                         /*
385                          * Close the case now so we won't waste cycles in the
386                          * system rescan
387                          */
388                         Close();
389
390                         /*
391                          * Since this event was not used to close this
392                          * case, do not report it as consumed.
393                          */
394                         return (/*consumed*/false);
395                 }
396
397                 /*
398                  * Discard any tentative I/O error events for
399                  * this case.  They were most likely caused by the
400                  * hot-unplug of this device.
401                  */
402                 PurgeTentativeEvents();
403
404                 /* Try to activate spares if they are available */
405                 spare_activated = ActivateSpare();
406
407                 /*
408                  * Rescan the drives in the system to see if a recent
409                  * drive arrival can be used to solve this case.
410                  */
411                 ZfsDaemon::RequestSystemRescan();
412
413                 /*
414                  * Consume the event if we successfully activated a spare.
415                  * Otherwise, leave it in the unconsumed events list so that the
416                  * future addition of a spare to this pool might be able to
417                  * close the case
418                  */
419                 consumed = spare_activated;
420         } else if (event.Value("class") == "resource.fs.zfs.statechange") {
421                 RefreshVdevState();
422                 /*
423                  * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
424                  * activate a hotspare.  Otherwise, ignore the event
425                  */
426                 if (VdevState() == VDEV_STATE_FAULTED ||
427                     VdevState() == VDEV_STATE_DEGRADED ||
428                     VdevState() == VDEV_STATE_CANT_OPEN)
429                         (void) ActivateSpare();
430                 consumed = true;
431         }
432         else if (event.Value("class") == "ereport.fs.zfs.io" ||
433                  event.Value("class") == "ereport.fs.zfs.checksum") {
434
435                 m_tentativeEvents.push_front(event.DeepCopy());
436                 RegisterCallout(event);
437                 consumed = true;
438         }
439
440         bool closed(CloseIfSolved());
441
442         return (consumed || closed);
443 }
444
445
446 bool
447 CaseFile::ActivateSpare() {
448         nvlist_t        *config, *nvroot;
449         nvlist_t       **spares;
450         char            *devPath, *vdev_type;
451         const char      *poolname;
452         u_int            nspares, i;
453         int              error;
454
455         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
456         zpool_handle_t  *zhp(zpl.empty() ? NULL : zpl.front());
457         if (zhp == NULL) {
458                 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
459                        "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
460                 return (false);
461         }
462         poolname = zpool_get_name(zhp);
463         config = zpool_get_config(zhp, NULL);
464         if (config == NULL) {
465                 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
466                        "config for pool %s", poolname);
467                 return (false);
468         }
469         error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
470         if (error != 0){
471                 syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
472                        "tree for pool %s", poolname);
473                 return (false);
474         }
475         nspares = 0;
476         nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
477                                    &nspares);
478         if (nspares == 0) {
479                 /* The pool has no spares configured */
480                 syslog(LOG_INFO, "CaseFile::ActivateSpare: "
481                        "No spares available for pool %s", poolname);
482                 return (false);
483         }
484         for (i = 0; i < nspares; i++) {
485                 uint64_t    *nvlist_array;
486                 vdev_stat_t *vs;
487                 uint_t       nstats;
488
489                 if (nvlist_lookup_uint64_array(spares[i],
490                     ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
491                         syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
492                                "find vdev stats for pool %s, spare %d",
493                                poolname, i);
494                         return (false);
495                 }
496                 vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);
497
498                 if ((vs->vs_aux != VDEV_AUX_SPARED)
499                  && (vs->vs_state == VDEV_STATE_HEALTHY)) {
500                         /* We found a usable spare */
501                         break;
502                 }
503         }
504
505         if (i == nspares) {
506                 /* No available spares were found */
507                 return (false);
508         }
509
510         error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
511         if (error != 0) {
512                 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
513                        "the path of pool %s, spare %d. Error %d",
514                        poolname, i, error);
515                 return (false);
516         }
517
518         error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
519         if (error != 0) {
520                 syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
521                        "the vdev type of pool %s, spare %d. Error %d",
522                        poolname, i, error);
523                 return (false);
524         }
525
526         return (Replace(vdev_type, devPath, /*isspare*/true));
527 }
528
529 void
530 CaseFile::RegisterCallout(const Event &event)
531 {
532         timeval now, countdown, elapsed, timestamp, zero, remaining;
533
534         gettimeofday(&now, 0);
535         timestamp = event.GetTimestamp();
536         timersub(&now, &timestamp, &elapsed);
537         timersub(&s_removeGracePeriod, &elapsed, &countdown);
538         /*
539          * If countdown is <= zero, Reset the timer to the
540          * smallest positive time value instead
541          */
542         timerclear(&zero);
543         if (timercmp(&countdown, &zero, <=)) {
544                 timerclear(&countdown);
545                 countdown.tv_usec = 1;
546         }
547
548         remaining = m_tentativeTimer.TimeRemaining();
549
550         if (!m_tentativeTimer.IsPending()
551          || timercmp(&countdown, &remaining, <))
552                 m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
553 }
554
555
556 bool
557 CaseFile::CloseIfSolved()
558 {
559         if (m_events.empty()
560          && m_tentativeEvents.empty()) {
561
562                 /*
563                  * We currently do not track or take actions on
564                  * devices in the degraded or faulted state.
565                  * Once we have support for spare pools, we'll
566                  * retain these cases so that any spares added in
567                  * the future can be applied to them.
568                  */
569                 switch (VdevState()) {
570                 case VDEV_STATE_HEALTHY:
571                         /* No need to keep cases for healthy vdevs */
572                         Close();
573                         return (true);
574                 case VDEV_STATE_REMOVED:
575                 case VDEV_STATE_CANT_OPEN:
576                         /*
577                          * Keep open.  We may solve it with a newly inserted
578                          * device.
579                          */
580                 case VDEV_STATE_FAULTED:
581                 case VDEV_STATE_DEGRADED:
582                         /*
583                          * Keep open.  We may solve it with the future
584                          * addition of a spare to the pool
585                          */
586                 case VDEV_STATE_UNKNOWN:
587                 case VDEV_STATE_CLOSED:
588                 case VDEV_STATE_OFFLINE:
589                         /*
590                          * Keep open?  This may not be the correct behavior,
591                          * but it's what we've always done
592                          */
593                         ;
594                 }
595
596                 /*
597                  * Re-serialize the case in order to remove any
598                  * previous event data.
599                  */
600                 Serialize();
601         }
602
603         return (false);
604 }
605
606 void
607 CaseFile::Log()
608 {
609         syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
610                VdevGUIDString().c_str(), PhysicalPath().c_str());
611         syslog(LOG_INFO, "\tVdev State = %s\n",
612                zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
613         if (m_tentativeEvents.size() != 0) {
614                 syslog(LOG_INFO, "\t=== Tentative Events ===\n");
615                 for (EventList::iterator event(m_tentativeEvents.begin());
616                      event != m_tentativeEvents.end(); event++)
617                         (*event)->Log(LOG_INFO);
618         }
619         if (m_events.size() != 0) {
620                 syslog(LOG_INFO, "\t=== Events ===\n");
621                 for (EventList::iterator event(m_events.begin());
622                      event != m_events.end(); event++)
623                         (*event)->Log(LOG_INFO);
624         }
625 }
626
627 //- CaseFile Static Protected Methods ------------------------------------------
628 void
629 CaseFile::OnGracePeriodEnded(void *arg)
630 {
631         CaseFile &casefile(*static_cast<CaseFile *>(arg));
632
633         casefile.OnGracePeriodEnded();
634 }
635
636 int
637 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
638 {
639         uint64_t poolGUID;
640         uint64_t vdevGUID;
641
642         if (dirEntry->d_type == DT_REG
643          && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
644                    &poolGUID, &vdevGUID) == 2)
645                 return (1);
646         return (0);
647 }
648
649 void
650 CaseFile::DeSerializeFile(const char *fileName)
651 {
652         string    fullName(s_caseFilePath + '/' + fileName);
653         CaseFile *existingCaseFile(NULL);
654         CaseFile *caseFile(NULL);
655
656         try {
657                 uint64_t poolGUID;
658                 uint64_t vdevGUID;
659                 nvlist_t *vdevConf;
660
661                 if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
662                        &poolGUID, &vdevGUID) != 2) {
663                         throw ZfsdException("CaseFile::DeSerialize: "
664                             "Unintelligible CaseFile filename %s.\n", fileName);
665                 }
666                 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
667                 if (existingCaseFile != NULL) {
668                         /*
669                          * If the vdev is already degraded or faulted,
670                          * there's no point in keeping the state around
671                          * that we use to put a drive into the degraded
672                          * state.  However, if the vdev is simply missing,
673                          * preserve the case data in the hopes that it will
674                          * return.
675                          */
676                         caseFile = existingCaseFile;
677                         vdev_state curState(caseFile->VdevState());
678                         if (curState > VDEV_STATE_CANT_OPEN
679                          && curState < VDEV_STATE_HEALTHY) {
680                                 unlink(fileName);
681                                 return;
682                         }
683                 } else {
684                         ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
685                         if (zpl.empty()
686                          || (vdevConf = VdevIterator(zpl.front())
687                                                     .Find(vdevGUID)) == NULL) {
688                                 /*
689                                  * Either the pool no longer exists
690                                  * or this vdev is no longer a member of
691                                  * the pool.
692                                  */
693                                 unlink(fullName.c_str());
694                                 return;
695                         }
696
697                         /*
698                          * Any vdev we find that does not have a case file
699                          * must be in the healthy state and thus worthy of
700                          * continued SERD data tracking.
701                          */
702                         caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
703                 }
704
705                 ifstream caseStream(fullName.c_str());
706                 if (!caseStream)
707                         throw ZfsdException("CaseFile::DeSerialize: Unable to "
708                                             "read %s.\n", fileName);
709
710                 caseFile->DeSerialize(caseStream);
711         } catch (const ParseException &exp) {
712
713                 exp.Log();
714                 if (caseFile != existingCaseFile)
715                         delete caseFile;
716
717                 /*
718                  * Since we can't parse the file, unlink it so we don't
719                  * trip over it again.
720                  */
721                 unlink(fileName);
722         } catch (const ZfsdException &zfsException) {
723
724                 zfsException.Log();
725                 if (caseFile != existingCaseFile)
726                         delete caseFile;
727         }
728 }
729
730 //- CaseFile Protected Methods -------------------------------------------------
731 CaseFile::CaseFile(const Vdev &vdev)
732  : m_poolGUID(vdev.PoolGUID()),
733    m_vdevGUID(vdev.GUID()),
734    m_vdevState(vdev.State()),
735    m_vdevPhysPath(vdev.PhysicalPath())
736 {
737         stringstream guidString;
738
739         guidString << m_vdevGUID;
740         m_vdevGUIDString = guidString.str();
741         guidString.str("");
742         guidString << m_poolGUID;
743         m_poolGUIDString = guidString.str();
744
745         s_activeCases.push_back(this);
746
747         syslog(LOG_INFO, "Creating new CaseFile:\n");
748         Log();
749 }
750
751 CaseFile::~CaseFile()
752 {
753         PurgeEvents();
754         PurgeTentativeEvents();
755         m_tentativeTimer.Stop();
756         s_activeCases.remove(this);
757 }
758
759 void
760 CaseFile::PurgeEvents()
761 {
762         for (EventList::iterator event(m_events.begin());
763              event != m_events.end(); event++)
764                 delete *event;
765
766         m_events.clear();
767 }
768
769 void
770 CaseFile::PurgeTentativeEvents()
771 {
772         for (EventList::iterator event(m_tentativeEvents.begin());
773              event != m_tentativeEvents.end(); event++)
774                 delete *event;
775
776         m_tentativeEvents.clear();
777 }
778
779 void
780 CaseFile::SerializeEvList(const EventList events, int fd,
781                 const char* prefix) const
782 {
783         if (events.empty())
784                 return;
785         for (EventList::const_iterator curEvent = events.begin();
786              curEvent != events.end(); curEvent++) {
787                 const string &eventString((*curEvent)->GetEventString());
788
789                 // TODO: replace many write(2) calls with a single writev(2)
790                 if (prefix)
791                         write(fd, prefix, strlen(prefix));
792                 write(fd, eventString.c_str(), eventString.length());
793         }
794 }
795
796 void
797 CaseFile::Serialize()
798 {
799         stringstream saveFile;
800
801         saveFile << setfill('0')
802                  << s_caseFilePath << "/"
803                  << "pool_" << PoolGUIDString()
804                  << "_vdev_" << VdevGUIDString()
805                  << ".case";
806
807         if (m_events.empty() && m_tentativeEvents.empty()) {
808                 unlink(saveFile.str().c_str());
809                 return;
810         }
811
812         int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
813         if (fd == -1) {
814                 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
815                        saveFile.str().c_str());
816                 return;
817         }
818         SerializeEvList(m_events, fd);
819         SerializeEvList(m_tentativeEvents, fd, "tentative ");
820         close(fd);
821 }
822
823 /*
824  * XXX: This method assumes that events may not contain embedded newlines.  If
825  * ever events can contain embedded newlines, then CaseFile must switch
826  * serialization formats
827  */
828 void
829 CaseFile::DeSerialize(ifstream &caseStream)
830 {
831         string        evString;
832         const EventFactory &factory(ZfsDaemon::Get().GetFactory());
833
834         caseStream >> std::noskipws >> std::ws;
835         while (caseStream.good()) {
836                 /*
837                  * Outline:
838                  * read the beginning of a line and check it for
839                  * "tentative".  If found, discard "tentative".
840                  * Create a new event
841                  * continue
842                  */
843                 EventList* destEvents;
844                 const string tentFlag("tentative ");
845                 string line;
846                 std::stringbuf lineBuf;
847
848                 caseStream.get(lineBuf);
849                 caseStream.ignore();  /*discard the newline character*/
850                 line = lineBuf.str();
851                 if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
852                         /* Discard "tentative" */
853                         line.erase(0, tentFlag.size());
854                         destEvents = &m_tentativeEvents;
855                 } else {
856                         destEvents = &m_events;
857                 }
858                 Event *event(Event::CreateEvent(factory, line));
859                 if (event != NULL) {
860                         destEvents->push_back(event);
861                         RegisterCallout(*event);
862                 }
863         }
864 }
865
866 void
867 CaseFile::Close()
868 {
869         /*
870          * This case is no longer relevant.  Clean up our
871          * serialization file, and delete the case.
872          */
873         syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
874                PoolGUIDString().c_str(), VdevGUIDString().c_str(),
875                zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
876
877         /*
878          * Serialization of a Case with no event data, clears the
879          * Serialization data for that event.
880          */
881         PurgeEvents();
882         Serialize();
883
884         delete this;
885 }
886
887 void
888 CaseFile::OnGracePeriodEnded()
889 {
890         bool should_fault, should_degrade;
891         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
892         zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
893
894         m_events.splice(m_events.begin(), m_tentativeEvents);
895         should_fault = ShouldFault();
896         should_degrade = ShouldDegrade();
897
898         if (should_fault || should_degrade) {
899                 if (zhp == NULL
900                  || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
901                         /*
902                          * Either the pool no longer exists
903                          * or this vdev is no longer a member of
904                          * the pool.
905                          */
906                         Close();
907                         return;
908                 }
909
910         }
911
912         /* A fault condition has priority over a degrade condition */
913         if (ShouldFault()) {
914                 /* Fault the vdev and close the case. */
915                 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
916                                        VDEV_AUX_ERR_EXCEEDED) == 0) {
917                         syslog(LOG_INFO, "Faulting vdev(%s/%s)",
918                                PoolGUIDString().c_str(),
919                                VdevGUIDString().c_str());
920                         Close();
921                         return;
922                 }
923                 else {
924                         syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
925                                PoolGUIDString().c_str(),
926                                VdevGUIDString().c_str(),
927                                libzfs_error_action(g_zfsHandle),
928                                libzfs_error_description(g_zfsHandle));
929                 }
930         }
931         else if (ShouldDegrade()) {
932                 /* Degrade the vdev and close the case. */
933                 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
934                                        VDEV_AUX_ERR_EXCEEDED) == 0) {
935                         syslog(LOG_INFO, "Degrading vdev(%s/%s)",
936                                PoolGUIDString().c_str(),
937                                VdevGUIDString().c_str());
938                         Close();
939                         return;
940                 }
941                 else {
942                         syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
943                                PoolGUIDString().c_str(),
944                                VdevGUIDString().c_str(),
945                                libzfs_error_action(g_zfsHandle),
946                                libzfs_error_description(g_zfsHandle));
947                 }
948         }
949         Serialize();
950 }
951
952 Vdev
953 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
954         Vdev vd(zhp, CaseVdev(zhp));
955         std::list<Vdev> children;
956         std::list<Vdev>::iterator children_it;
957
958         Vdev parent(vd.Parent());
959         Vdev replacing(NonexistentVdev);
960
961         /*
962          * To determine whether we are being replaced by another spare that
963          * is still working, then make sure that it is currently spared and
964          * that the spare is either resilvering or healthy.  If any of these
965          * conditions fail, then we are not being replaced by a spare.
966          *
967          * If the spare is healthy, then the case file should be closed very
968          * soon after this check.
969          */
970         if (parent.DoesNotExist()
971          || parent.Name(zhp, /*verbose*/false) != "spare")
972                 return (NonexistentVdev);
973
974         children = parent.Children();
975         children_it = children.begin();
976         for (;children_it != children.end(); children_it++) {
977                 Vdev child = *children_it;
978
979                 /* Skip our vdev. */
980                 if (child.GUID() == VdevGUID())
981                         continue;
982                 /*
983                  * Accept the first child that doesn't match our GUID, or
984                  * any resilvering/healthy device if one exists.
985                  */
986                 if (replacing.DoesNotExist() || child.IsResilvering()
987                  || child.State() == VDEV_STATE_HEALTHY)
988                         replacing = child;
989         }
990
991         return (replacing);
992 }
993
994 bool
995 CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
996         nvlist_t *nvroot, *newvd;
997         const char *poolname;
998         string oldstr(VdevGUIDString());
999         bool retval = true;
1000
1001         /* Figure out what pool we're working on */
1002         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
1003         zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
1004         if (zhp == NULL) {
1005                 syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
1006                        "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
1007                 return (false);
1008         }
1009         poolname = zpool_get_name(zhp);
1010         Vdev vd(zhp, CaseVdev(zhp));
1011         Vdev replaced(BeingReplacedBy(zhp));
1012
1013         if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
1014                 /* If we are already being replaced by a working spare, pass. */
1015                 if (replaced.IsResilvering()
1016                  || replaced.State() == VDEV_STATE_HEALTHY) {
1017                         syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
1018                             "replaced", VdevGUIDString().c_str(), path);
1019                         return (/*consumed*/false);
1020                 }
1021                 /*
1022                  * If we have already been replaced by a spare, but that spare
1023                  * is broken, we must spare the spare, not the original device.
1024                  */
1025                 oldstr = replaced.GUIDString();
1026                 syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
1027                     "broken spare %s instead", VdevGUIDString().c_str(),
1028                     path, oldstr.c_str());
1029         }
1030
1031         /*
1032          * Build a root vdev/leaf vdev configuration suitable for
1033          * zpool_vdev_attach. Only enough data for the kernel to find
1034          * the device (i.e. type and disk device node path) are needed.
1035          */
1036         nvroot = NULL;
1037         newvd = NULL;
1038
1039         if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
1040          || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
1041                 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
1042                     "configuration data.", poolname, oldstr.c_str());
1043                 if (nvroot != NULL)
1044                         nvlist_free(nvroot);
1045                 return (false);
1046         }
1047         if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
1048          || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
1049          || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
1050          || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
1051                                     &newvd, 1) != 0) {
1052                 syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
1053                     "configuration data.", poolname, oldstr.c_str());
1054                 nvlist_free(newvd);
1055                 nvlist_free(nvroot);
1056                 return (true);
1057         }
1058
1059         /* Data was copied when added to the root vdev. */
1060         nvlist_free(newvd);
1061
1062         retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
1063             /*replace*/B_TRUE) == 0);
1064         if (retval)
1065                 syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
1066                     poolname, oldstr.c_str(), path);
1067         else
1068                 syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
1069                     poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
1070                     libzfs_error_description(g_zfsHandle));
1071         nvlist_free(nvroot);
1072
1073         return (retval);
1074 }
1075
1076 /* Does the argument event refer to a checksum error? */
1077 static bool
1078 IsChecksumEvent(const Event* const event)
1079 {
1080         return ("ereport.fs.zfs.checksum" == event->Value("type"));
1081 }
1082
1083 /* Does the argument event refer to an IO error? */
1084 static bool
1085 IsIOEvent(const Event* const event)
1086 {
1087         return ("ereport.fs.zfs.io" == event->Value("type"));
1088 }
1089
1090 bool
1091 CaseFile::ShouldDegrade() const
1092 {
1093         return (std::count_if(m_events.begin(), m_events.end(),
1094                               IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
1095 }
1096
1097 bool
1098 CaseFile::ShouldFault() const
1099 {
1100         return (std::count_if(m_events.begin(), m_events.end(),
1101                               IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
1102 }
1103
1104 nvlist_t *
1105 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1106 {
1107         return (VdevIterator(zhp).Find(VdevGUID()));
1108 }