/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file case_file.cc
 *
 * We keep case files for any leaf vdev that is not in the optimal state.
 * However, we only serialize to disk those events that need to be preserved
 * across reboots.  For now, this is just a log of soft errors which we
 * accumulate in order to mark a device as degraded.
 */
#include <sys/cdefs.h>
#include <sys/time.h>

#include <sys/fs/zfs.h>

#include <algorithm>
#include <dirent.h>
#include <iomanip>
#include <fstream>
#include <functional>
#include <sstream>
#include <syslog.h>
#include <unistd.h>

#include <libzfs.h>

#include <list>
#include <map>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD$");

/*============================ Namespace Control =============================*/
using std::auto_ptr;
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;

using DevdCtl::Event;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;

/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------

CaseFileList  CaseFile::s_activeCases;
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};

//- CaseFile Static Public Methods ---------------------------------------------
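/*
 * Search the active case list for a case matching both GUIDs.  A
 * caller-supplied invalid pool GUID acts as a wildcard that matches
 * any pool.
 */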
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if (((*curCase)->PoolGUID() != poolGUID
                  && Guid::InvalidGuid() != poolGUID)
                 || (*curCase)->VdevGUID() != vdevGUID)
                        continue;

                /*
                 * We only carry one active case per vdev.
                 */
                return (*curCase);
        }
        return (NULL);
}

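/*
 * Search the active case list for a case whose vdev has the given
 * physical path.  If multiple cases match, the last one found is
 * returned and a warning is logged.
 */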
CaseFile *
CaseFile::Find(const string &physPath)
{
        CaseFile *result = NULL;

        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if ((*curCase)->PhysicalPath() != physPath)
                        continue;

                if (result != NULL) {
                        syslog(LOG_WARNING, "Multiple casefiles found for "
                            "physical path %s.  "
                            "This is most likely a bug in zfsd.",
                            physPath.c_str());
                }
                result = *curCase;
        }
        return (result);
}

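/*
 * Re-evaluate all active cases for the given pool against an event.
 * Iteration uses a saved "next" iterator because ReEvaluate() may close
 * (and therefore destroy) the current case.
 */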
void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
        CaseFileList::iterator casefile;

        for (casefile = s_activeCases.begin();
             casefile != s_activeCases.end();) {
                CaseFileList::iterator next = casefile;
                next++;
                if (poolGUID == (*casefile)->PoolGUID())
                        (*casefile)->ReEvaluate(event);
                casefile = next;
        }
}

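/*
 * Return the active case for the given vdev, creating a new case if
 * none exists.
 */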
CaseFile &
CaseFile::Create(Vdev &vdev)
{
        CaseFile *activeCase;

        activeCase = Find(vdev.PoolGUID(), vdev.GUID());
        if (activeCase == NULL)
                activeCase = new CaseFile(vdev);

        return (*activeCase);
}

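/*
 * Recreate the in-core case list from any case files previously
 * serialized to s_caseFilePath.
 */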
void
CaseFile::DeSerialize()
{
        struct dirent **caseFiles;

        int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
                         DeSerializeSelector, /*compar*/NULL));

        if (numCaseFiles == -1)
                return;
        if (numCaseFiles == 0) {
                free(caseFiles);
                return;
        }

        for (int i = 0; i < numCaseFiles; i++) {

                DeSerializeFile(caseFiles[i]->d_name);
                free(caseFiles[i]);
        }
        free(caseFiles);
}

bool
CaseFile::Empty()
{
        return (s_activeCases.empty());
}

void
CaseFile::LogAll()
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++)
                (*curCase)->Log();
}

void
CaseFile::PurgeAll()
{
        /*
         * Serialize casefiles before deleting them so that they can be
         * reread and revalidated during BuildCaseFiles.
         * CaseFiles remove themselves from this list on destruction.
         */
        while (s_activeCases.size() != 0) {
                CaseFile *casefile = s_activeCases.front();
                casefile->Serialize();
                delete casefile;
        }
}

//- CaseFile Public Methods ----------------------------------------------------
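/*
 * Refresh the cached vdev state and physical path from the current
 * pool configuration.  Returns false if the pool or vdev can no
 * longer be found.
 */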
bool
CaseFile::RefreshVdevState()
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
        if (casePool == NULL)
                return (false);

        Vdev vd(casePool, CaseVdev(casePool));
        if (vd.DoesNotExist())
                return (false);

        m_vdevState    = vd.State();
        m_vdevPhysPath = vd.PhysicalPath();
        return (true);
}

bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

        if (pool == NULL || !RefreshVdevState()) {
                /*
                 * The pool or vdev for this case file is no longer
                 * part of the configuration.  This can happen
                 * if we process a device arrival notification
                 * before seeing the ZFS configuration change
                 * event.
                 */
                syslog(LOG_INFO,
                       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
                       "Closing\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str());
                Close();

                /*
                 * Since this event was not used to close this
                 * case, do not report it as consumed.
                 */
                return (/*consumed*/false);
        }

        if (VdevState() > VDEV_STATE_CANT_OPEN) {
                /*
                 * For now, newly discovered devices only help for
                 * devices that are missing.  In the future, we might
                 * use a newly inserted spare to replace a degraded
                 * or faulted device.
                 */
                syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
                    PoolGUIDString().c_str(), VdevGUIDString().c_str());
                return (/*consumed*/false);
        }

        if (vdev != NULL
         && ( vdev->PoolGUID() == m_poolGUID
           || vdev->PoolGUID() == Guid::InvalidGuid())
         && vdev->GUID() == m_vdevGUID) {

                zpool_vdev_online(pool, vdev->GUIDString().c_str(),
                                  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
                                  &m_vdevState);
                syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
                       zpool_get_name(pool), vdev->GUIDString().c_str(),
                       devPath.c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

                /*
                 * Check the vdev state after the online action to see
                 * if we can retire this case.
                 */
                CloseIfSolved();

                return (/*consumed*/true);
        }

        /*
         * If the auto-replace policy is enabled and we have physical
         * path information, try a physical-path replacement.
         */
        if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): AutoReplace not set.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (PhysicalPath().empty()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): No physical path information.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (physPath != PhysicalPath()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): Physical path mismatch.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        /* Write a label on the newly inserted disk. */
        if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
                syslog(LOG_ERR,
                       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
                       zpool_get_name(pool), VdevGUIDString().c_str(),
                       libzfs_error_action(g_zfsHandle),
                       libzfs_error_description(g_zfsHandle));
                return (/*consumed*/false);
        }

        syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
            PoolGUIDString().c_str(), VdevGUIDString().c_str(),
            devPath.c_str());
        return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}

bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
        bool consumed(false);

        if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
                /*
                 * The vdev we represent has been removed from the
                 * configuration.  This case is no longer of value.
                 */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
                /* This pool has been destroyed.  Discard the case. */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
                RefreshVdevState();
                if (VdevState() < VDEV_STATE_HEALTHY)
                        consumed = ActivateSpare();
        }

        if (event.Value("class") == "resource.fs.zfs.removed") {
                bool spare_activated;

                if (!RefreshVdevState()) {
                        /*
                         * The pool or vdev for this case file is no longer
                         * part of the configuration.  This can happen
                         * if we process a device arrival notification
                         * before seeing the ZFS configuration change
                         * event.
                         */
                        syslog(LOG_INFO,
                               "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
                               "unconfigured.  Closing\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        /*
                         * Close the case now so we won't waste cycles in the
                         * system rescan.
                         */
                        Close();

                        /*
                         * Since this event was not used to close this
                         * case, do not report it as consumed.
                         */
                        return (/*consumed*/false);
                }

                /*
                 * Discard any tentative I/O error events for
                 * this case.  They were most likely caused by the
                 * hot-unplug of this device.
                 */
                PurgeTentativeEvents();

                /* Try to activate spares if they are available. */
                spare_activated = ActivateSpare();

                /*
                 * Rescan the drives in the system to see if a recent
                 * drive arrival can be used to solve this case.
                 */
                ZfsDaemon::RequestSystemRescan();

                /*
                 * Consume the event if we successfully activated a spare.
                 * Otherwise, leave it in the unconsumed events list so that
                 * the future addition of a spare to this pool might be able
                 * to close the case.
                 */
                consumed = spare_activated;
        } else if (event.Value("class") == "resource.fs.zfs.statechange") {
                RefreshVdevState();
                /*
                 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
                 * activate a hotspare.  Otherwise, ignore the event.
                 */
                if (VdevState() == VDEV_STATE_FAULTED ||
                    VdevState() == VDEV_STATE_DEGRADED ||
                    VdevState() == VDEV_STATE_CANT_OPEN)
                        (void) ActivateSpare();
                consumed = true;
        } else if (event.Value("class") == "ereport.fs.zfs.io" ||
                   event.Value("class") == "ereport.fs.zfs.checksum") {

                m_tentativeEvents.push_front(event.DeepCopy());
                RegisterCallout(event);
                consumed = true;
        }

        bool closed(CloseIfSolved());

        return (consumed || closed);
}

/* Find the parent vdev that contains the vdev with the given GUID. */
static nvlist_t*
find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
{
        nvlist_t **vdevChildren;
        int        error;
        unsigned   ch, numChildren;

        error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
                                           &vdevChildren, &numChildren);

        if (error != 0 || numChildren == 0)
                return (NULL);

        for (ch = 0; ch < numChildren; ch++) {
                nvlist_t *result;
                Vdev vdev(pool_config, vdevChildren[ch]);

                if (vdev.GUID() == child_guid)
                        return (config);

                result = find_parent(pool_config, vdevChildren[ch], child_guid);
                if (result != NULL)
                        return (result);
        }

        return (NULL);
}

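/*
 * Attempt to put a configured hot spare into service in place of the
 * case vdev.  Returns true if a spare replacement was initiated.
 */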
bool
CaseFile::ActivateSpare()
{
        nvlist_t        *config, *nvroot, *parent_config;
        nvlist_t       **spares;
        char            *devPath, *vdev_type;
        const char      *poolname;
        u_int            nspares, i;
        int              error;

        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t  *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "for pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        config = zpool_get_config(zhp, NULL);
        if (config == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "config for pool %s", poolname);
                return (false);
        }
        error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
                       "tree for pool %s", poolname);
                return (false);
        }

        parent_config = find_parent(config, nvroot, m_vdevGUID);
        if (parent_config != NULL) {
                char *parent_type;

                /*
                 * Don't activate spares for members of a "replacing" vdev.
                 * They're already dealt with.  Sparing them will just drag out
                 * the resilver process.
                 */
                error = nvlist_lookup_string(parent_config,
                    ZPOOL_CONFIG_TYPE, &parent_type);
                if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
                        return (false);
        }

        nspares = 0;
        nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
                                   &nspares);
        if (nspares == 0) {
                /* The pool has no spares configured. */
                syslog(LOG_INFO, "CaseFile::ActivateSpare: "
                       "No spares available for pool %s", poolname);
                return (false);
        }
        for (i = 0; i < nspares; i++) {
                uint64_t    *nvlist_array;
                vdev_stat_t *vs;
                uint_t       nstats;

                if (nvlist_lookup_uint64_array(spares[i],
                    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
                        syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
                               "find vdev stats for pool %s, spare %u",
                               poolname, i);
                        return (false);
                }
                vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

                if ((vs->vs_aux != VDEV_AUX_SPARED)
                 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
                        /* We found a usable spare. */
                        break;
                }
        }

        if (i == nspares) {
                /* No available spares were found. */
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the path of pool %s, spare %u. Error %d",
                       poolname, i, error);
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the vdev type of pool %s, spare %u. Error %d",
                       poolname, i, error);
                return (false);
        }

        return (Replace(vdev_type, devPath, /*isspare*/true));
}

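/*
 * Schedule the grace-period callout for a tentative event, shortening
 * any pending timer if this event's deadline is sooner.
 */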
void
CaseFile::RegisterCallout(const Event &event)
{
        timeval now, countdown, elapsed, timestamp, zero, remaining;

        gettimeofday(&now, 0);
        timestamp = event.GetTimestamp();
        timersub(&now, &timestamp, &elapsed);
        timersub(&s_removeGracePeriod, &elapsed, &countdown);
        /*
         * If the countdown is <= zero, reset the timer to the
         * smallest positive time value instead.
         */
        timerclear(&zero);
        if (timercmp(&countdown, &zero, <=)) {
                timerclear(&countdown);
                countdown.tv_usec = 1;
        }

        remaining = m_tentativeTimer.TimeRemaining();

        if (!m_tentativeTimer.IsPending()
         || timercmp(&countdown, &remaining, <))
                m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}

bool
CaseFile::CloseIfSolved()
{
        if (m_events.empty()
         && m_tentativeEvents.empty()) {

                /*
                 * We currently do not track or take actions on
                 * devices in the degraded or faulted state.
                 * Once we have support for spare pools, we'll
                 * retain these cases so that any spares added in
                 * the future can be applied to them.
                 */
                switch (VdevState()) {
                case VDEV_STATE_HEALTHY:
                        /* No need to keep cases for healthy vdevs. */
                        Close();
                        return (true);
                case VDEV_STATE_REMOVED:
                case VDEV_STATE_CANT_OPEN:
                        /*
                         * Keep open.  We may solve it with a newly inserted
                         * device.
                         */
                case VDEV_STATE_FAULTED:
                case VDEV_STATE_DEGRADED:
                        /*
                         * Keep open.  We may solve it with the future
                         * addition of a spare to the pool.
                         */
                case VDEV_STATE_UNKNOWN:
                case VDEV_STATE_CLOSED:
                case VDEV_STATE_OFFLINE:
                        /*
                         * Keep open?  This may not be the correct behavior,
                         * but it's what we've always done.
                         */
                        ;
                }

                /*
                 * Re-serialize the case in order to remove any
                 * previous event data.
                 */
                Serialize();
        }

        return (false);
}

void
CaseFile::Log()
{
        syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
               VdevGUIDString().c_str(), PhysicalPath().c_str());
        syslog(LOG_INFO, "\tVdev State = %s\n",
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
        if (m_tentativeEvents.size() != 0) {
                syslog(LOG_INFO, "\t=== Tentative Events ===\n");
                for (EventList::iterator event(m_tentativeEvents.begin());
                     event != m_tentativeEvents.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
        if (m_events.size() != 0) {
                syslog(LOG_INFO, "\t=== Events ===\n");
                for (EventList::iterator event(m_events.begin());
                     event != m_events.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
}

//- CaseFile Static Protected Methods ------------------------------------------
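/*
 * Callout trampoline: recover the CaseFile instance from the callout
 * argument and dispatch to the member function.
 */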
void
CaseFile::OnGracePeriodEnded(void *arg)
{
        CaseFile &casefile(*static_cast<CaseFile *>(arg));

        casefile.OnGracePeriodEnded();
}

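/*
 * scandir(3) selector: accept only regular files whose names follow
 * the case file naming scheme "pool_<pool GUID>_vdev_<vdev GUID>.case",
 * e.g. (with hypothetical GUIDs) "pool_1234_vdev_5678.case".
 */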
int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
        uint64_t poolGUID;
        uint64_t vdevGUID;

        if (dirEntry->d_type == DT_REG
         && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                   &poolGUID, &vdevGUID) == 2)
                return (1);
        return (0);
}

void
CaseFile::DeSerializeFile(const char *fileName)
{
        string    fullName(s_caseFilePath + '/' + fileName);
        CaseFile *existingCaseFile(NULL);
        CaseFile *caseFile(NULL);

        try {
                uint64_t poolGUID;
                uint64_t vdevGUID;
                nvlist_t *vdevConf;

                if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                       &poolGUID, &vdevGUID) != 2) {
                        throw ZfsdException("CaseFile::DeSerialize: "
                            "Unintelligible CaseFile filename %s.\n", fileName);
                }
                existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
                if (existingCaseFile != NULL) {
                        /*
                         * If the vdev is already degraded or faulted,
                         * there's no point in keeping the state around
                         * that we use to put a drive into the degraded
                         * state.  However, if the vdev is simply missing,
                         * preserve the case data in the hopes that it will
                         * return.
                         */
                        caseFile = existingCaseFile;
                        vdev_state curState(caseFile->VdevState());
                        if (curState > VDEV_STATE_CANT_OPEN
                         && curState < VDEV_STATE_HEALTHY) {
                                unlink(fullName.c_str());
                                return;
                        }
                } else {
                        ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
                        if (zpl.empty()
                         || (vdevConf = VdevIterator(zpl.front())
                                                    .Find(vdevGUID)) == NULL) {
                                /*
                                 * Either the pool no longer exists
                                 * or this vdev is no longer a member of
                                 * the pool.
                                 */
                                unlink(fullName.c_str());
                                return;
                        }

                        /*
                         * Any vdev we find that does not have a case file
                         * must be in the healthy state and thus worthy of
                         * continued SERD data tracking.
                         */
                        caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
                }

                ifstream caseStream(fullName.c_str());
                if (!caseStream)
                        throw ZfsdException("CaseFile::DeSerialize: Unable to "
                                            "read %s.\n", fileName);

                caseFile->DeSerialize(caseStream);
        } catch (const ParseException &exp) {
                exp.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;

                /*
                 * Since we can't parse the file, unlink it so we don't
                 * trip over it again.
                 */
                unlink(fullName.c_str());
        } catch (const ZfsdException &zfsException) {
                zfsException.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;
        }
}

//- CaseFile Protected Methods -------------------------------------------------
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath())
{
        stringstream guidString;

        guidString << m_vdevGUID;
        m_vdevGUIDString = guidString.str();
        guidString.str("");
        guidString << m_poolGUID;
        m_poolGUIDString = guidString.str();

        s_activeCases.push_back(this);

        syslog(LOG_INFO, "Creating new CaseFile:\n");
        Log();
}

CaseFile::~CaseFile()
{
        PurgeEvents();
        PurgeTentativeEvents();
        m_tentativeTimer.Stop();
        s_activeCases.remove(this);
}

void
CaseFile::PurgeEvents()
{
        for (EventList::iterator event(m_events.begin());
             event != m_events.end(); event++)
                delete *event;

        m_events.clear();
}

void
CaseFile::PurgeTentativeEvents()
{
        for (EventList::iterator event(m_tentativeEvents.begin());
             event != m_tentativeEvents.end(); event++)
                delete *event;

        m_tentativeEvents.clear();
}

void
CaseFile::SerializeEvList(const EventList events, int fd,
                const char* prefix) const
{
        if (events.empty())
                return;
        for (EventList::const_iterator curEvent = events.begin();
             curEvent != events.end(); curEvent++) {
                const string &eventString((*curEvent)->GetEventString());

                // TODO: replace many write(2) calls with a single writev(2)
                if (prefix)
                        write(fd, prefix, strlen(prefix));
                write(fd, eventString.c_str(), eventString.length());
        }
}

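/*
 * On-disk case file format: one event per line, each line holding the
 * event's devd string verbatim, with tentative events prefixed by
 * "tentative ".  A line might look like (hypothetical event text):
 *
 *   tentative !system=ZFS subsystem=ZFS type=ereport.fs.zfs.io ...
 */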
void
CaseFile::Serialize()
{
        stringstream saveFile;

        saveFile << setfill('0')
                 << s_caseFilePath << "/"
                 << "pool_" << PoolGUIDString()
                 << "_vdev_" << VdevGUIDString()
                 << ".case";

        if (m_events.empty() && m_tentativeEvents.empty()) {
                unlink(saveFile.str().c_str());
                return;
        }

        int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
        if (fd == -1) {
                syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
                       saveFile.str().c_str());
                return;
        }
        SerializeEvList(m_events, fd);
        SerializeEvList(m_tentativeEvents, fd, "tentative ");
        close(fd);
}

/*
 * XXX: This method assumes that events do not contain embedded newlines.
 * If events are ever allowed to contain embedded newlines, CaseFile must
 * switch serialization formats.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
        const EventFactory &factory(ZfsDaemon::Get().GetFactory());

        caseStream >> std::noskipws >> std::ws;
        while (caseStream.good()) {
                /*
                 * Outline:
                 * Read the beginning of a line and check it for
                 * "tentative".  If found, discard "tentative".
                 * Create a new event.
                 * Continue.
                 */
                EventList* destEvents;
                const string tentFlag("tentative ");
                string line;
                std::stringbuf lineBuf;

                caseStream.get(lineBuf);
                caseStream.ignore();  /* discard the newline character */
                line = lineBuf.str();
                if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
                        /* Discard "tentative". */
                        line.erase(0, tentFlag.size());
                        destEvents = &m_tentativeEvents;
                } else {
                        destEvents = &m_events;
                }
                Event *event(Event::CreateEvent(factory, line));
                if (event != NULL) {
                        destEvents->push_back(event);
                        RegisterCallout(*event);
                }
        }
}

void
CaseFile::Close()
{
        /*
         * This case is no longer relevant.  Clean up our
         * serialization file, and delete the case.
         */
        syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
               PoolGUIDString().c_str(), VdevGUIDString().c_str(),
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

        /*
         * Serializing a case with no event data removes its
         * serialization file.
         */
        PurgeEvents();
        Serialize();

        delete this;
}

void
CaseFile::OnGracePeriodEnded()
{
        bool should_fault, should_degrade;
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

        m_events.splice(m_events.begin(), m_tentativeEvents);
        should_fault = ShouldFault();
        should_degrade = ShouldDegrade();

        if (should_fault || should_degrade) {
                if (zhp == NULL
                 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
                        /*
                         * Either the pool no longer exists
                         * or this vdev is no longer a member of
                         * the pool.
                         */
                        Close();
                        return;
                }
        }

        /* A fault condition has priority over a degrade condition. */
        if (should_fault) {
                /* Fault the vdev and close the case. */
                if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
                                     VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Faulting vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        } else if (should_degrade) {
                /* Degrade the vdev and close the case. */
                if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
                                       VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Degrading vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        }
        Serialize();
}

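/*
 * Return the vdev, if any, that is currently replacing the case vdev
 * under a "spare" parent; NonexistentVdev otherwise.
 */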
Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
        Vdev vd(zhp, CaseVdev(zhp));
        std::list<Vdev> children;
        std::list<Vdev>::iterator children_it;

        Vdev parent(vd.Parent());
        Vdev replacing(NonexistentVdev);

        /*
         * We are being replaced by another working spare only if our
         * parent is a "spare" vdev and the replacement is either
         * resilvering or healthy.  If any of these conditions fail,
         * we are not being replaced by a spare.
         *
         * If the spare is healthy, the case file should be closed very
         * soon after this check.
         */
        if (parent.DoesNotExist()
         || parent.Name(zhp, /*verbose*/false) != "spare")
                return (NonexistentVdev);

        children = parent.Children();
        children_it = children.begin();
        for (; children_it != children.end(); children_it++) {
                Vdev child = *children_it;

                /* Skip our vdev. */
                if (child.GUID() == VdevGUID())
                        continue;
                /*
                 * Accept the first child that doesn't match our GUID, or
                 * any resilvering/healthy device if one exists.
                 */
                if (replacing.DoesNotExist() || child.IsResilvering()
                 || child.State() == VDEV_STATE_HEALTHY)
                        replacing = child;
        }

        return (replacing);
}

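/*
 * Attach a replacement device (a new arrival or a hot spare) in place
 * of the case vdev.  Returns true if the attach was initiated.
 */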
bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare)
{
        nvlist_t *nvroot, *newvd;
        const char *poolname;
        string oldstr(VdevGUIDString());
        bool retval = true;

        /* Figure out what pool we're working on. */
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
                       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        Vdev vd(zhp, CaseVdev(zhp));
        Vdev replaced(BeingReplacedBy(zhp));

        if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
                /* If we are already being replaced by a working spare, pass. */
                if (replaced.IsResilvering()
                 || replaced.State() == VDEV_STATE_HEALTHY) {
                        syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
                            "replaced", VdevGUIDString().c_str(), path);
                        return (/*consumed*/false);
                }
                /*
                 * If we have already been replaced by a spare, but that spare
                 * is broken, we must spare the spare, not the original device.
                 */
                oldstr = replaced.GUIDString();
                syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
                    "broken spare %s instead", VdevGUIDString().c_str(),
                    path, oldstr.c_str());
        }

        /*
         * Build a root vdev/leaf vdev configuration suitable for
         * zpool_vdev_attach.  Only enough data for the kernel to find
         * the device (i.e. type and disk device node path) is needed.
         */
        nvroot = NULL;
        newvd = NULL;

        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
         || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
                    "configuration data.", poolname, oldstr.c_str());
                if (nvroot != NULL)
                        nvlist_free(nvroot);
                return (false);
        }
        if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
         || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
         || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
         || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
                                    &newvd, 1) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
                    "configuration data.", poolname, oldstr.c_str());
                nvlist_free(newvd);
                nvlist_free(nvroot);
                return (false);
        }

        /* Data was copied when added to the root vdev. */
        nvlist_free(newvd);

        retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
            /*replace*/B_TRUE) == 0);
        if (retval)
                syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
                    poolname, oldstr.c_str(), path);
        else
                syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
                    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
                    libzfs_error_description(g_zfsHandle));
        nvlist_free(nvroot);

        return (retval);
}

/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
        return ("ereport.fs.zfs.checksum" == event->Value("type"));
}

/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
        return ("ereport.fs.zfs.io" == event->Value("type"));
}

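/*
 * A case justifies action once more than ZFS_DEGRADE_IO_COUNT matching
 * events have accumulated: checksum errors argue for degrading the
 * vdev, I/O errors for faulting it.
 */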
bool
CaseFile::ShouldDegrade() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}

bool
CaseFile::ShouldFault() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}

nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
        return (VdevIterator(zhp).Find(VdevGUID()));
}