/*-
 * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions, and the following disclaimer,
 *    without modification.
 * 2. Redistributions in binary form must reproduce at minimum a disclaimer
 *    substantially similar to the "NO WARRANTY" disclaimer below
 *    ("Disclaimer") and any redistribution must be conditioned upon
 *    including a substantially similar Disclaimer requirement for further
 *    binary redistribution.
 *
 * NO WARRANTY
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
 * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGES.
 *
 * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
 */

/**
 * \file case_file.cc
 *
 * We keep case files for any leaf vdev that is not in the optimal state.
 * However, we only serialize to disk those events that need to be preserved
 * across reboots.  For now, this is just a log of soft errors which we
 * accumulate in order to mark a device as degraded.
 */
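
/*
 * Sketch of the on-disk format as implemented below (a summary, not
 * normative): each case is serialized to
 * /var/db/zfsd/cases/pool_<pool_guid>_vdev_<vdev_guid>.case as one event
 * per line, with tentative events prefixed by the string "tentative ".
 * See Serialize(), SerializeEvList(), and DeSerialize() for the code that
 * defines this format.
 */
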
#include <sys/cdefs.h>
#include <sys/byteorder.h>
#include <sys/time.h>

#include <sys/fs/zfs.h>

#include <dirent.h>
#include <fcntl.h>
#include <iomanip>
#include <fstream>
#include <functional>
#include <sstream>
#include <syslog.h>
#include <unistd.h>

#include <libzfs.h>

#include <list>
#include <map>
#include <string>

#include <devdctl/guid.h>
#include <devdctl/event.h>
#include <devdctl/event_factory.h>
#include <devdctl/exception.h>
#include <devdctl/consumer.h>

#include "callout.h"
#include "vdev_iterator.h"
#include "zfsd_event.h"
#include "case_file.h"
#include "vdev.h"
#include "zfsd.h"
#include "zfsd_exception.h"
#include "zpool_list.h"

__FBSDID("$FreeBSD$");

/*============================ Namespace Control =============================*/
using std::hex;
using std::ifstream;
using std::stringstream;
using std::setfill;
using std::setw;

using DevdCtl::Event;
using DevdCtl::EventFactory;
using DevdCtl::EventList;
using DevdCtl::Guid;
using DevdCtl::ParseException;

/*--------------------------------- CaseFile ---------------------------------*/
//- CaseFile Static Data -------------------------------------------------------

CaseFileList  CaseFile::s_activeCases;
const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};

//- CaseFile Static Public Methods ---------------------------------------------
CaseFile *
CaseFile::Find(Guid poolGUID, Guid vdevGUID)
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if (((*curCase)->PoolGUID() != poolGUID
                  && Guid::InvalidGuid() != poolGUID)
                 || (*curCase)->VdevGUID() != vdevGUID)
                        continue;

                /*
                 * We only carry one active case per vdev.
                 */
                return (*curCase);
        }
        return (NULL);
}

CaseFile *
CaseFile::Find(const string &physPath)
{
        CaseFile *result = NULL;

        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++) {

                if ((*curCase)->PhysicalPath() != physPath)
                        continue;

                if (result != NULL) {
                        syslog(LOG_WARNING, "Multiple casefiles found for "
                            "physical path %s.  "
                            "This is most likely a bug in zfsd",
                            physPath.c_str());
                }
                result = *curCase;
        }
        return (result);
}

void
CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
{
        CaseFileList::iterator casefile;
        for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();) {
                CaseFileList::iterator next = casefile;
                next++;
                if (poolGUID == (*casefile)->PoolGUID())
                        (*casefile)->ReEvaluate(event);
                casefile = next;
        }
}

CaseFile &
CaseFile::Create(Vdev &vdev)
{
        CaseFile *activeCase;

        activeCase = Find(vdev.PoolGUID(), vdev.GUID());
        if (activeCase == NULL)
                activeCase = new CaseFile(vdev);

        return (*activeCase);
}

void
CaseFile::DeSerialize()
{
        struct dirent **caseFiles;

        int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
                         DeSerializeSelector, /*compar*/NULL));

        if (numCaseFiles == -1)
                return;
        if (numCaseFiles == 0) {
                free(caseFiles);
                return;
        }

        for (int i = 0; i < numCaseFiles; i++) {

                DeSerializeFile(caseFiles[i]->d_name);
                free(caseFiles[i]);
        }
        free(caseFiles);
}

bool
CaseFile::Empty()
{
        return (s_activeCases.empty());
}

void
CaseFile::LogAll()
{
        for (CaseFileList::iterator curCase = s_activeCases.begin();
             curCase != s_activeCases.end(); curCase++)
                (*curCase)->Log();
}

void
CaseFile::PurgeAll()
{
        /*
         * Serialize casefiles before deleting them so that they can be reread
         * and revalidated during BuildCaseFiles.
         * CaseFiles remove themselves from this list on destruction.
         */
        while (s_activeCases.size() != 0) {
                CaseFile *casefile = s_activeCases.front();
                casefile->Serialize();
                delete casefile;
        }
}

//- CaseFile Public Methods ----------------------------------------------------
bool
CaseFile::RefreshVdevState()
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
        if (casePool == NULL)
                return (false);

        Vdev vd(casePool, CaseVdev(casePool));
        if (vd.DoesNotExist())
                return (false);

        m_vdevState    = vd.State();
        m_vdevPhysPath = vd.PhysicalPath();
        return (true);
}

bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());

        if (pool == NULL || !RefreshVdevState()) {
                /*
                 * The pool or vdev for this case file is no longer
                 * part of the configuration.  This can happen
                 * if we process a device arrival notification
                 * before seeing the ZFS configuration change
                 * event.
                 */
                syslog(LOG_INFO,
                       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
                       "Closing\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str());
                Close();

                /*
                 * Since this event was not used to close this
                 * case, do not report it as consumed.
                 */
                return (/*consumed*/false);
        }

        if (VdevState() > VDEV_STATE_CANT_OPEN) {
                /*
                 * For now, newly discovered devices only help for
                 * devices that are missing.  In the future, we might
                 * use a newly inserted spare to replace a degraded
                 * or faulted device.
                 */
                syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
                    PoolGUIDString().c_str(), VdevGUIDString().c_str());
                return (/*consumed*/false);
        }

        if (vdev != NULL
         && ( vdev->PoolGUID() == m_poolGUID
           || vdev->PoolGUID() == Guid::InvalidGuid())
         && vdev->GUID() == m_vdevGUID) {

                zpool_vdev_online(pool, vdev->GUIDString().c_str(),
                                  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
                                  &m_vdevState);
                syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
                       zpool_get_name(pool), vdev->GUIDString().c_str(),
                       devPath.c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

                /*
                 * Check the vdev state post the online action to see
                 * if we can retire this case.
                 */
                CloseIfSolved();

                return (/*consumed*/true);
        }

        /*
         * If the auto-replace policy is enabled, and we have physical
         * path information, try a physical path replacement.
         */
        if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): AutoReplace not set.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (PhysicalPath().empty()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): No physical path information.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        if (physPath != PhysicalPath()) {
                syslog(LOG_INFO,
                       "CaseFile(%s:%s:%s): Physical path mismatch.  "
                       "Ignoring device insertion.\n",
                       PoolGUIDString().c_str(),
                       VdevGUIDString().c_str(),
                       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
                return (/*consumed*/false);
        }

        /* Write a label on the newly inserted disk. */
        if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str()) != 0) {
                syslog(LOG_ERR,
                       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
                       zpool_get_name(pool), VdevGUIDString().c_str(),
                       libzfs_error_action(g_zfsHandle),
                       libzfs_error_description(g_zfsHandle));
                return (/*consumed*/false);
        }

        syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
            PoolGUIDString().c_str(), VdevGUIDString().c_str(),
            devPath.c_str());
        return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}

bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
        bool consumed(false);

        if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
                /*
                 * The Vdev we represent has been removed from the
                 * configuration.  This case is no longer of value.
                 */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
                /* This Pool has been destroyed.  Discard the case */
                Close();

                return (/*consumed*/true);
        } else if (event.Value("type") == "misc.fs.zfs.config_sync") {
                RefreshVdevState();
                if (VdevState() < VDEV_STATE_HEALTHY)
                        consumed = ActivateSpare();
        }

        if (event.Value("class") == "resource.fs.zfs.removed") {
                bool spare_activated;

                if (!RefreshVdevState()) {
                        /*
                         * The pool or vdev for this case file is no longer
                         * part of the configuration.  This can happen
                         * if we process a device arrival notification
                         * before seeing the ZFS configuration change
                         * event.
                         */
                        syslog(LOG_INFO,
                               "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
                               "unconfigured.  Closing\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        /*
                         * Close the case now so we won't waste cycles in the
                         * system rescan
                         */
                        Close();

                        /*
                         * Since this event was not used to close this
                         * case, do not report it as consumed.
                         */
                        return (/*consumed*/false);
                }

                /*
                 * Discard any tentative I/O error events for
                 * this case.  They were most likely caused by the
                 * hot-unplug of this device.
                 */
                PurgeTentativeEvents();

                /* Try to activate spares if they are available */
                spare_activated = ActivateSpare();

                /*
                 * Rescan the drives in the system to see if a recent
                 * drive arrival can be used to solve this case.
                 */
                ZfsDaemon::RequestSystemRescan();

                /*
                 * Consume the event if we successfully activated a spare.
                 * Otherwise, leave it in the unconsumed events list so that the
                 * future addition of a spare to this pool might be able to
                 * close the case
                 */
                consumed = spare_activated;
        } else if (event.Value("class") == "resource.fs.zfs.statechange") {
                RefreshVdevState();
                /*
                 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
                 * activate a hotspare.  Otherwise, ignore the event
                 */
                if (VdevState() == VDEV_STATE_FAULTED ||
                    VdevState() == VDEV_STATE_DEGRADED ||
                    VdevState() == VDEV_STATE_CANT_OPEN)
                        (void) ActivateSpare();
                consumed = true;
        } else if (event.Value("class") == "ereport.fs.zfs.io" ||
                   event.Value("class") == "ereport.fs.zfs.checksum") {

                m_tentativeEvents.push_front(event.DeepCopy());
                RegisterCallout(event);
                consumed = true;
        }

        bool closed(CloseIfSolved());

        return (consumed || closed);
}

/* Find a Vdev containing the vdev with the given GUID */
static nvlist_t*
find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
{
        nvlist_t **vdevChildren;
        int        error;
        unsigned   ch, numChildren;

        error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
                                           &vdevChildren, &numChildren);

        if (error != 0 || numChildren == 0)
                return (NULL);

        for (ch = 0; ch < numChildren; ch++) {
                nvlist_t *result;
                Vdev vdev(pool_config, vdevChildren[ch]);

                if (vdev.GUID() == child_guid)
                        return (config);

                result = find_parent(pool_config, vdevChildren[ch], child_guid);
                if (result != NULL)
                        return (result);
        }

        return (NULL);
}
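
/*
 * Illustrative only: for a pool whose vdev tree is
 *   root -> mirror -> [disk A, disk B]
 * find_parent(config, nvroot, guid-of-disk-A) returns the nvlist of the
 * mirror vdev, recursing through ZPOOL_CONFIG_CHILDREN at each level.
 */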

bool
CaseFile::ActivateSpare()
{
        nvlist_t        *config, *nvroot, *parent_config;
        nvlist_t       **spares;
        char            *devPath, *vdev_type;
        const char      *poolname;
        u_int            nspares, i;
        int              error;

        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t  *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "for pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        config = zpool_get_config(zhp, NULL);
        if (config == NULL) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
                       "config for pool %s", poolname);
                return (false);
        }
        error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
                       "tree for pool %s", poolname);
                return (false);
        }

        parent_config = find_parent(config, nvroot, m_vdevGUID);
        if (parent_config != NULL) {
                char *parent_type;

                /*
                 * Don't activate spares for members of a "replacing" vdev.
                 * They're already dealt with.  Sparing them will just drag out
                 * the resilver process.
                 */
                error = nvlist_lookup_string(parent_config,
                    ZPOOL_CONFIG_TYPE, &parent_type);
                if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
                        return (false);
        }

        nspares = 0;
        nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
                                   &nspares);
        if (nspares == 0) {
                /* The pool has no spares configured */
                syslog(LOG_INFO, "CaseFile::ActivateSpare: "
                       "No spares available for pool %s", poolname);
                return (false);
        }
        for (i = 0; i < nspares; i++) {
                uint64_t    *nvlist_array;
                vdev_stat_t *vs;
                uint_t       nstats;

                if (nvlist_lookup_uint64_array(spares[i],
                    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
                        syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
                               "find vdev stats for pool %s, spare %d",
                               poolname, i);
                        return (false);
                }
                vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

                if ((vs->vs_aux != VDEV_AUX_SPARED)
                 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
                        /* We found a usable spare */
                        break;
                }
        }

        if (i == nspares) {
                /* No available spares were found */
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the path of pool %s, spare %d. Error %d",
                       poolname, i, error);
                return (false);
        }

        error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
        if (error != 0) {
                syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
                       "the vdev type of pool %s, spare %d. Error %d",
                       poolname, i, error);
                return (false);
        }

        return (Replace(vdev_type, devPath, /*isspare*/true));
}

void
CaseFile::RegisterCallout(const Event &event)
{
        timeval now, countdown, elapsed, timestamp, zero, remaining;

        gettimeofday(&now, 0);
        timestamp = event.GetTimestamp();
        timersub(&now, &timestamp, &elapsed);
        timersub(&s_removeGracePeriod, &elapsed, &countdown);
        /*
         * If countdown is <= zero, reset the timer to the
         * smallest positive time value instead.
         */
        timerclear(&zero);
        if (timercmp(&countdown, &zero, <=)) {
                timerclear(&countdown);
                countdown.tv_usec = 1;
        }

        remaining = m_tentativeTimer.TimeRemaining();

        if (!m_tentativeTimer.IsPending()
         || timercmp(&countdown, &remaining, <))
                m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}
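
/*
 * Worked example (illustrative): with the 60 second grace period above, an
 * error event stamped 45 seconds ago yields countdown = 60 - 45 = 15
 * seconds.  The timer is only reset if it is idle or if this countdown
 * expires sooner than the pending one.
 */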

bool
CaseFile::CloseIfSolved()
{
        if (m_events.empty()
         && m_tentativeEvents.empty()) {

                /*
                 * We currently do not track or take actions on
                 * devices in the degraded or faulted state.
                 * Once we have support for spare pools, we'll
                 * retain these cases so that any spares added in
                 * the future can be applied to them.
                 */
                switch (VdevState()) {
                case VDEV_STATE_HEALTHY:
                        /* No need to keep cases for healthy vdevs */
                        Close();
                        return (true);
                case VDEV_STATE_REMOVED:
                case VDEV_STATE_CANT_OPEN:
                        /*
                         * Keep open.  We may solve it with a newly inserted
                         * device.
                         */
                case VDEV_STATE_FAULTED:
                case VDEV_STATE_DEGRADED:
                        /*
                         * Keep open.  We may solve it with the future
                         * addition of a spare to the pool
                         */
                case VDEV_STATE_UNKNOWN:
                case VDEV_STATE_CLOSED:
                case VDEV_STATE_OFFLINE:
                        /*
                         * Keep open?  This may not be the correct behavior,
                         * but it's what we've always done
                         */
                        ;
                }

                /*
                 * Re-serialize the case in order to remove any
                 * previous event data.
                 */
                Serialize();
        }

        return (false);
}

void
CaseFile::Log()
{
        syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
               VdevGUIDString().c_str(), PhysicalPath().c_str());
        syslog(LOG_INFO, "\tVdev State = %s\n",
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
        if (m_tentativeEvents.size() != 0) {
                syslog(LOG_INFO, "\t=== Tentative Events ===\n");
                for (EventList::iterator event(m_tentativeEvents.begin());
                     event != m_tentativeEvents.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
        if (m_events.size() != 0) {
                syslog(LOG_INFO, "\t=== Events ===\n");
                for (EventList::iterator event(m_events.begin());
                     event != m_events.end(); event++)
                        (*event)->Log(LOG_INFO);
        }
}

//- CaseFile Static Protected Methods ------------------------------------------
void
CaseFile::OnGracePeriodEnded(void *arg)
{
        CaseFile &casefile(*static_cast<CaseFile *>(arg));

        casefile.OnGracePeriodEnded();
}

int
CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
{
        uint64_t poolGUID;
        uint64_t vdevGUID;

        if (dirEntry->d_type == DT_REG
         && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                   &poolGUID, &vdevGUID) == 2)
                return (1);
        return (0);
}
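
/*
 * Illustrative only: with a pool GUID of 1234 and a vdev GUID of 5678, the
 * selector above accepts a regular file named "pool_1234_vdev_5678.case"
 * and rejects anything else found in /var/db/zfsd/cases.
 */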

void
CaseFile::DeSerializeFile(const char *fileName)
{
        string    fullName(s_caseFilePath + '/' + fileName);
        CaseFile *existingCaseFile(NULL);
        CaseFile *caseFile(NULL);

        try {
                uint64_t poolGUID;
                uint64_t vdevGUID;
                nvlist_t *vdevConf;

                if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
                       &poolGUID, &vdevGUID) != 2) {
                        throw ZfsdException("CaseFile::DeSerialize: "
                            "Unintelligible CaseFile filename %s.\n", fileName);
                }
                existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
                if (existingCaseFile != NULL) {
                        /*
                         * If the vdev is already degraded or faulted,
                         * there's no point in keeping the state around
                         * that we use to put a drive into the degraded
                         * state.  However, if the vdev is simply missing,
                         * preserve the case data in the hopes that it will
                         * return.
                         */
                        caseFile = existingCaseFile;
                        vdev_state curState(caseFile->VdevState());
                        if (curState > VDEV_STATE_CANT_OPEN
                         && curState < VDEV_STATE_HEALTHY) {
                                unlink(fullName.c_str());
                                return;
                        }
                } else {
                        ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
                        if (zpl.empty()
                         || (vdevConf = VdevIterator(zpl.front())
                                                    .Find(vdevGUID)) == NULL) {
                                /*
                                 * Either the pool no longer exists
                                 * or this vdev is no longer a member of
                                 * the pool.
                                 */
                                unlink(fullName.c_str());
                                return;
                        }

                        /*
                         * Any vdev we find that does not have a case file
                         * must be in the healthy state and thus worthy of
                         * continued SERD data tracking.
                         */
                        caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
                }

                ifstream caseStream(fullName.c_str());
                if (!caseStream)
                        throw ZfsdException("CaseFile::DeSerialize: Unable to "
                                            "read %s.\n", fileName);

                caseFile->DeSerialize(caseStream);
        } catch (const ParseException &exp) {

                exp.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;

                /*
                 * Since we can't parse the file, unlink it so we don't
                 * trip over it again.
                 */
                unlink(fullName.c_str());
        } catch (const ZfsdException &zfsException) {

                zfsException.Log();
                if (caseFile != existingCaseFile)
                        delete caseFile;
        }
}

//- CaseFile Protected Methods -------------------------------------------------
CaseFile::CaseFile(const Vdev &vdev)
 : m_poolGUID(vdev.PoolGUID()),
   m_vdevGUID(vdev.GUID()),
   m_vdevState(vdev.State()),
   m_vdevPhysPath(vdev.PhysicalPath())
{
        stringstream guidString;

        guidString << m_vdevGUID;
        m_vdevGUIDString = guidString.str();
        guidString.str("");
        guidString << m_poolGUID;
        m_poolGUIDString = guidString.str();

        s_activeCases.push_back(this);

        syslog(LOG_INFO, "Creating new CaseFile:\n");
        Log();
}

CaseFile::~CaseFile()
{
        PurgeEvents();
        PurgeTentativeEvents();
        m_tentativeTimer.Stop();
        s_activeCases.remove(this);
}

void
CaseFile::PurgeEvents()
{
        for (EventList::iterator event(m_events.begin());
             event != m_events.end(); event++)
                delete *event;

        m_events.clear();
}

void
CaseFile::PurgeTentativeEvents()
{
        for (EventList::iterator event(m_tentativeEvents.begin());
             event != m_tentativeEvents.end(); event++)
                delete *event;

        m_tentativeEvents.clear();
}

void
CaseFile::SerializeEvList(const EventList events, int fd,
                const char* prefix) const
{
        if (events.empty())
                return;
        for (EventList::const_iterator curEvent = events.begin();
             curEvent != events.end(); curEvent++) {
                const string &eventString((*curEvent)->GetEventString());

                // TODO: replace many write(2) calls with a single writev(2)
                if (prefix)
                        write(fd, prefix, strlen(prefix));
                write(fd, eventString.c_str(), eventString.length());
        }
}

void
CaseFile::Serialize()
{
        stringstream saveFile;

        saveFile << setfill('0')
                 << s_caseFilePath << "/"
                 << "pool_" << PoolGUIDString()
                 << "_vdev_" << VdevGUIDString()
                 << ".case";

        if (m_events.empty() && m_tentativeEvents.empty()) {
                unlink(saveFile.str().c_str());
                return;
        }

        int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
        if (fd == -1) {
                syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
                       saveFile.str().c_str());
                return;
        }
        SerializeEvList(m_events, fd);
        SerializeEvList(m_tentativeEvents, fd, "tentative ");
        close(fd);
}
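
/*
 * Illustrative only (the event strings below are hypothetical): a
 * serialized case for pool 1234, vdev 5678 lives at
 * /var/db/zfsd/cases/pool_1234_vdev_5678.case and might contain:
 *
 *   !system=ZFS subsystem=ZFS type=ereport.fs.zfs.io ...
 *   tentative !system=ZFS subsystem=ZFS type=ereport.fs.zfs.checksum ...
 *
 * Confirmed events are written first, then tentative ones with their
 * "tentative " prefix.
 */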

/*
 * XXX: This method assumes that events may not contain embedded newlines.
 * If events ever can contain embedded newlines, then CaseFile must switch
 * serialization formats.
 */
void
CaseFile::DeSerialize(ifstream &caseStream)
{
        string        evString;
        const EventFactory &factory(ZfsDaemon::Get().GetFactory());

        caseStream >> std::noskipws >> std::ws;
        while (caseStream.good()) {
                /*
                 * Outline:
                 * read the beginning of a line and check it for
                 * "tentative".  If found, discard "tentative".
                 * Create a new event
                 * continue
                 */
                EventList* destEvents;
                const string tentFlag("tentative ");
                string line;
                std::stringbuf lineBuf;

                caseStream.get(lineBuf);
                caseStream.ignore();  /*discard the newline character*/
                line = lineBuf.str();
                if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
                        /* Discard "tentative" */
                        line.erase(0, tentFlag.size());
                        destEvents = &m_tentativeEvents;
                } else {
                        destEvents = &m_events;
                }
                Event *event(Event::CreateEvent(factory, line));
                if (event != NULL) {
                        destEvents->push_back(event);
                        RegisterCallout(*event);
                }
        }
}

void
CaseFile::Close()
{
        /*
         * This case is no longer relevant.  Clean up our
         * serialization file, and delete the case.
         */
        syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
               PoolGUIDString().c_str(), VdevGUIDString().c_str(),
               zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

        /*
         * Serializing a case with no event data clears the
         * serialization data for that case.
         */
        PurgeEvents();
        Serialize();

        delete this;
}

void
CaseFile::OnGracePeriodEnded()
{
        bool should_fault, should_degrade;
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());

        m_events.splice(m_events.begin(), m_tentativeEvents);
        should_fault = ShouldFault();
        should_degrade = ShouldDegrade();

        if (should_fault || should_degrade) {
                if (zhp == NULL
                 || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
                        /*
                         * Either the pool no longer exists
                         * or this vdev is no longer a member of
                         * the pool.
                         */
                        Close();
                        return;
                }
        }

        /* A fault condition has priority over a degrade condition */
        if (should_fault) {
                /* Fault the vdev and close the case. */
                if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
                                       VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Faulting vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        } else if (should_degrade) {
                /* Degrade the vdev and close the case. */
                if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
                                       VDEV_AUX_ERR_EXCEEDED) == 0) {
                        syslog(LOG_INFO, "Degrading vdev(%s/%s)",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str());
                        Close();
                        return;
                } else {
                        syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
                               PoolGUIDString().c_str(),
                               VdevGUIDString().c_str(),
                               libzfs_error_action(g_zfsHandle),
                               libzfs_error_description(g_zfsHandle));
                }
        }
        Serialize();
}

Vdev
CaseFile::BeingReplacedBy(zpool_handle_t *zhp)
{
        Vdev vd(zhp, CaseVdev(zhp));
        std::list<Vdev> children;
        std::list<Vdev>::iterator children_it;

        Vdev parent(vd.Parent());
        Vdev replacing(NonexistentVdev);

        /*
         * To determine whether we are being replaced by another spare that
         * is still working, make sure that our parent is a spare vdev and
         * that the replacement is either resilvering or healthy.  If any
         * of these conditions fail, then we are not being replaced by a
         * spare.
         *
         * If the spare is healthy, then the case file should be closed very
         * soon after this check.
         */
        if (parent.DoesNotExist()
         || parent.Name(zhp, /*verbose*/false) != "spare")
                return (NonexistentVdev);

        children = parent.Children();
        children_it = children.begin();
        for (; children_it != children.end(); children_it++) {
                Vdev child = *children_it;

                /* Skip our vdev. */
                if (child.GUID() == VdevGUID())
                        continue;
                /*
                 * Accept the first child that doesn't match our GUID, or
                 * any resilvering/healthy device if one exists.
                 */
                if (replacing.DoesNotExist() || child.IsResilvering()
                 || child.State() == VDEV_STATE_HEALTHY)
                        replacing = child;
        }

        return (replacing);
}
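
/*
 * Illustrative only: after a hot spare kicks in, the tree looks like
 *   spare -> [original disk, spare disk]
 * and BeingReplacedBy() returns the sibling spare disk, preferring one
 * that is resilvering or healthy.
 */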

bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare)
{
        nvlist_t *nvroot, *newvd;
        const char *poolname;
        string oldstr(VdevGUIDString());
        bool retval = true;

        /* Figure out what pool we're working on */
        ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
        zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
        if (zhp == NULL) {
                syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
                       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
                return (false);
        }
        poolname = zpool_get_name(zhp);
        Vdev vd(zhp, CaseVdev(zhp));
        Vdev replaced(BeingReplacedBy(zhp));

        if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
                /* If we are already being replaced by a working spare, pass. */
                if (replaced.IsResilvering()
                 || replaced.State() == VDEV_STATE_HEALTHY) {
                        syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
                            "replaced", VdevGUIDString().c_str(), path);
                        return (/*consumed*/false);
                }
                /*
                 * If we have already been replaced by a spare, but that spare
                 * is broken, we must spare the spare, not the original device.
                 */
                oldstr = replaced.GUIDString();
                syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
                    "broken spare %s instead", VdevGUIDString().c_str(),
                    path, oldstr.c_str());
        }

        /*
         * Build a root vdev/leaf vdev configuration suitable for
         * zpool_vdev_attach. Only enough data for the kernel to find
         * the device (i.e. type and disk device node path) are needed.
         */
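        /*
         * Illustrative shape of the nvlists built below (the device path
         * is hypothetical):
         *   newvd:  { type: "disk", path: "/dev/da5" }
         *   nvroot: { type: "root", children: [ newvd ] }
         */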
        nvroot = NULL;
        newvd = NULL;

        if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
         || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
                    "configuration data.", poolname, oldstr.c_str());
                if (nvroot != NULL)
                        nvlist_free(nvroot);
                return (false);
        }
        if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
         || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
         || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
         || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
                                    &newvd, 1) != 0) {
                syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
                    "configuration data.", poolname, oldstr.c_str());
                nvlist_free(newvd);
                nvlist_free(nvroot);
                return (false);
        }

        /* Data was copied when added to the root vdev. */
        nvlist_free(newvd);

        retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
            /*replace*/B_TRUE, /*rebuild*/B_FALSE) == 0);
        if (retval)
                syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
                    poolname, oldstr.c_str(), path);
        else
                syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
                    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
                    libzfs_error_description(g_zfsHandle));
        nvlist_free(nvroot);

        return (retval);
}

/* Does the argument event refer to a checksum error? */
static bool
IsChecksumEvent(const Event* const event)
{
        return ("ereport.fs.zfs.checksum" == event->Value("type"));
}

/* Does the argument event refer to an IO error? */
static bool
IsIOEvent(const Event* const event)
{
        return ("ereport.fs.zfs.io" == event->Value("type"));
}

bool
CaseFile::ShouldDegrade() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
}

bool
CaseFile::ShouldFault() const
{
        return (std::count_if(m_events.begin(), m_events.end(),
                              IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}
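
/*
 * Note: both predicates above compare against the same ZFS_DEGRADE_IO_COUNT
 * threshold (defined in case_file.h) and only count events that have been
 * promoted from the tentative list to m_events by OnGracePeriodEnded();
 * accumulated checksum errors lead to a degrade, I/O errors to a fault.
 */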

nvlist_t *
CaseFile::CaseVdev(zpool_handle_t *zhp) const
{
        return (VdevIterator(zhp).Find(VdevGUID()));
}