]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - cddl/usr.sbin/zfsd/case_file.cc
Update to Zstandard 1.4.5
[FreeBSD/FreeBSD.git] / cddl / usr.sbin / zfsd / case_file.cc
1 /*-
2  * Copyright (c) 2011, 2012, 2013, 2014, 2016 Spectra Logic Corporation
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions, and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    substantially similar to the "NO WARRANTY" disclaimer below
13  *    ("Disclaimer") and any redistribution must be conditioned upon
14  *    including a substantially similar Disclaimer requirement for further
15  *    binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
21  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22  * HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
26  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
27  * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGES.
29  *
30  * Authors: Justin T. Gibbs     (Spectra Logic Corporation)
31  */
32
33 /**
34  * \file case_file.cc
35  *
36  * We keep case files for any leaf vdev that is not in the optimal state.
37  * However, we only serialize to disk those events that need to be preserved
38  * across reboots.  For now, this is just a log of soft errors which we
39  * accumulate in order to mark a device as degraded.
40  */
41 #include <sys/cdefs.h>
42 #include <sys/time.h>
43
44 #include <sys/fs/zfs.h>
45
46 #include <dirent.h>
47 #include <iomanip>
48 #include <fstream>
49 #include <functional>
50 #include <sstream>
51 #include <syslog.h>
52 #include <unistd.h>
53
54 #include <libzfs.h>
55
56 #include <list>
57 #include <map>
58 #include <string>
59
60 #include <devdctl/guid.h>
61 #include <devdctl/event.h>
62 #include <devdctl/event_factory.h>
63 #include <devdctl/exception.h>
64 #include <devdctl/consumer.h>
65
66 #include "callout.h"
67 #include "vdev_iterator.h"
68 #include "zfsd_event.h"
69 #include "case_file.h"
70 #include "vdev.h"
71 #include "zfsd.h"
72 #include "zfsd_exception.h"
73 #include "zpool_list.h"
74
75 __FBSDID("$FreeBSD$");
76
77 /*============================ Namespace Control =============================*/
78 using std::auto_ptr;
79 using std::hex;
80 using std::ifstream;
81 using std::stringstream;
82 using std::setfill;
83 using std::setw;
84
85 using DevdCtl::Event;
86 using DevdCtl::EventFactory;
87 using DevdCtl::EventList;
88 using DevdCtl::Guid;
89 using DevdCtl::ParseException;
90
91 /*--------------------------------- CaseFile ---------------------------------*/
92 //- CaseFile Static Data -------------------------------------------------------
93
94 CaseFileList  CaseFile::s_activeCases;
95 const string  CaseFile::s_caseFilePath = "/var/db/zfsd/cases";
96 const timeval CaseFile::s_removeGracePeriod = { 60 /*sec*/, 0 /*usec*/};
97
98 //- CaseFile Static Public Methods ---------------------------------------------
99 CaseFile *
100 CaseFile::Find(Guid poolGUID, Guid vdevGUID)
101 {
102         for (CaseFileList::iterator curCase = s_activeCases.begin();
103              curCase != s_activeCases.end(); curCase++) {
104
105                 if (((*curCase)->PoolGUID() != poolGUID
106                   && Guid::InvalidGuid() != poolGUID)
107                  || (*curCase)->VdevGUID() != vdevGUID)
108                         continue;
109
110                 /*
111                  * We only carry one active case per-vdev.
112                  */
113                 return (*curCase);
114         }
115         return (NULL);
116 }
117
118 CaseFile *
119 CaseFile::Find(const string &physPath)
120 {
121         CaseFile *result = NULL;
122
123         for (CaseFileList::iterator curCase = s_activeCases.begin();
124              curCase != s_activeCases.end(); curCase++) {
125
126                 if ((*curCase)->PhysicalPath() != physPath)
127                         continue;
128
129                 if (result != NULL) {
130                         syslog(LOG_WARNING, "Multiple casefiles found for "
131                             "physical path %s.  "
132                             "This is most likely a bug in zfsd",
133                             physPath.c_str());
134                 }
135                 result = *curCase;
136         }
137         return (result);
138 }
139
140
141 void
142 CaseFile::ReEvaluateByGuid(Guid poolGUID, const ZfsEvent &event)
143 {
144         CaseFileList::iterator casefile;
145         for (casefile = s_activeCases.begin(); casefile != s_activeCases.end();){
146                 CaseFileList::iterator next = casefile;
147                 next++;
148                 if (poolGUID == (*casefile)->PoolGUID())
149                         (*casefile)->ReEvaluate(event);
150                 casefile = next;
151         }
152 }
153
154 CaseFile &
155 CaseFile::Create(Vdev &vdev)
156 {
157         CaseFile *activeCase;
158
159         activeCase = Find(vdev.PoolGUID(), vdev.GUID());
160         if (activeCase == NULL)
161                 activeCase = new CaseFile(vdev);
162
163         return (*activeCase);
164 }
165
166 void
167 CaseFile::DeSerialize()
168 {
169         struct dirent **caseFiles;
170
171         int numCaseFiles(scandir(s_caseFilePath.c_str(), &caseFiles,
172                          DeSerializeSelector, /*compar*/NULL));
173
174         if (numCaseFiles == -1)
175                 return;
176         if (numCaseFiles == 0) {
177                 free(caseFiles);
178                 return;
179         }
180
181         for (int i = 0; i < numCaseFiles; i++) {
182
183                 DeSerializeFile(caseFiles[i]->d_name);
184                 free(caseFiles[i]);
185         }
186         free(caseFiles);
187 }
188
189 bool
190 CaseFile::Empty()
191 {
192         return (s_activeCases.empty());
193 }
194
195 void
196 CaseFile::LogAll()
197 {
198         for (CaseFileList::iterator curCase = s_activeCases.begin();
199              curCase != s_activeCases.end(); curCase++)
200                 (*curCase)->Log();
201 }
202
203 void
204 CaseFile::PurgeAll()
205 {
206         /*
207          * Serialize casefiles before deleting them so that they can be reread
208          * and revalidated during BuildCaseFiles.
209          * CaseFiles remove themselves from this list on destruction.
210          */
211         while (s_activeCases.size() != 0) {
212                 CaseFile *casefile = s_activeCases.front();
213                 casefile->Serialize();
214                 delete casefile;
215         }
216
217 }
218
219 //- CaseFile Public Methods ----------------------------------------------------
220 bool
221 CaseFile::RefreshVdevState()
222 {
223         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
224         zpool_handle_t *casePool(zpl.empty() ? NULL : zpl.front());
225         if (casePool == NULL)
226                 return (false);
227
228         Vdev vd(casePool, CaseVdev(casePool));
229         if (vd.DoesNotExist())
230                 return (false);
231
232         m_vdevState    = vd.State();
233         m_vdevPhysPath = vd.PhysicalPath();
234         return (true);
235 }
236
/*
 * Consider a newly arrived device as a potential resolution for this
 * case.
 *
 * \param devPath   Device node path of the arrived device.
 * \param physPath  Physical path (bus location) of the arrived device.
 * \param vdev      Vdev decoded from the device's ZFS label, or NULL if
 *                  the device carries no recognizable label.
 *
 * \return  true if the arrival event was consumed (device onlined or a
 *          replacement started); false otherwise.
 */
bool
CaseFile::ReEvaluate(const string &devPath, const string &physPath, Vdev *vdev)
{
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *pool(zpl.empty() ? NULL : zpl.front());
	zpool_boot_label_t boot_type;
	uint64_t boot_size;

	if (pool == NULL || !RefreshVdevState()) {
		/*
		 * The pool or vdev for this case file is no longer
		 * part of the configuration.  This can happen
		 * if we process a device arrival notification
		 * before seeing the ZFS configuration change
		 * event.
		 */
		syslog(LOG_INFO,
		       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev unconfigured.  "
		       "Closing\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str());
		Close();

		/*
		 * Since this event was not used to close this
		 * case, do not report it as consumed.
		 */
		return (/*consumed*/false);
	}

	if (VdevState() > VDEV_STATE_CANT_OPEN) {
		/*
		 * For now, newly discovered devices only help for
		 * devices that are missing.  In the future, we might
		 * use a newly inserted spare to replace a degraded
		 * or faulted device.
		 */
		syslog(LOG_INFO, "CaseFile::ReEvaluate(%s,%s): Pool/Vdev ignored",
		    PoolGUIDString().c_str(), VdevGUIDString().c_str());
		return (/*consumed*/false);
	}

	/*
	 * The device is labeled as a member of this case's pool and vdev:
	 * it is the missing disk returning, so just online it in place.
	 * An invalid pool GUID on the label is tolerated (e.g. a partially
	 * written label).
	 */
	if (vdev != NULL
	 && ( vdev->PoolGUID() == m_poolGUID
	   || vdev->PoolGUID() == Guid::InvalidGuid())
	 && vdev->GUID() == m_vdevGUID) {

		zpool_vdev_online(pool, vdev->GUIDString().c_str(),
				  ZFS_ONLINE_CHECKREMOVE | ZFS_ONLINE_UNSPARE,
				  &m_vdevState);
		syslog(LOG_INFO, "Onlined vdev(%s/%s:%s).  State now %s.\n",
		       zpool_get_name(pool), vdev->GUIDString().c_str(),
		       devPath.c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

		/*
		 * Check the vdev state post the online action to see
		 * if we can retire this case.
		 */
		CloseIfSolved();

		return (/*consumed*/true);
	}

	/*
	 * If the auto-replace policy is enabled, and we have physical
	 * path information, try a physical path replacement.
	 */
	if (zpool_get_prop_int(pool, ZPOOL_PROP_AUTOREPLACE, NULL) == 0) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): AutoReplace not set.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (PhysicalPath().empty()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): No physical path information.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	if (physPath != PhysicalPath()) {
		syslog(LOG_INFO,
		       "CaseFile(%s:%s:%s): Physical path mismatch.  "
		       "Ignoring device insertion.\n",
		       PoolGUIDString().c_str(),
		       VdevGUIDString().c_str(),
		       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
		return (/*consumed*/false);
	}

	/* Write a label on the newly inserted disk. */
	if (zpool_is_bootable(pool))
		boot_type = ZPOOL_COPY_BOOT_LABEL;
	else
		boot_type = ZPOOL_NO_BOOT_LABEL;
	boot_size = zpool_get_prop_int(pool, ZPOOL_PROP_BOOTSIZE, NULL);
	if (zpool_label_disk(g_zfsHandle, pool, devPath.c_str(),
	    boot_type, boot_size, NULL) != 0) {
		syslog(LOG_ERR,
		       "Replace vdev(%s/%s) by physical path (label): %s: %s\n",
		       zpool_get_name(pool), VdevGUIDString().c_str(),
		       libzfs_error_action(g_zfsHandle),
		       libzfs_error_description(g_zfsHandle));
		return (/*consumed*/false);
	}

	/*
	 * The new disk occupies the same physical slot as the failed
	 * one; kick off a replace so it resilvers into the vdev.
	 */
	syslog(LOG_INFO, "CaseFile::ReEvaluate(%s/%s): Replacing with %s",
	    PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	    devPath.c_str());
	return (Replace(VDEV_TYPE_DISK, devPath.c_str(), /*isspare*/false));
}
356
/*
 * Re-evaluate this case against an incoming ZFS event.
 *
 * Removal/destroy events retire the case; config-sync, removed, and
 * statechange events may trigger hotspare activation; I/O and checksum
 * ereports are buffered as tentative evidence until the grace period
 * expires (see RegisterCallout()).
 *
 * \return  true if the event was consumed by (or closed) this case.
 */
bool
CaseFile::ReEvaluate(const ZfsEvent &event)
{
	bool consumed(false);

	if (event.Value("type") == "misc.fs.zfs.vdev_remove") {
		/*
		 * The Vdev we represent has been removed from the
		 * configuration.  This case is no longer of value.
		 */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.pool_destroy") {
		/* This Pool has been destroyed.  Discard the case */
		Close();

		return (/*consumed*/true);
	} else if (event.Value("type") == "misc.fs.zfs.config_sync") {
		RefreshVdevState();
		if (VdevState() < VDEV_STATE_HEALTHY)
			consumed = ActivateSpare();
	}


	if (event.Value("class") == "resource.fs.zfs.removed") {
		bool spare_activated;

		if (!RefreshVdevState()) {
			/*
			 * The pool or vdev for this case file is no longer
			 * part of the configuration.  This can happen
			 * if we process a device arrival notification
			 * before seeing the ZFS configuration change
			 * event.
			 */
			syslog(LOG_INFO,
			       "CaseFile::ReEvaluate(%s,%s) Pool/Vdev "
			       "unconfigured.  Closing\n",
			       PoolGUIDString().c_str(),
			       VdevGUIDString().c_str());
			/*
			 * Close the case now so we won't waste cycles in the
			 * system rescan
			 */
			Close();

			/*
			 * Since this event was not used to close this
			 * case, do not report it as consumed.
			 */
			return (/*consumed*/false);
		}

		/*
		 * Discard any tentative I/O error events for
		 * this case.  They were most likely caused by the
		 * hot-unplug of this device.
		 */
		PurgeTentativeEvents();

		/* Try to activate spares if they are available */
		spare_activated = ActivateSpare();

		/*
		 * Rescan the drives in the system to see if a recent
		 * drive arrival can be used to solve this case.
		 */
		ZfsDaemon::RequestSystemRescan();

		/*
		 * Consume the event if we successfully activated a spare.
		 * Otherwise, leave it in the unconsumed events list so that the
		 * future addition of a spare to this pool might be able to
		 * close the case
		 */
		consumed = spare_activated;
	} else if (event.Value("class") == "resource.fs.zfs.statechange") {
		RefreshVdevState();
		/*
		 * If this vdev is DEGRADED, FAULTED, or UNAVAIL, try to
		 * activate a hotspare.  Otherwise, ignore the event
		 */
		if (VdevState() == VDEV_STATE_FAULTED ||
		    VdevState() == VDEV_STATE_DEGRADED ||
		    VdevState() == VDEV_STATE_CANT_OPEN)
			(void) ActivateSpare();
		consumed = true;
	}
	else if (event.Value("class") == "ereport.fs.zfs.io" ||
		 event.Value("class") == "ereport.fs.zfs.checksum") {

		/*
		 * Buffer the error report as tentative evidence; the
		 * callout gives a hot-unplug a chance to explain (and
		 * purge) these errors before they count against the vdev.
		 */
		m_tentativeEvents.push_front(event.DeepCopy());
		RegisterCallout(event);
		consumed = true;
	}

	bool closed(CloseIfSolved());

	return (consumed || closed);
}
458
459 /* Find a Vdev containing the vdev with the given GUID */
460 static nvlist_t*
461 find_parent(nvlist_t *pool_config, nvlist_t *config, DevdCtl::Guid child_guid)
462 {
463         nvlist_t **vdevChildren;
464         int        error;
465         unsigned   ch, numChildren;
466
467         error = nvlist_lookup_nvlist_array(config, ZPOOL_CONFIG_CHILDREN,
468                                            &vdevChildren, &numChildren);
469
470         if (error != 0 || numChildren == 0)
471                 return (NULL);
472
473         for (ch = 0; ch < numChildren; ch++) {
474                 nvlist *result;
475                 Vdev vdev(pool_config, vdevChildren[ch]);
476
477                 if (vdev.GUID() == child_guid)
478                         return (config);
479
480                 result = find_parent(pool_config, vdevChildren[ch], child_guid);
481                 if (result != NULL)
482                         return (result);
483         }
484
485         return (NULL);
486 }
487
/*
 * Attempt to attach an available hotspare to this case's vdev.
 *
 * Walks the pool's spare list for a spare that is HEALTHY and not
 * already in use, then initiates a spare Replace().  Members of a
 * "replacing" vdev are skipped since they are already being repaired.
 *
 * \return  true if a spare replacement was successfully initiated.
 */
bool
CaseFile::ActivateSpare() {
	nvlist_t	*config, *nvroot, *parent_config;
	nvlist_t       **spares;
	char		*devPath, *vdev_type;
	const char	*poolname;
	u_int		 nspares, i;
	int		 error;

	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t	*zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "for pool_guid %" PRIu64".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	config = zpool_get_config(zhp, NULL);
	if (config == NULL) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find pool "
		       "config for pool %s", poolname);
		return (false);
	}
	error = nvlist_lookup_nvlist(config, ZPOOL_CONFIG_VDEV_TREE, &nvroot);
	if (error != 0){
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not find vdev "
		       "tree for pool %s", poolname);
		return (false);
	}

	parent_config = find_parent(config, nvroot, m_vdevGUID);
	if (parent_config != NULL) {
		char *parent_type;

		/* 
		 * Don't activate spares for members of a "replacing" vdev.
		 * They're already dealt with.  Sparing them will just drag out
		 * the resilver process.
		 */
		error = nvlist_lookup_string(parent_config,
		    ZPOOL_CONFIG_TYPE, &parent_type);
		if (error == 0 && strcmp(parent_type, VDEV_TYPE_REPLACING) == 0)
			return (false);
	}

	/* nspares stays 0 if the pool has no ZPOOL_CONFIG_SPARES entry. */
	nspares = 0;
	nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_SPARES, &spares,
				   &nspares);
	if (nspares == 0) {
		/* The pool has no spares configured */
		syslog(LOG_INFO, "CaseFile::ActivateSpare: "
		       "No spares available for pool %s", poolname);
		return (false);
	}
	/* Find the first spare that is neither in use nor unhealthy. */
	for (i = 0; i < nspares; i++) {
		uint64_t    *nvlist_array;
		vdev_stat_t *vs;
		uint_t	     nstats;

		if (nvlist_lookup_uint64_array(spares[i],
		    ZPOOL_CONFIG_VDEV_STATS, &nvlist_array, &nstats) != 0) {
			syslog(LOG_ERR, "CaseFile::ActivateSpare: Could not "
			       "find vdev stats for pool %s, spare %d",
			       poolname, i);
			return (false);
		}
		vs = reinterpret_cast<vdev_stat_t *>(nvlist_array);

		if ((vs->vs_aux != VDEV_AUX_SPARED)
		 && (vs->vs_state == VDEV_STATE_HEALTHY)) {
			/* We found a usable spare */
			break;
		}
	}

	if (i == nspares) {
		/* No available spares were found */
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_PATH, &devPath);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the path of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	error = nvlist_lookup_string(spares[i], ZPOOL_CONFIG_TYPE, &vdev_type);
	if (error != 0) {
		syslog(LOG_ERR, "CaseFile::ActivateSpare: Cannot determine "
		       "the vdev type of pool %s, spare %d. Error %d",
		       poolname, i, error);
		return (false);
	}

	return (Replace(vdev_type, devPath, /*isspare*/true));
}
586
/*
 * (Re)arm the tentative-event grace-period timer for a newly received
 * error event.  The timer is set to fire when the event's grace period
 * (s_removeGracePeriod, measured from the event's timestamp) expires,
 * but never pushed later than an already-pending earlier deadline.
 */
void
CaseFile::RegisterCallout(const Event &event)
{
	timeval now, countdown, elapsed, timestamp, zero, remaining;

	/* countdown = grace period minus time already elapsed. */
	gettimeofday(&now, 0);
	timestamp = event.GetTimestamp();
	timersub(&now, &timestamp, &elapsed);
	timersub(&s_removeGracePeriod, &elapsed, &countdown);
	/*
	 * If countdown is <= zero, Reset the timer to the
	 * smallest positive time value instead
	 */
	timerclear(&zero);
	if (timercmp(&countdown, &zero, <=)) {
		timerclear(&countdown);
		countdown.tv_usec = 1;
	}

	remaining = m_tentativeTimer.TimeRemaining();

	/* Only rearm if no timer is pending or ours would fire sooner. */
	if (!m_tentativeTimer.IsPending()
	 || timercmp(&countdown, &remaining, <))
		m_tentativeTimer.Reset(countdown, OnGracePeriodEnded, this);
}
612
613
/*
 * Close this case if it no longer serves a purpose: no recorded or
 * tentative events remain and the vdev has returned to health.
 *
 * \return  true if the case was closed (and this object deleted);
 *          false if the case remains open.  Callers must not touch
 *          the object after a true return.
 */
bool
CaseFile::CloseIfSolved()
{
	if (m_events.empty()
	 && m_tentativeEvents.empty()) {

		/*
		 * We currently do not track or take actions on
		 * devices in the degraded or faulted state.
		 * Once we have support for spare pools, we'll
		 * retain these cases so that any spares added in
		 * the future can be applied to them.
		 */
		switch (VdevState()) {
		case VDEV_STATE_HEALTHY:
			/* No need to keep cases for healthy vdevs */
			Close();
			return (true);
		case VDEV_STATE_REMOVED:
		case VDEV_STATE_CANT_OPEN:
			/*
			 * Keep open.  We may solve it with a newly inserted
			 * device.
			 */
			/* FALLTHROUGH */
		case VDEV_STATE_FAULTED:
		case VDEV_STATE_DEGRADED:
			/*
			 * Keep open.  We may solve it with the future
			 * addition of a spare to the pool
			 */
			/* FALLTHROUGH */
		case VDEV_STATE_UNKNOWN:
		case VDEV_STATE_CLOSED:
		case VDEV_STATE_OFFLINE:
			/*
			 * Keep open?  This may not be the correct behavior,
			 * but it's what we've always done
			 */
			;
		}

		/*
		 * Re-serialize the case in order to remove any
		 * previous event data.
		 */
		Serialize();
	}

	return (false);
}
663
664 void
665 CaseFile::Log()
666 {
667         syslog(LOG_INFO, "CaseFile(%s,%s,%s)\n", PoolGUIDString().c_str(),
668                VdevGUIDString().c_str(), PhysicalPath().c_str());
669         syslog(LOG_INFO, "\tVdev State = %s\n",
670                zpool_state_to_name(VdevState(), VDEV_AUX_NONE));
671         if (m_tentativeEvents.size() != 0) {
672                 syslog(LOG_INFO, "\t=== Tentative Events ===\n");
673                 for (EventList::iterator event(m_tentativeEvents.begin());
674                      event != m_tentativeEvents.end(); event++)
675                         (*event)->Log(LOG_INFO);
676         }
677         if (m_events.size() != 0) {
678                 syslog(LOG_INFO, "\t=== Events ===\n");
679                 for (EventList::iterator event(m_events.begin());
680                      event != m_events.end(); event++)
681                         (*event)->Log(LOG_INFO);
682         }
683 }
684
685 //- CaseFile Static Protected Methods ------------------------------------------
686 void
687 CaseFile::OnGracePeriodEnded(void *arg)
688 {
689         CaseFile &casefile(*static_cast<CaseFile *>(arg));
690
691         casefile.OnGracePeriodEnded();
692 }
693
694 int
695 CaseFile::DeSerializeSelector(const struct dirent *dirEntry)
696 {
697         uint64_t poolGUID;
698         uint64_t vdevGUID;
699
700         if (dirEntry->d_type == DT_REG
701          && sscanf(dirEntry->d_name, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
702                    &poolGUID, &vdevGUID) == 2)
703                 return (1);
704         return (0);
705 }
706
707 void
708 CaseFile::DeSerializeFile(const char *fileName)
709 {
710         string    fullName(s_caseFilePath + '/' + fileName);
711         CaseFile *existingCaseFile(NULL);
712         CaseFile *caseFile(NULL);
713
714         try {
715                 uint64_t poolGUID;
716                 uint64_t vdevGUID;
717                 nvlist_t *vdevConf;
718
719                 if (sscanf(fileName, "pool_%" PRIu64 "_vdev_%" PRIu64 ".case",
720                        &poolGUID, &vdevGUID) != 2) {
721                         throw ZfsdException("CaseFile::DeSerialize: "
722                             "Unintelligible CaseFile filename %s.\n", fileName);
723                 }
724                 existingCaseFile = Find(Guid(poolGUID), Guid(vdevGUID));
725                 if (existingCaseFile != NULL) {
726                         /*
727                          * If the vdev is already degraded or faulted,
728                          * there's no point in keeping the state around
729                          * that we use to put a drive into the degraded
730                          * state.  However, if the vdev is simply missing,
731                          * preserve the case data in the hopes that it will
732                          * return.
733                          */
734                         caseFile = existingCaseFile;
735                         vdev_state curState(caseFile->VdevState());
736                         if (curState > VDEV_STATE_CANT_OPEN
737                          && curState < VDEV_STATE_HEALTHY) {
738                                 unlink(fileName);
739                                 return;
740                         }
741                 } else {
742                         ZpoolList zpl(ZpoolList::ZpoolByGUID, &poolGUID);
743                         if (zpl.empty()
744                          || (vdevConf = VdevIterator(zpl.front())
745                                                     .Find(vdevGUID)) == NULL) {
746                                 /*
747                                  * Either the pool no longer exists
748                                  * or this vdev is no longer a member of
749                                  * the pool.
750                                  */
751                                 unlink(fullName.c_str());
752                                 return;
753                         }
754
755                         /*
756                          * Any vdev we find that does not have a case file
757                          * must be in the healthy state and thus worthy of
758                          * continued SERD data tracking.
759                          */
760                         caseFile = new CaseFile(Vdev(zpl.front(), vdevConf));
761                 }
762
763                 ifstream caseStream(fullName.c_str());
764                 if (!caseStream)
765                         throw ZfsdException("CaseFile::DeSerialize: Unable to "
766                                             "read %s.\n", fileName);
767
768                 caseFile->DeSerialize(caseStream);
769         } catch (const ParseException &exp) {
770
771                 exp.Log();
772                 if (caseFile != existingCaseFile)
773                         delete caseFile;
774
775                 /*
776                  * Since we can't parse the file, unlink it so we don't
777                  * trip over it again.
778                  */
779                 unlink(fileName);
780         } catch (const ZfsdException &zfsException) {
781
782                 zfsException.Log();
783                 if (caseFile != existingCaseFile)
784                         delete caseFile;
785         }
786 }
787
788 //- CaseFile Protected Methods -------------------------------------------------
789 CaseFile::CaseFile(const Vdev &vdev)
790  : m_poolGUID(vdev.PoolGUID()),
791    m_vdevGUID(vdev.GUID()),
792    m_vdevState(vdev.State()),
793    m_vdevPhysPath(vdev.PhysicalPath())
794 {
795         stringstream guidString;
796
797         guidString << m_vdevGUID;
798         m_vdevGUIDString = guidString.str();
799         guidString.str("");
800         guidString << m_poolGUID;
801         m_poolGUIDString = guidString.str();
802
803         s_activeCases.push_back(this);
804
805         syslog(LOG_INFO, "Creating new CaseFile:\n");
806         Log();
807 }
808
/*
 * Release all event data, cancel any pending grace-period callout,
 * and deregister from the active-case list.
 */
CaseFile::~CaseFile()
{
	PurgeEvents();
	PurgeTentativeEvents();
	m_tentativeTimer.Stop();
	s_activeCases.remove(this);
}
816
817 void
818 CaseFile::PurgeEvents()
819 {
820         for (EventList::iterator event(m_events.begin());
821              event != m_events.end(); event++)
822                 delete *event;
823
824         m_events.clear();
825 }
826
827 void
828 CaseFile::PurgeTentativeEvents()
829 {
830         for (EventList::iterator event(m_tentativeEvents.begin());
831              event != m_tentativeEvents.end(); event++)
832                 delete *event;
833
834         m_tentativeEvents.clear();
835 }
836
837 void
838 CaseFile::SerializeEvList(const EventList events, int fd,
839                 const char* prefix) const
840 {
841         if (events.empty())
842                 return;
843         for (EventList::const_iterator curEvent = events.begin();
844              curEvent != events.end(); curEvent++) {
845                 const string &eventString((*curEvent)->GetEventString());
846
847                 // TODO: replace many write(2) calls with a single writev(2)
848                 if (prefix)
849                         write(fd, prefix, strlen(prefix));
850                 write(fd, eventString.c_str(), eventString.length());
851         }
852 }
853
854 void
855 CaseFile::Serialize()
856 {
857         stringstream saveFile;
858
859         saveFile << setfill('0')
860                  << s_caseFilePath << "/"
861                  << "pool_" << PoolGUIDString()
862                  << "_vdev_" << VdevGUIDString()
863                  << ".case";
864
865         if (m_events.empty() && m_tentativeEvents.empty()) {
866                 unlink(saveFile.str().c_str());
867                 return;
868         }
869
870         int fd(open(saveFile.str().c_str(), O_CREAT|O_TRUNC|O_WRONLY, 0644));
871         if (fd == -1) {
872                 syslog(LOG_ERR, "CaseFile::Serialize: Unable to open %s.\n",
873                        saveFile.str().c_str());
874                 return;
875         }
876         SerializeEvList(m_events, fd);
877         SerializeEvList(m_tentativeEvents, fd, "tentative ");
878         close(fd);
879 }
880
881 /*
882  * XXX: This method assumes that events may not contain embedded newlines.  If
883  * ever events can contain embedded newlines, then CaseFile must switch
884  * serialization formats
885  */
886 void
887 CaseFile::DeSerialize(ifstream &caseStream)
888 {
889         string        evString;
890         const EventFactory &factory(ZfsDaemon::Get().GetFactory());
891
892         caseStream >> std::noskipws >> std::ws;
893         while (caseStream.good()) {
894                 /*
895                  * Outline:
896                  * read the beginning of a line and check it for
897                  * "tentative".  If found, discard "tentative".
898                  * Create a new event
899                  * continue
900                  */
901                 EventList* destEvents;
902                 const string tentFlag("tentative ");
903                 string line;
904                 std::stringbuf lineBuf;
905
906                 caseStream.get(lineBuf);
907                 caseStream.ignore();  /*discard the newline character*/
908                 line = lineBuf.str();
909                 if (line.compare(0, tentFlag.size(), tentFlag) == 0) {
910                         /* Discard "tentative" */
911                         line.erase(0, tentFlag.size());
912                         destEvents = &m_tentativeEvents;
913                 } else {
914                         destEvents = &m_events;
915                 }
916                 Event *event(Event::CreateEvent(factory, line));
917                 if (event != NULL) {
918                         destEvents->push_back(event);
919                         RegisterCallout(*event);
920                 }
921         }
922 }
923
void
CaseFile::Close()
{
	/*
	 * This case is no longer relevant.  Clean up our
	 * serialization file, and delete the case.
	 */
	syslog(LOG_INFO, "CaseFile(%s,%s) closed - State %s\n",
	       PoolGUIDString().c_str(), VdevGUIDString().c_str(),
	       zpool_state_to_name(VdevState(), VDEV_AUX_NONE));

	/*
	 * Serialization of a Case with no event data, clears the
	 * Serialization data for that event.
	 */
	PurgeEvents();
	Serialize();

	/* Self-destruct: no member access may follow this statement. */
	delete this;
}
944
945 void
946 CaseFile::OnGracePeriodEnded()
947 {
948         bool should_fault, should_degrade;
949         ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
950         zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
951
952         m_events.splice(m_events.begin(), m_tentativeEvents);
953         should_fault = ShouldFault();
954         should_degrade = ShouldDegrade();
955
956         if (should_fault || should_degrade) {
957                 if (zhp == NULL
958                  || (VdevIterator(zhp).Find(m_vdevGUID)) == NULL) {
959                         /*
960                          * Either the pool no longer exists
961                          * or this vdev is no longer a member of
962                          * the pool.
963                          */
964                         Close();
965                         return;
966                 }
967
968         }
969
970         /* A fault condition has priority over a degrade condition */
971         if (ShouldFault()) {
972                 /* Fault the vdev and close the case. */
973                 if (zpool_vdev_fault(zhp, (uint64_t)m_vdevGUID,
974                                        VDEV_AUX_ERR_EXCEEDED) == 0) {
975                         syslog(LOG_INFO, "Faulting vdev(%s/%s)",
976                                PoolGUIDString().c_str(),
977                                VdevGUIDString().c_str());
978                         Close();
979                         return;
980                 }
981                 else {
982                         syslog(LOG_ERR, "Fault vdev(%s/%s): %s: %s\n",
983                                PoolGUIDString().c_str(),
984                                VdevGUIDString().c_str(),
985                                libzfs_error_action(g_zfsHandle),
986                                libzfs_error_description(g_zfsHandle));
987                 }
988         }
989         else if (ShouldDegrade()) {
990                 /* Degrade the vdev and close the case. */
991                 if (zpool_vdev_degrade(zhp, (uint64_t)m_vdevGUID,
992                                        VDEV_AUX_ERR_EXCEEDED) == 0) {
993                         syslog(LOG_INFO, "Degrading vdev(%s/%s)",
994                                PoolGUIDString().c_str(),
995                                VdevGUIDString().c_str());
996                         Close();
997                         return;
998                 }
999                 else {
1000                         syslog(LOG_ERR, "Degrade vdev(%s/%s): %s: %s\n",
1001                                PoolGUIDString().c_str(),
1002                                VdevGUIDString().c_str(),
1003                                libzfs_error_action(g_zfsHandle),
1004                                libzfs_error_description(g_zfsHandle));
1005                 }
1006         }
1007         Serialize();
1008 }
1009
1010 Vdev
1011 CaseFile::BeingReplacedBy(zpool_handle_t *zhp) {
1012         Vdev vd(zhp, CaseVdev(zhp));
1013         std::list<Vdev> children;
1014         std::list<Vdev>::iterator children_it;
1015
1016         Vdev parent(vd.Parent());
1017         Vdev replacing(NonexistentVdev);
1018
1019         /*
1020          * To determine whether we are being replaced by another spare that
1021          * is still working, then make sure that it is currently spared and
1022          * that the spare is either resilvering or healthy.  If any of these
1023          * conditions fail, then we are not being replaced by a spare.
1024          *
1025          * If the spare is healthy, then the case file should be closed very
1026          * soon after this check.
1027          */
1028         if (parent.DoesNotExist()
1029          || parent.Name(zhp, /*verbose*/false) != "spare")
1030                 return (NonexistentVdev);
1031
1032         children = parent.Children();
1033         children_it = children.begin();
1034         for (;children_it != children.end(); children_it++) {
1035                 Vdev child = *children_it;
1036
1037                 /* Skip our vdev. */
1038                 if (child.GUID() == VdevGUID())
1039                         continue;
1040                 /*
1041                  * Accept the first child that doesn't match our GUID, or
1042                  * any resilvering/healthy device if one exists.
1043                  */
1044                 if (replacing.DoesNotExist() || child.IsResilvering()
1045                  || child.State() == VDEV_STATE_HEALTHY)
1046                         replacing = child;
1047         }
1048
1049         return (replacing);
1050 }
1051
/*
 * Attempt to replace this case's vdev with the device at "path", using
 * zpool_vdev_attach in replace mode.  When isspare is set and we have
 * already been spared by a broken spare, the broken spare (not the
 * original device) is the one replaced.  Returns whether the triggering
 * event was consumed.
 */
bool
CaseFile::Replace(const char* vdev_type, const char* path, bool isspare) {
	nvlist_t *nvroot, *newvd;
	const char *poolname;
	string oldstr(VdevGUIDString());
	bool retval = true;

	/* Figure out what pool we're working on */
	ZpoolList zpl(ZpoolList::ZpoolByGUID, &m_poolGUID);
	zpool_handle_t *zhp(zpl.empty() ? NULL : zpl.front());
	if (zhp == NULL) {
		syslog(LOG_ERR, "CaseFile::Replace: could not find pool for "
		       "pool_guid %" PRIu64 ".", (uint64_t)m_poolGUID);
		return (false);
	}
	poolname = zpool_get_name(zhp);
	Vdev vd(zhp, CaseVdev(zhp));
	Vdev replaced(BeingReplacedBy(zhp));

	if (isspare && !vd.IsSpare() && !replaced.DoesNotExist()) {
		/* If we are already being replaced by a working spare, pass. */
		if (replaced.IsResilvering()
		 || replaced.State() == VDEV_STATE_HEALTHY) {
			syslog(LOG_INFO, "CaseFile::Replace(%s->%s): already "
			    "replaced", VdevGUIDString().c_str(), path);
			return (/*consumed*/false);
		}
		/*
		 * If we have already been replaced by a spare, but that spare
		 * is broken, we must spare the spare, not the original device.
		 */
		oldstr = replaced.GUIDString();
		syslog(LOG_INFO, "CaseFile::Replace(%s->%s): sparing "
		    "broken spare %s instead", VdevGUIDString().c_str(),
		    path, oldstr.c_str());
	}

	/*
	 * Build a root vdev/leaf vdev configuration suitable for
	 * zpool_vdev_attach. Only enough data for the kernel to find
	 * the device (i.e. type and disk device node path) are needed.
	 */
	nvroot = NULL;
	newvd = NULL;

	if (nvlist_alloc(&nvroot, NV_UNIQUE_NAME, 0) != 0
	 || nvlist_alloc(&newvd, NV_UNIQUE_NAME, 0) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to allocate "
		    "configuration data.", poolname, oldstr.c_str());
		if (nvroot != NULL)
			nvlist_free(nvroot);
		return (false);
	}
	if (nvlist_add_string(newvd, ZPOOL_CONFIG_TYPE, vdev_type) != 0
	 || nvlist_add_string(newvd, ZPOOL_CONFIG_PATH, path) != 0
	 || nvlist_add_string(nvroot, ZPOOL_CONFIG_TYPE, VDEV_TYPE_ROOT) != 0
	 || nvlist_add_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
				    &newvd, 1) != 0) {
		syslog(LOG_ERR, "Replace vdev(%s/%s): Unable to initialize "
		    "configuration data.", poolname, oldstr.c_str());
		nvlist_free(newvd);
		nvlist_free(nvroot);
		/*
		 * NOTE(review): this failure path returns true ("consumed")
		 * while the allocation failure above returns false — confirm
		 * the asymmetry is intentional.
		 */
		return (true);
	}

	/* Data was copied when added to the root vdev. */
	nvlist_free(newvd);

	/* zpool_vdev_attach with replace=B_TRUE performs the replacement. */
	retval = (zpool_vdev_attach(zhp, oldstr.c_str(), path, nvroot,
	    /*replace*/B_TRUE) == 0);
	if (retval)
		syslog(LOG_INFO, "Replacing vdev(%s/%s) with %s\n",
		    poolname, oldstr.c_str(), path);
	else
		syslog(LOG_ERR, "Replace vdev(%s/%s): %s: %s\n",
		    poolname, oldstr.c_str(), libzfs_error_action(g_zfsHandle),
		    libzfs_error_description(g_zfsHandle));
	nvlist_free(nvroot);

	return (retval);
}
1133
1134 /* Does the argument event refer to a checksum error? */
1135 static bool
1136 IsChecksumEvent(const Event* const event)
1137 {
1138         return ("ereport.fs.zfs.checksum" == event->Value("type"));
1139 }
1140
1141 /* Does the argument event refer to an IO error? */
1142 static bool
1143 IsIOEvent(const Event* const event)
1144 {
1145         return ("ereport.fs.zfs.io" == event->Value("type"));
1146 }
1147
1148 bool
1149 CaseFile::ShouldDegrade() const
1150 {
1151         return (std::count_if(m_events.begin(), m_events.end(),
1152                               IsChecksumEvent) > ZFS_DEGRADE_IO_COUNT);
1153 }
1154
bool
CaseFile::ShouldFault() const
{
	/*
	 * Fault once strictly more than the threshold of I/O errors has
	 * been recorded.  NOTE(review): this reuses ZFS_DEGRADE_IO_COUNT
	 * rather than a fault-specific constant — presumably the same
	 * limit is intended for both policies, but worth confirming.
	 */
	return (std::count_if(m_events.begin(), m_events.end(),
			      IsIOEvent) > ZFS_DEGRADE_IO_COUNT);
}
1161
1162 nvlist_t *
1163 CaseFile::CaseVdev(zpool_handle_t *zhp) const
1164 {
1165         return (VdevIterator(zhp).Find(VdevGUID()));
1166 }