]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zio_inject.c
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / sys / cddl / contrib / opensolaris / uts / common / fs / zfs / zio_inject.c
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright (c) 2013 by Delphix. All rights reserved.
24  */
25
26 /*
27  * ZFS fault injection
28  *
29  * To handle fault injection, we keep track of a series of zinject_record_t
30  * structures which describe which logical block(s) should be injected with a
31  * fault.  These are kept in a global list.  Each record corresponds to a given
32  * spa_t and maintains a special hold on the spa_t so that it cannot be deleted
33  * or exported while the injection record exists.
34  *
35  * Device level injection is done using the 'zi_guid' field.  If this is set, it
36  * means that the error is destined for a particular device, not a piece of
37  * data.
38  *
39  * This is a rather poor data structure and algorithm, but we don't expect more
40  * than a few faults at any one time, so it should be sufficient for our needs.
41  */
42
43 #include <sys/arc.h>
44 #include <sys/zio_impl.h>
45 #include <sys/zfs_ioctl.h>
46 #include <sys/vdev_impl.h>
47 #include <sys/dmu_objset.h>
48 #include <sys/fs/zfs.h>
49
/*
 * Count of registered injection handlers.  zio_inject_fault() bumps it with
 * atomic_add_32() when it adds a handler and zio_clear_fault() drops it
 * again; some entry points in this file return early while it is zero.
 */
uint32_t zio_injection_enabled;

/*
 * One registered fault: the caller's injection record plus the bookkeeping
 * needed to find it again and to tie it to a pool.
 */
typedef struct inject_handler {
        /* identifier assigned from inject_next_id at registration time */
        int                     zi_id;
        /* pool the handler was registered against */
        spa_t                   *zi_spa;
        /* private copy of the record passed to zio_inject_fault() */
        zinject_record_t        zi_record;
        /* linkage on the inject_handlers list */
        list_node_t             zi_link;
} inject_handler_t;

/* All registered handlers, in registration order (inserted at the tail). */
static list_t inject_handlers;
/* Taken as reader to walk inject_handlers, as writer to add or remove. */
static krwlock_t inject_lock;
/* Next value handed out as a handler's zi_id. */
static int inject_next_id = 1;
62
63 /*
64  * Returns true if the given record matches the I/O in progress.
65  */
66 static boolean_t
67 zio_match_handler(zbookmark_t *zb, uint64_t type,
68     zinject_record_t *record, int error)
69 {
70         /*
71          * Check for a match against the MOS, which is based on type
72          */
73         if (zb->zb_objset == DMU_META_OBJSET &&
74             record->zi_objset == DMU_META_OBJSET &&
75             record->zi_object == DMU_META_DNODE_OBJECT) {
76                 if (record->zi_type == DMU_OT_NONE ||
77                     type == record->zi_type)
78                         return (record->zi_freq == 0 ||
79                             spa_get_random(100) < record->zi_freq);
80                 else
81                         return (B_FALSE);
82         }
83
84         /*
85          * Check for an exact match.
86          */
87         if (zb->zb_objset == record->zi_objset &&
88             zb->zb_object == record->zi_object &&
89             zb->zb_level == record->zi_level &&
90             zb->zb_blkid >= record->zi_start &&
91             zb->zb_blkid <= record->zi_end &&
92             error == record->zi_error)
93                 return (record->zi_freq == 0 ||
94                     spa_get_random(100) < record->zi_freq);
95
96         return (B_FALSE);
97 }
98
99 /*
100  * Panic the system when a config change happens in the function
101  * specified by tag.
102  */
103 void
104 zio_handle_panic_injection(spa_t *spa, char *tag, uint64_t type)
105 {
106         inject_handler_t *handler;
107
108         rw_enter(&inject_lock, RW_READER);
109
110         for (handler = list_head(&inject_handlers); handler != NULL;
111             handler = list_next(&inject_handlers, handler)) {
112
113                 if (spa != handler->zi_spa)
114                         continue;
115
116                 if (handler->zi_record.zi_type == type &&
117                     strcmp(tag, handler->zi_record.zi_func) == 0)
118                         panic("Panic requested in function %s\n", tag);
119         }
120
121         rw_exit(&inject_lock);
122 }
123
124 /*
125  * Determine if the I/O in question should return failure.  Returns the errno
126  * to be returned to the caller.
127  */
128 int
129 zio_handle_fault_injection(zio_t *zio, int error)
130 {
131         int ret = 0;
132         inject_handler_t *handler;
133
134         /*
135          * Ignore I/O not associated with any logical data.
136          */
137         if (zio->io_logical == NULL)
138                 return (0);
139
140         /*
141          * Currently, we only support fault injection on reads.
142          */
143         if (zio->io_type != ZIO_TYPE_READ)
144                 return (0);
145
146         rw_enter(&inject_lock, RW_READER);
147
148         for (handler = list_head(&inject_handlers); handler != NULL;
149             handler = list_next(&inject_handlers, handler)) {
150
151                 if (zio->io_spa != handler->zi_spa ||
152                     handler->zi_record.zi_cmd != ZINJECT_DATA_FAULT)
153                         continue;
154
155                 /* If this handler matches, return EIO */
156                 if (zio_match_handler(&zio->io_logical->io_bookmark,
157                     zio->io_bp ? BP_GET_TYPE(zio->io_bp) : DMU_OT_NONE,
158                     &handler->zi_record, error)) {
159                         ret = error;
160                         break;
161                 }
162         }
163
164         rw_exit(&inject_lock);
165
166         return (ret);
167 }
168
169 /*
170  * Determine if the zio is part of a label update and has an injection
171  * handler associated with that portion of the label. Currently, we
172  * allow error injection in either the nvlist or the uberblock region of
173  * of the vdev label.
174  */
175 int
176 zio_handle_label_injection(zio_t *zio, int error)
177 {
178         inject_handler_t *handler;
179         vdev_t *vd = zio->io_vd;
180         uint64_t offset = zio->io_offset;
181         int label;
182         int ret = 0;
183
184         if (offset >= VDEV_LABEL_START_SIZE &&
185             offset < vd->vdev_psize - VDEV_LABEL_END_SIZE)
186                 return (0);
187
188         rw_enter(&inject_lock, RW_READER);
189
190         for (handler = list_head(&inject_handlers); handler != NULL;
191             handler = list_next(&inject_handlers, handler)) {
192                 uint64_t start = handler->zi_record.zi_start;
193                 uint64_t end = handler->zi_record.zi_end;
194
195                 if (handler->zi_record.zi_cmd != ZINJECT_LABEL_FAULT)
196                         continue;
197
198                 /*
199                  * The injection region is the relative offsets within a
200                  * vdev label. We must determine the label which is being
201                  * updated and adjust our region accordingly.
202                  */
203                 label = vdev_label_number(vd->vdev_psize, offset);
204                 start = vdev_label_offset(vd->vdev_psize, label, start);
205                 end = vdev_label_offset(vd->vdev_psize, label, end);
206
207                 if (zio->io_vd->vdev_guid == handler->zi_record.zi_guid &&
208                     (offset >= start && offset <= end)) {
209                         ret = error;
210                         break;
211                 }
212         }
213         rw_exit(&inject_lock);
214         return (ret);
215 }
216
217
218 int
219 zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
220 {
221         inject_handler_t *handler;
222         int ret = 0;
223
224         /*
225          * We skip over faults in the labels unless it's during
226          * device open (i.e. zio == NULL).
227          */
228         if (zio != NULL) {
229                 uint64_t offset = zio->io_offset;
230
231                 if (offset < VDEV_LABEL_START_SIZE ||
232                     offset >= vd->vdev_psize - VDEV_LABEL_END_SIZE)
233                         return (0);
234         }
235
236         rw_enter(&inject_lock, RW_READER);
237
238         for (handler = list_head(&inject_handlers); handler != NULL;
239             handler = list_next(&inject_handlers, handler)) {
240
241                 if (handler->zi_record.zi_cmd != ZINJECT_DEVICE_FAULT)
242                         continue;
243
244                 if (vd->vdev_guid == handler->zi_record.zi_guid) {
245                         if (handler->zi_record.zi_failfast &&
246                             (zio == NULL || (zio->io_flags &
247                             (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
248                                 continue;
249                         }
250
251                         /* Handle type specific I/O failures */
252                         if (zio != NULL &&
253                             handler->zi_record.zi_iotype != ZIO_TYPES &&
254                             handler->zi_record.zi_iotype != zio->io_type)
255                                 continue;
256
257                         if (handler->zi_record.zi_error == error) {
258                                 /*
259                                  * For a failed open, pretend like the device
260                                  * has gone away.
261                                  */
262                                 if (error == ENXIO)
263                                         vd->vdev_stat.vs_aux =
264                                             VDEV_AUX_OPEN_FAILED;
265
266                                 /*
267                                  * Treat these errors as if they had been
268                                  * retried so that all the appropriate stats
269                                  * and FMA events are generated.
270                                  */
271                                 if (!handler->zi_record.zi_failfast &&
272                                     zio != NULL)
273                                         zio->io_flags |= ZIO_FLAG_IO_RETRY;
274
275                                 ret = error;
276                                 break;
277                         }
278                         if (handler->zi_record.zi_error == ENXIO) {
279                                 ret = SET_ERROR(EIO);
280                                 break;
281                         }
282                 }
283         }
284
285         rw_exit(&inject_lock);
286
287         return (ret);
288 }
289
290 /*
291  * Simulate hardware that ignores cache flushes.  For requested number
292  * of seconds nix the actual writing to disk.
293  */
294 void
295 zio_handle_ignored_writes(zio_t *zio)
296 {
297         inject_handler_t *handler;
298
299         rw_enter(&inject_lock, RW_READER);
300
301         for (handler = list_head(&inject_handlers); handler != NULL;
302             handler = list_next(&inject_handlers, handler)) {
303
304                 /* Ignore errors not destined for this pool */
305                 if (zio->io_spa != handler->zi_spa ||
306                     handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
307                         continue;
308
309                 /*
310                  * Positive duration implies # of seconds, negative
311                  * a number of txgs
312                  */
313                 if (handler->zi_record.zi_timer == 0) {
314                         if (handler->zi_record.zi_duration > 0)
315                                 handler->zi_record.zi_timer = ddi_get_lbolt64();
316                         else
317                                 handler->zi_record.zi_timer = zio->io_txg;
318                 }
319
320                 /* Have a "problem" writing 60% of the time */
321                 if (spa_get_random(100) < 60)
322                         zio->io_pipeline &= ~ZIO_VDEV_IO_STAGES;
323                 break;
324         }
325
326         rw_exit(&inject_lock);
327 }
328
329 void
330 spa_handle_ignored_writes(spa_t *spa)
331 {
332         inject_handler_t *handler;
333
334         if (zio_injection_enabled == 0)
335                 return;
336
337         rw_enter(&inject_lock, RW_READER);
338
339         for (handler = list_head(&inject_handlers); handler != NULL;
340             handler = list_next(&inject_handlers, handler)) {
341
342                 if (spa != handler->zi_spa ||
343                     handler->zi_record.zi_cmd != ZINJECT_IGNORED_WRITES)
344                         continue;
345
346                 if (handler->zi_record.zi_duration > 0) {
347                         VERIFY(handler->zi_record.zi_timer == 0 ||
348                             handler->zi_record.zi_timer +
349                             handler->zi_record.zi_duration * hz >
350                             ddi_get_lbolt64());
351                 } else {
352                         /* duration is negative so the subtraction here adds */
353                         VERIFY(handler->zi_record.zi_timer == 0 ||
354                             handler->zi_record.zi_timer -
355                             handler->zi_record.zi_duration >=
356                             spa_syncing_txg(spa));
357                 }
358         }
359
360         rw_exit(&inject_lock);
361 }
362
363 uint64_t
364 zio_handle_io_delay(zio_t *zio)
365 {
366         vdev_t *vd = zio->io_vd;
367         inject_handler_t *handler;
368         uint64_t seconds = 0;
369
370         if (zio_injection_enabled == 0)
371                 return (0);
372
373         rw_enter(&inject_lock, RW_READER);
374
375         for (handler = list_head(&inject_handlers); handler != NULL;
376             handler = list_next(&inject_handlers, handler)) {
377
378                 if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO)
379                         continue;
380
381                 if (vd->vdev_guid == handler->zi_record.zi_guid) {
382                         seconds = handler->zi_record.zi_timer;
383                         break;
384                 }
385
386         }
387         rw_exit(&inject_lock);
388         return (seconds);
389 }
390
391 /*
392  * Create a new handler for the given record.  We add it to the list, adding
393  * a reference to the spa_t in the process.  We increment zio_injection_enabled,
394  * which is the switch to trigger all fault injection.
395  */
396 int
397 zio_inject_fault(char *name, int flags, int *id, zinject_record_t *record)
398 {
399         inject_handler_t *handler;
400         int error;
401         spa_t *spa;
402
403         /*
404          * If this is pool-wide metadata, make sure we unload the corresponding
405          * spa_t, so that the next attempt to load it will trigger the fault.
406          * We call spa_reset() to unload the pool appropriately.
407          */
408         if (flags & ZINJECT_UNLOAD_SPA)
409                 if ((error = spa_reset(name)) != 0)
410                         return (error);
411
412         if (!(flags & ZINJECT_NULL)) {
413                 /*
414                  * spa_inject_ref() will add an injection reference, which will
415                  * prevent the pool from being removed from the namespace while
416                  * still allowing it to be unloaded.
417                  */
418                 if ((spa = spa_inject_addref(name)) == NULL)
419                         return (SET_ERROR(ENOENT));
420
421                 handler = kmem_alloc(sizeof (inject_handler_t), KM_SLEEP);
422
423                 rw_enter(&inject_lock, RW_WRITER);
424
425                 *id = handler->zi_id = inject_next_id++;
426                 handler->zi_spa = spa;
427                 handler->zi_record = *record;
428                 list_insert_tail(&inject_handlers, handler);
429                 atomic_add_32(&zio_injection_enabled, 1);
430
431                 rw_exit(&inject_lock);
432         }
433
434         /*
435          * Flush the ARC, so that any attempts to read this data will end up
436          * going to the ZIO layer.  Note that this is a little overkill, but
437          * we don't have the necessary ARC interfaces to do anything else, and
438          * fault injection isn't a performance critical path.
439          */
440         if (flags & ZINJECT_FLUSH_ARC)
441                 arc_flush(NULL);
442
443         return (0);
444 }
445
446 /*
447  * Returns the next record with an ID greater than that supplied to the
448  * function.  Used to iterate over all handlers in the system.
449  */
450 int
451 zio_inject_list_next(int *id, char *name, size_t buflen,
452     zinject_record_t *record)
453 {
454         inject_handler_t *handler;
455         int ret;
456
457         mutex_enter(&spa_namespace_lock);
458         rw_enter(&inject_lock, RW_READER);
459
460         for (handler = list_head(&inject_handlers); handler != NULL;
461             handler = list_next(&inject_handlers, handler))
462                 if (handler->zi_id > *id)
463                         break;
464
465         if (handler) {
466                 *record = handler->zi_record;
467                 *id = handler->zi_id;
468                 (void) strncpy(name, spa_name(handler->zi_spa), buflen);
469                 ret = 0;
470         } else {
471                 ret = SET_ERROR(ENOENT);
472         }
473
474         rw_exit(&inject_lock);
475         mutex_exit(&spa_namespace_lock);
476
477         return (ret);
478 }
479
480 /*
481  * Clear the fault handler with the given identifier, or return ENOENT if none
482  * exists.
483  */
484 int
485 zio_clear_fault(int id)
486 {
487         inject_handler_t *handler;
488
489         rw_enter(&inject_lock, RW_WRITER);
490
491         for (handler = list_head(&inject_handlers); handler != NULL;
492             handler = list_next(&inject_handlers, handler))
493                 if (handler->zi_id == id)
494                         break;
495
496         if (handler == NULL) {
497                 rw_exit(&inject_lock);
498                 return (SET_ERROR(ENOENT));
499         }
500
501         list_remove(&inject_handlers, handler);
502         rw_exit(&inject_lock);
503
504         spa_inject_delref(handler->zi_spa);
505         kmem_free(handler, sizeof (inject_handler_t));
506         atomic_add_32(&zio_injection_enabled, -1);
507
508         return (0);
509 }
510
511 void
512 zio_inject_init(void)
513 {
514         rw_init(&inject_lock, NULL, RW_DEFAULT, NULL);
515         list_create(&inject_handlers, sizeof (inject_handler_t),
516             offsetof(inject_handler_t, zi_link));
517 }
518
519 void
520 zio_inject_fini(void)
521 {
522         list_destroy(&inject_handlers);
523         rw_destroy(&inject_lock);
524 }