2 # This file and its contents are supplied under the terms of the
3 # Common Development and Distribution License ("CDDL"), version 1.0.
4 # You may only use this file in accordance with the terms of version
7 # A full copy of the text of the CDDL should have accompanied this
8 # source. A copy of the CDDL is also available via the Internet at
9 # http://www.illumos.org/license/CDDL.
13 # Copyright 2009 Sun Microsystems, Inc. All rights reserved.
14 # Use is subject to license terms.
15 # Copyright (c) 2012, 2019 by Delphix. All rights reserved.
16 # Copyright 2016 Nexenta Systems, Inc.
17 # Copyright (c) 2016, 2017 by Intel Corporation. All rights reserved.
18 # Copyright (c) 2017 Lawrence Livermore National Security, LLC.
19 # Copyright (c) 2017 Datto Inc.
20 # Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
21 # Copyright 2019 Richard Elling
25 # Returns SCSI host number for the given disk
# Usage: get_scsi_host <disk>
# Lists /sys/block/<disk>/device/scsi_device and prints the text that
# precedes the first colon of what ls reports.
# NOTE(review): $disk is expanded below but its assignment is not among
# the lines shown; presumably it is set from $1 -- confirm.
27 function get_scsi_host #disk
30 ls /sys/block/${disk}/device/scsi_device | cut -d : -f 1
34 # Cause a scan of all scsi host adapters by default
36 # $1 optional host number
# Usage: scan_scsi_hosts [hostnum]
# Writes the string - - - to the scan attribute of entries under
# /sys/class/scsi_host, running each write through log_must eval.
# NOTE(review): $hostnum is tested below but its assignment is not among
# the lines shown; presumably it is set from $1 -- confirm.
38 function scan_scsi_hosts
# An empty $hostnum selects every host* entry; the later write targets
# host$hostnum only.
43 if [[ -z $hostnum ]]; then
44 for host in /sys/class/scsi_host/host*; do
45 log_must eval "echo '- - -' > $host/scan"
49 "echo /sys/class/scsi_host/host$hostnum/scan" \
52 "echo '- - -' > /sys/class/scsi_host/host$hostnum/scan"
58 # Wait for newly created block devices to have their minors created.
59 # Additional arguments can be passed to udevadm trigger; they are
60 # typically expected to be block device pathnames. This is useful when
61 # waiting on a specific device to settle rather than triggering
62 # all devices and waiting for them all to settle.
64 # The udevadm settle timeout can be 120 or 180 seconds by default for
65 # some distros. If a long delay is experienced, it could be due to some
66 # strangeness in a malfunctioning device that isn't related to the devices
67 # under test. To help debug this condition, a notice is given if settle takes
70 # Note: there is no meaningful return code if udevadm fails. Consumers
71 # should not expect a return code (do not call as argument to log_must)
# Usage: block_device_wait [path ...]
# The argument count is read through ${#@}; $dev below presumably
# iterates over the same arguments -- confirm.
73 function block_device_wait
# Record the start time so the duration of the settle step can be
# computed from $SECONDS afterwards.
77 typeset start=$SECONDS
79 typeset elapsed=$((SECONDS - start))
# NOTE(review): inside [[ ]] the > operator compares strings, not
# numbers, so 7 sorts after 60 while 100 sorts before it. An arithmetic
# test such as (( elapsed > 60 )) looks like what is intended -- confirm.
80 [[ $elapsed > 60 ]] && \
81 log_note udevadm settle time too long: $elapsed
# No paths were given.
83 if [[ ${#@} -eq 0 ]]; then
84 # Do something that has to go through the geom event
86 sysctl kern.geom.conftxt >/dev/null
90 # Poll for the given paths to appear, but give up eventually.
# At most five polling rounds.
92 for (( i = 0; i < 5; ++i )); do
# NOTE(review): -f is true only for regular files. If the paths are
# device nodes this test can never see them appear, and -e (or -b / -c)
# may be what is wanted -- confirm.
96 if ! [[ -f $dev ]]; then
109 # Check if the given device is a physical device
# Usage: is_physical_device <device>
# <device> may be a bare name or carry a $DEV_DSKDIR/ or $DEV_RDSKDIR/
# prefix.
# NOTE(review): the bracket expression [0-F] in the last pattern is a
# character range. Besides 0-9 and A-F it also admits the punctuation
# that sorts between 9 and A (in the C locale : ; < = > ? @) and it
# excludes lowercase a-f -- confirm that is acceptable.
# NOTE(review): egrep is deprecated in current GNU grep in favour of
# grep -E.
111 function is_physical_device #device
# Reduce the argument to a bare device name by stripping either
# directory prefix.
113 typeset device=${1#$DEV_DSKDIR/}
114 device=${device#$DEV_RDSKDIR/}
# True when the node under $DEV_DSKDIR passes is_disk_device and the
# loop module exposes its max_part parameter file.
117 is_disk_device "$DEV_DSKDIR/$device" && \
118 [[ -f /sys/module/loop/parameters/max_part ]]
120 elif is_freebsd; then
121 is_disk_device "$DEV_DSKDIR/$device" && \
122 echo $device | egrep -q \
131 echo $device | egrep "^c[0-F]+([td][0-F]+)+$" > /dev/null 2>&1
137 # Check if the given device is a real device (i.e. a SCSI device)
# Usage: is_real_device <disk>
# <disk> is looked up relative to $DEV_RDSKDIR.
# NOTE(review): $disk is used below but its assignment is not among the
# lines shown; presumably it is set from $1 -- confirm.
139 function is_real_device #disk
# An empty $disk is reported through log_fail.
142 [[ -z $disk ]] && log_fail "No argument for disk given."
# Search the TYPE column printed by lsblk for the string disk; lsblk
# errors are discarded and egrep output is suppressed, so only the
# pipeline status is of interest.
145 lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
146 egrep disk >/dev/null
152 # Check if the given device is a loop device
# Usage: is_loop_device <disk>
# <disk> is looked up relative to $DEV_RDSKDIR.
# NOTE(review): $disk is used below but its assignment is not among the
# lines shown; presumably it is set from $1 -- confirm.
154 function is_loop_device #disk
# An empty $disk is reported through log_fail.
157 [[ -z $disk ]] && log_fail "No argument for disk given."
# Search the TYPE column printed by lsblk for the string loop; lsblk
# errors are discarded and egrep output is suppressed, so only the
# pipeline status is of interest.
160 lsblk $DEV_RDSKDIR/$disk -o TYPE 2>/dev/null | \
161 egrep loop >/dev/null
168 # Check if the given device is a multipath device and if there is a symbolic
169 # link to a device mapper and to a disk
170 # Currently no support for dm devices alone without multipath
173 # Check if the given device is a gmultipath device.
176 # No multipath detection.
# Usage: is_mpath_device <disk>
# <disk> is looked up relative to $DEV_MPATHDIR.
# NOTE(review): $disk is used below but its assignment is not among the
# lines shown; presumably it is set from $1 -- confirm.
178 function is_mpath_device #disk
# An empty $disk is reported through log_fail.
181 [[ -z $disk ]] && log_fail "No argument for disk given."
# Search the TYPE column printed by lsblk for the string mpath; lsblk
# errors are discarded.
184 lsblk $DEV_MPATHDIR/$disk -o TYPE 2>/dev/null | \
185 egrep mpath >/dev/null
# readlink (without -f) succeeds only when the node is a symbolic link;
# its output is discarded.
187 readlink $DEV_MPATHDIR/$disk > /dev/null 2>&1
192 elif is_freebsd; then
# On FreeBSD the node only has to pass is_disk_device.
193 is_disk_device $DEV_MPATHDIR/$disk
200 # Check if the given path is the appropriate sort of device special node.
# Usage: is_disk_device <path>
# NOTE(review): the test applied to <path> is not among the lines shown.
# Going by the comment below it presumably checks for a character
# device on FreeBSD and a block device elsewhere -- confirm.
202 function is_disk_device #path
207 # FreeBSD doesn't have block devices, only character devices.
214 # Set the slice prefix for disk partitioning depending
215 # on whether the device is a real, multipath, or loop device.
216 # Currently all disks have to be of the same type, so only
217 # checks first disk to determine slice prefix.
# Takes no arguments; reads $DISKS and $DISK_ARRAY_NUM and exports
# SLICE_PREFIX as either the empty string or p.
# NOTE(review): $i is used by the loop test but its initialisation is
# not among the lines shown -- confirm.
219 function set_slice_prefix
225 while (( i < $DISK_ARRAY_NUM )); do
# NOTE(review): the awk program is single-quoted, so i here is an awk
# variable (unset, hence 0) and not the shell loop counter. The
# expression is therefore always $1, the first word of $DISKS. That
# agrees with the header (only the first disk is checked), but the loop
# index has no effect on which disk is picked -- confirm intent.
226 disk="$(echo $DISKS | nawk '{print $(i + 1)}')"
# Empty prefix: a multipath device whose 18th character is not a digit
# (the awk pattern prints nothing in that case, making -z true), or any
# real device.
227 if ( is_mpath_device $disk ) && [[ -z $(echo $disk | awk 'substr($1,18,1)\
228 ~ /^[[:digit:]]+$/') ]] || ( is_real_device $disk ); then
229 export SLICE_PREFIX=""
231 elif ( is_mpath_device $disk || is_loop_device \
233 export SLICE_PREFIX="p"
# Reached for device types that none of the tests above accept.
236 log_fail "$disk not supported for partitioning."
244 # Set the directory path of the listed devices in $DISK_ARRAY_NUM
245 # Currently all disks have to be of the same type, so only
246 # checks first disk to determine device directory
247 # default = /dev (linux)
248 # real disk = /dev (linux)
249 # multipath device = /dev/mapper (linux)
# Takes no arguments; reads $DISKS and $DISK_ARRAY_NUM and exports
# DEV_DSKDIR. The value is $DEV_MPATHDIR when the examined disk is a
# multipath device and $DEV_RDSKDIR in the other visible assignments.
# NOTE(review): $i is used by the loop test but its initialisation is
# not among the lines shown -- confirm.
251 function set_device_dir
257 while (( i < $DISK_ARRAY_NUM )); do
# NOTE(review): the awk program is single-quoted, so i here is an awk
# variable (unset, hence 0) and not the shell loop counter; this always
# yields the first word of $DISKS. That agrees with the header (only the
# first disk is checked) -- confirm intent.
258 disk="$(echo $DISKS | nawk '{print $(i + 1)}')"
259 if is_mpath_device $disk; then
260 export DEV_DSKDIR=$DEV_MPATHDIR
263 export DEV_DSKDIR=$DEV_RDSKDIR
269 export DEV_DSKDIR=$DEV_RDSKDIR
274 # Get the directory path of given device
# Usage: get_device_dir <device>
# NOTE(review): $device is used below but its assignment is not among
# the lines shown; presumably it is set from $1 -- confirm.
276 function get_device_dir #device
# On FreeBSD the first operand is already false, so is_physical_device
# is not called and this branch is not taken.
280 if ! is_freebsd && ! is_physical_device $device; then
# A bare / is excluded from what follows.
281 if [[ $device != "/" ]]; then
# Probe for a disk node of that name under $DEV_DSKDIR.
284 if is_disk_device "$DEV_DSKDIR/$device"; then
294 # Get persistent name for given disk
# Usage: get_persistent_disk_name <device>
# Derives $dev_id from the udevadm info output for $DEV_DSKDIR/<device>.
# NOTE(review): $device is used below but its assignment is not among
# the lines shown; presumably it is set from $1 -- confirm.
296 function get_persistent_disk_name #device
302 if is_real_device $device; then
# First output line mentioning disk/by-id: take its second
# whitespace-separated field, then the third /-separated component of
# that field.
303 dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
304 | egrep disk/by-id | nawk '{print $2; exit}' \
305 | nawk -F / '{print $3}')"
307 elif is_mpath_device $device; then
# Same extraction, restricted to the disk/by-id/dm-uuid entries.
308 dev_id="$(udevadm info -q all -n $DEV_DSKDIR/$device \
309 | egrep disk/by-id/dm-uuid \
310 | nawk '{print $2; exit}' \
311 | nawk -F / '{print $3}')"
322 # Online or offline a disk on the system
324 # First checks state of disk. Test will fail if disk is not properly onlined
325 # or offlined. Online is a full rescan of SCSI disks by echoing to every
# Usage: on_off_disk <disk> <online|offline> [host]
# offline: writes offline to the sysfs state attribute and 1 to the
#   delete attribute of the disk. For a multipath device this is done
#   for the first entry under /sys/block/<dm>/slaves, repeated while
#   entries remain. lsscsi is consulted before and after.
# online: calls scan_scsi_hosts with $host, then checks that lsscsi
#   lists the disk (for multipath, the first slave).
# NOTE(review): $disk, $state, $host and $retries are used below but
# their assignments are not among the lines shown -- confirm.
# NOTE(review): the egrep calls receive $disk and $dep unquoted and
# unanchored, so a name such as sda also matches a line containing
# sdaa -- confirm this cannot give false positives on the test hosts.
328 function on_off_disk # disk state{online,offline} host
# || and && have equal precedence and associate left to right in the
# shell, so log_fail runs when either value is empty.
334 [[ -z $disk ]] || [[ -z $state ]] && \
335 log_fail "Arguments invalid or missing"
338 if [[ $state == "offline" ]] && ( is_mpath_device $disk ); then
# Second /-separated component of the symlink target, used below as
# the name under /sys/block.
339 dm_name="$(readlink $DEV_DSKDIR/$disk \
340 | nawk -F / '{print $2}')"
341 dep="$(ls /sys/block/${dm_name}/slaves \
342 | nawk '{print $1}')"
# Loop for as long as a slave entry is found.
343 while [[ -n $dep ]]; do
344 #check if disk is online
345 lsscsi | egrep $dep > /dev/null
# Build the sysfs paths of the state and delete attributes of this slave.
347 dep_dir="/sys/block/${dm_name}"
348 dep_dir+="/slaves/${dep}/device"
349 ss="${dep_dir}/state"
350 sd="${dep_dir}/delete"
# Mark the device offline, then delete it.
351 log_must eval "echo 'offline' > ${ss}"
352 log_must eval "echo '1' > ${sd}"
353 lsscsi | egrep $dep > /dev/null
355 log_fail "Offlining" \
359 dep="$(ls /sys/block/$dm_name/slaves \
360 2>/dev/null | nawk '{print $1}')"
362 elif [[ $state == "offline" ]] && ( is_real_device $disk ); then
363 #check if disk is online
364 lsscsi | egrep $disk > /dev/null
# Same offline-then-delete sequence, applied directly to the disk.
366 dev_state="/sys/block/$disk/device/state"
367 dev_delete="/sys/block/$disk/device/delete"
368 log_must eval "echo 'offline' > ${dev_state}"
369 log_must eval "echo '1' > ${dev_delete}"
370 lsscsi | egrep $disk > /dev/null
372 log_fail "Offlining $disk" \
376 log_note "$disk is already offline"
378 elif [[ $state == "online" ]]; then
# Rescan; with an empty $host scan_scsi_hosts receives no argument.
380 scan_scsi_hosts $host
382 if is_mpath_device $disk; then
383 dm_name="$(readlink $DEV_DSKDIR/$disk \
384 | nawk -F / '{print $2}')"
385 dep="$(ls /sys/block/$dm_name/slaves \
386 | nawk '{print $1}')"
387 lsscsi | egrep $dep > /dev/null
389 log_fail "Onlining $disk failed"
391 elif is_real_device $disk; then
# Keep checking until lsscsi lists the disk; give up through log_fail
# once $retries exceeds 2.
394 while ! lsscsi | egrep -q $disk; do
395 if (( $retries > 2 )); then
396 log_fail "Onlining $disk failed"
403 log_fail "$disk is not a real dev"
406 log_fail "$disk failed to $state"
412 # Simulate disk removal
# Usage: remove_disk <disk>
# Delegates to on_off_disk with the state offline.
# NOTE(review): $disk is used below but its assignment is not among the
# lines shown; presumably it is set from $1 -- confirm.
414 function remove_disk #disk
417 on_off_disk $disk "offline"
422 # Simulate disk insertion for the given SCSI host
# Usage: insert_disk <disk> <scsi_host>
# Delegates to on_off_disk with the state online, passing the host
# number through as the third argument.
# NOTE(review): $disk and $scsi_host are used below but their
# assignments are not among the lines shown; presumably they are set
# from $1 and $2 -- confirm.
424 function insert_disk #disk scsi_host
428 on_off_disk $disk "online" $scsi_host
433 # Load scsi_debug module with specified parameters
434 # $blksz can be one of: < 512b | 512e | 4Kn >
# Usage: load_scsi_debug <dev_size_mb> <add_host> <num_tgts> <max_luns> <blksz>
# Loads the scsi_debug module with the dev_size_mb, add_host, num_tgts,
# max_luns, sector_size and physblk_exp parameters, then checks that
# lsscsi lists a scsi_debug device.
# NOTE(review): $devsize, $hosts, $tgts, $luns, $blksz, $sector and
# $blkexp are used below but their assignments are not among the lines
# shown. Presumably the first five come from $1..$5 and the last two
# from the case statement on the block size -- confirm.
436 function load_scsi_debug # dev_size_mb add_host num_tgts max_luns blksz
# || and && have equal precedence and associate left to right in the
# shell, so log_fail runs when any one of the five values is empty.
444 [[ -z $devsize ]] || [[ -z $hosts ]] || [[ -z $tgts ]] || \
445 [[ -z $luns ]] || [[ -z $blksz ]] && \
446 log_fail "Arguments invalid or missing"
# Default arm: any block size value not matched by an earlier arm.
461 *) log_fail "Unsupported blksz value: $5" ;;
# Dry run (-n): checks that modprobe can resolve the module without
# loading it.
465 modprobe -n scsi_debug
467 log_unsupported "Platform does not have scsi_debug"
# A module that is already loaded is reported as a failure.
470 lsmod | egrep scsi_debug > /dev/null
472 log_fail "scsi_debug module already installed"
474 log_must modprobe scsi_debug dev_size_mb=$devsize \
475 add_host=$hosts num_tgts=$tgts max_luns=$luns \
476 sector_size=$sector physblk_exp=$blkexp
# Confirm that a scsi_debug device shows up in lsscsi after loading.
478 lsscsi | egrep scsi_debug > /dev/null
480 log_fail "scsi_debug module install failed"
487 # Unload scsi_debug module, if needed.
# Takes no arguments. Runs modprobe -r scsi_debug through
# log_must_retry.
# NOTE(review): the arguments in use and 5 are presumably the error text
# that triggers a retry and the retry limit -- confirm against the
# log_must_retry definition.
489 function unload_scsi_debug
491 log_must_retry "in use" 5 modprobe -r scsi_debug
495 # Get scsi_debug device name.
496 # Returns basename of scsi_debug device (for example "sdb").
# Takes no arguments. Makes up to ten attempts to read the device node
# of the first lsscsi line that mentions scsi_debug.
498 function get_debug_device
500 for i in {1..10} ; do
# Sixth whitespace-separated column of that line, reduced to its third
# /-separated component (for a /dev/sdb entry that is sdb).
501 val=$(lsscsi | nawk '/scsi_debug/ {print $6; exit}' | cut -d / -f3)
503 # lsscsi can take time to settle
# A lone - presumably means no device node has been assigned yet --
# confirm against lsscsi output.
504 if [ "$val" != "-" ] ; then
513 # Get actual devices used by the pool (e.g. on Linux, sdb1 rather than sdb).
# Usage: get_pool_devices <testpool> <devdir>
# Collects into $out the names of the devices under <devdir> that
# zpool status -P reports for <testpool>.
# NOTE(review): $testpool and $devdir are used below but their
# assignments are not among the lines shown; presumably they are set
# from $1 and $2 -- confirm.
# NOTE(review): grep and sed receive ${devdir} as a pattern, so regex
# metacharacters in it (a dot, for example) are not matched literally.
515 function get_pool_devices #testpool #devdir
521 if is_linux || is_freebsd; then
# First column of every status line that mentions ${devdir}.
522 out=$(zpool status -P $testpool |grep ${devdir} | awk '{print $1}')
# Strip the ${devdir}/ prefix from each name. The unquoted $out is
# already joined with single spaces by echo; tr turns the trailing
# newline into a space.
523 out=$(echo $out | sed -e "s|${devdir}/||g" | tr '\n' ' ')
529 # Write to standard out giving the level, device name, offset and length
530 # of all blocks in an input file. The offset and length are in units of
531 # 512 byte blocks. In the case of mirrored vdevs, only the first
532 # device is listed, as the levels, blocks and offsets will be the same
533 # on other devices. Note that this function only works with mirrored
534 # or non-redundant pools, not raidz.
536 # The output of this function can be used to introduce corruption at
537 # varying levels of indirection.
# Usage: list_file_blocks <input_file>
# Output: one line per block, in the form
#   <level> <vdev path> <offset> <length>
# with offset and length expressed in 512-byte units.
# NOTE(review): the sed expression that builds VDEV_MAP captures a
# single digit between the brackets, so children[10] and above would
# not be mapped -- confirm pools that wide are out of scope.
# NOTE(review): the awk filter selects L[0-8] while the following sed
# accepts L[0-9]; harmless if level 9 never occurs -- confirm.
539 function list_file_blocks # input_file
541 typeset input_file=$1
# The argument must name an existing regular file.
543 [[ -f $input_file ]] || log_fail "Couldn't find $input_file"
# Dataset holding the file; the pool name is everything before the
# first slash of the dataset name.
545 typeset ds="$(zfs list -H -o name $input_file)"
546 typeset pool="${ds%%/*}"
547 typeset objnum="$(get_objnum $input_file)"
550 # Establish a mapping between vdev ids as shown in a DVA and the
551 # pathnames they correspond to in ${VDEV_MAP[]}.
553 eval $(zdb -C $pool | awk '
555 printf("typeset VDEV_MAP\n");
562 /path: / && looking == 1 {
566 ' | sed -n 's/^children\[\([0-9]\)\]: \(.*\)$/VDEV_MAP[\1]=\2/p')
569 # The awk below parses the output of zdb, printing out the level
570 # of each block along with vdev id, offset and length. The last
571 # two are converted to decimal in the while loop. 4M is added to
572 # the offset to compensate for the first two labels and boot
573 # block. Lastly, the offset and length are printed in units of
574 # 512b blocks for ease of use with dd.
576 log_must zpool sync -f
577 typeset level path offset length
578 zdb -ddddd $ds $objnum | awk -F: '
579 BEGIN { looking = 0 }
580 /^Indirect blocks:/ { looking = 1}
581 /^\t\tsegment / { looking = 0}
582 /L[0-8]/ && looking == 1 { print $0}
583 ' | sed -n 's/^.*\(L[0-9]\) \([0-9]*\):\([0-9a-f]*\):\([0-9a-f]*\) .*$/\1 \2 \3 \4/p' | \
584 while read level path offset length; do
# The 16# prefix makes arithmetic expansion read the value in base 16.
585 offset=$((16#$offset)) # Conversion from hex
586 length=$((16#$length))
# Add 4 MiB to the offset, then express both values in 512-byte units.
587 offset="$(((offset + 4 * 1024 * 1024) / 512))"
588 length="$((length / 512))"
# Translate the vdev id into its path through VDEV_MAP.
589 echo "$level ${VDEV_MAP[$path]} $offset $length"
593 function corrupt_blocks_at_level # input_file corrupt_level
595 typeset input_file=$1
596 typeset corrupt_level="L${2:-0}"
597 typeset level path offset length
599 [[ -f $input_file ]] || log_fail "Couldn't find $input_file"
602 # Temporarily allow corrupting an inuse device.
603 debugflags=$(sysctl -n kern.geom.debugflags)
604 sysctl kern.geom.debugflags=16
607 list_file_blocks $input_file | \
608 while read level path offset length; do
609 if [[ $level = $corrupt_level ]]; then
610 log_must dd if=/dev/urandom of=$path bs=512 \
611 count=$length seek=$offset conv=notrunc
616 sysctl kern.geom.debugflags=$debugflags
619 # This is necessary for pools made of loop devices.