#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#

#
# Copyright (c) 2015, 2016 by Delphix. All rights reserved.
# Copyright (c) 2016, Intel Corporation.
#

. $STF_SUITE/include/libtest.shlib

# If neither is specified, do a nightly run.
[[ -z $PERF_REGRESSION_WEEKLY ]] && export PERF_REGRESSION_NIGHTLY=1

# Default runtime for each type of test run.
export PERF_RUNTIME_WEEKLY=$((30 * 60))
export PERF_RUNTIME_NIGHTLY=$((10 * 60))

# Default to JSON for fio output
export PERF_FIO_FORMAT=${PERF_FIO_FORMAT:-'json'}

# Default fs creation options
export PERF_FS_OPTS=${PERF_FS_OPTS:-'-o recsize=8k -o compress=lz4' \
    ' -o checksum=sha256 -o redundant_metadata=most'}
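
#
# Both defaults above use ${VAR:-default} expansion, so they can be
# overridden from the environment before the suite is launched, e.g.
# (hypothetical values):
#
#	export PERF_FIO_FORMAT=normal
#	export PERF_FS_OPTS='-o recsize=128k -o compress=off'
#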

function get_sync_str
{
	typeset sync=$1
	typeset sync_str=''

	[[ $sync -eq 0 ]] && sync_str='async'
	[[ $sync -eq 1 ]] && sync_str='sync'
	echo $sync_str
}

function get_suffix
{
	typeset threads=$1
	typeset sync=$2
	typeset iosize=$3

	typeset sync_str=$(get_sync_str $sync)
	typeset filesystems=$(get_nfilesystems)

	typeset suffix="$sync_str.$iosize-ios"
	suffix="$suffix.$threads-threads.$filesystems-filesystems"
	echo $suffix
}
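
# For example, 16 sync threads doing 8k I/Os across two filesystems
# produce the suffix "sync.8k-ios.16-threads.2-filesystems".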

function do_fio_run_impl
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3

	typeset threads=$4
	typeset threads_per_fs=$5
	typeset sync=$6
	typeset iosize=$7

	typeset sync_str=$(get_sync_str $sync)
	log_note "Running with $threads $sync_str threads, $iosize ios"

	if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
		log_must test $do_recreate
		verify_threads_per_fs $threads $threads_per_fs
	fi

	if $do_recreate; then
		recreate_perf_pool

		#
		# A value of zero for "threads_per_fs" is "special", and
		# means a single filesystem should be used, regardless
		# of the number of threads.
		#
		if [[ -n $threads_per_fs && $threads_per_fs -ne 0 ]]; then
			populate_perf_filesystems $((threads / threads_per_fs))
		else
			populate_perf_filesystems 1
		fi
	fi

	if $clear_cache; then
		# Clear the ARC
		zpool export $PERFPOOL
		zpool import $PERFPOOL
	fi

	if [[ -n $ZINJECT_DELAYS ]]; then
		apply_zinject_delays
	else
		log_note "No per-device commands to execute."
	fi

	#
	# Allow this to be overridden by the individual test case. This
	# can be used to run the FIO job against something other than
	# the default filesystem (e.g. against a clone).
	#
	export DIRECTORY=$(get_directory)
	log_note "DIRECTORY: $DIRECTORY"

	export RUNTIME=$PERF_RUNTIME
	export RANDSEED=$PERF_RANDSEED
	export COMPPERCENT=$PERF_COMPPERCENT
	export COMPCHUNK=$PERF_COMPCHUNK
	export FILESIZE=$((TOTAL_SIZE / threads))
	export NUMJOBS=$threads
	export SYNC_TYPE=$sync
	export BLOCKSIZE=$iosize
	sync

	#
	# When running locally, we want to keep the default behavior of
	# DIRECT == 0, so only set it when we're running over NFS to
	# disable client cache for reads.
	#
	if [[ $NFS -eq 1 ]]; then
		export DIRECT=1
		do_setup_nfs $script
	else
		export DIRECT=0
	fi

	# This will be part of the output filename.
	typeset suffix=$(get_suffix $threads $sync $iosize)

	# Start the data collection
	do_collect_scripts $suffix

	# Define output file
	typeset logbase="$(get_perf_output_dir)/$(basename \
	    $SUDO_COMMAND)"
	typeset outfile="$logbase.fio.$suffix"

	# Start the load
	if [[ $NFS -eq 1 ]]; then
		log_must ssh -t $NFS_USER@$NFS_CLIENT "
			fio --output-format=${PERF_FIO_FORMAT} \
			    --output /tmp/fio.out /tmp/test.fio
		"
		log_must scp $NFS_USER@$NFS_CLIENT:/tmp/fio.out $outfile
		log_must ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	else
		log_must fio --output-format=${PERF_FIO_FORMAT} \
		    --output $outfile $FIO_SCRIPTS/$script
	fi
}

#
# This function will run fio in a loop, according to the .fio file passed
# in and a number of environment variables. The following variables can be
# set before launching zfstest to override the defaults.
#
# PERF_RUNTIME: The time in seconds each fio invocation should run.
# PERF_RUNTYPE: A human readable tag that appears in logs. The defaults are
#    nightly and weekly.
# PERF_NTHREADS: A list of how many threads each fio invocation will use.
# PERF_SYNC_TYPES: Whether to use O_SYNC or not. 1 is sync IO, 0 is async IO.
# PERF_IOSIZES: A list of blocksizes in which each fio invocation will do IO.
# PERF_COLLECT_SCRIPTS: A comma delimited list of 'command args, logfile_tag'
#    pairs that will be added to the scripts specified in each test.
#
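# For example (hypothetical values), a nightly sweep over two thread
# counts and two I/O sizes could be requested like this before launching
# the test suite:
#
#	export PERF_RUNTIME=$PERF_RUNTIME_NIGHTLY
#	export PERF_NTHREADS="8 16"
#	export PERF_SYNC_TYPES="0 1"
#	export PERF_IOSIZES="8k 128k"
#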
function do_fio_run
{
	typeset script=$1
	typeset do_recreate=$2
	typeset clear_cache=$3
	typeset threads threads_per_fs sync iosize

	for threads in $PERF_NTHREADS; do
		for threads_per_fs in $PERF_NTHREADS_PER_FS; do
			for sync in $PERF_SYNC_TYPES; do
				for iosize in $PERF_IOSIZES; do
					do_fio_run_impl $script $do_recreate \
					    $clear_cache $threads \
					    $threads_per_fs $sync $iosize
				done
			done
		done
	done
}

#
# This function sets up the NFS mount on the client and makes sure all the
# correct permissions are in place.
#
function do_setup_nfs
{
	typeset script=$1
	zfs set sharenfs=on $TESTFS
	log_must chmod -R 777 /$TESTFS

	ssh -t $NFS_USER@$NFS_CLIENT "mkdir -m 777 -p $NFS_MOUNT"
	ssh -t $NFS_USER@$NFS_CLIENT "sudo -S umount $NFS_MOUNT"
	log_must ssh -t $NFS_USER@$NFS_CLIENT "
		sudo -S mount $NFS_OPTIONS $NFS_SERVER:/$TESTFS $NFS_MOUNT
	"
	#
	# The variables in the fio script are only available in our current
	# shell session, so we have to evaluate them here before copying
	# the resulting script over to the target machine.
	#
	export jobnum='$jobnum'
	while read line; do
		eval echo "$line"
	done < $FIO_SCRIPTS/$script > /tmp/test.fio
	log_must sed -i -e "s%directory.*%directory=$NFS_MOUNT%" /tmp/test.fio
	log_must scp /tmp/test.fio $NFS_USER@$NFS_CLIENT:/tmp
	log_must rm /tmp/test.fio
}

#
# This function iterates through the value pairs in $PERF_COLLECT_SCRIPTS.
# The script at index N is launched in the background, with its output
# redirected to a logfile containing the tag specified at index N + 1.
#
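# For example (hypothetical values), the following appends one
# 'command args, logfile_tag' pair, which launches vmstat in the
# background with its output logged under the "vmstat" tag:
#
#	PERF_COLLECT_SCRIPTS="vmstat 1, vmstat"
#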
function do_collect_scripts
{
	typeset suffix=$1

	[[ -n $collect_scripts ]] || log_fail "No data collection scripts."
	[[ -n $PERF_RUNTIME ]] || log_fail "No runtime specified."

	# Add in user supplied scripts and logfiles, if any.
	typeset oIFS=$IFS
	IFS=','
	for item in $PERF_COLLECT_SCRIPTS; do
		collect_scripts+=($(echo $item | sed 's/^ *//g'))
	done
	IFS=$oIFS

	typeset idx=0
	while [[ $idx -lt "${#collect_scripts[@]}" ]]; do
		typeset logbase="$(get_perf_output_dir)/$(basename \
		    $SUDO_COMMAND)"
		typeset outfile="$logbase.${collect_scripts[$idx + 1]}.$suffix"

		timeout $PERF_RUNTIME ${collect_scripts[$idx]} >$outfile 2>&1 &
		((idx += 2))
	done

	#
	# Need to explicitly return 0 because timeout(1) will kill
	# a child process and cause us to return non-zero.
	#
	return 0
}

# Find a place to deposit performance data collected while under load.
function get_perf_output_dir
{
	typeset dir="$(pwd)/perf_data"
	[[ -d $dir ]] || mkdir -p $dir

	echo $dir
}

function apply_zinject_delays
{
	typeset idx=0
	while [[ $idx -lt "${#ZINJECT_DELAYS[@]}" ]]; do
		[[ -n ${ZINJECT_DELAYS[$idx]} ]] || \
		    log_fail "No zinject delay found at index: $idx"

		for disk in $DISKS; do
			log_must zinject \
			    -d $disk -D ${ZINJECT_DELAYS[$idx]} $PERFPOOL
		done

		((idx += 1))
	done
}

function clear_zinject_delays
{
	log_must zinject -c all
}

#
# Destroy and create the pool used for performance tests.
#
function recreate_perf_pool
{
	[[ -n $PERFPOOL ]] || log_fail "The \$PERFPOOL variable isn't set."

	#
	# In case there's been some "leaked" zinject delays, or if the
	# performance test injected some delays itself, we clear all
	# delays before attempting to destroy the pool. Each delay
	# places a hold on the pool, so the destroy will fail if there
	# are any outstanding delays.
	#
	clear_zinject_delays

	#
	# This function handles the case where the pool already exists,
	# and will destroy the previous pool and recreate a new pool.
	#
	create_pool $PERFPOOL $DISKS
}

function verify_threads_per_fs
{
	typeset threads=$1
	typeset threads_per_fs=$2

	log_must test -n $threads
	log_must test -n $threads_per_fs

	#
	# A value of "0" is treated as a "special value", and it is
	# interpreted to mean all threads will run using a single
	# filesystem.
	#
	[[ $threads_per_fs -eq 0 ]] && return

	#
	# The number of threads per filesystem must be a value greater
	# than or equal to zero; since we just verified the value isn't
	# 0 above, then it must be greater than zero here.
	#
	log_must test $threads_per_fs -ge 0

	#
	# This restriction can be lifted later if needed, but for now,
	# we restrict the number of threads per filesystem to a value
	# that evenly divides the thread count. This way, the threads
	# will be evenly distributed over all the filesystems.
	#
	log_must test $((threads % threads_per_fs)) -eq 0
}
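
# For example, threads=32 with threads_per_fs=8 passes the checks above
# (32 % 8 == 0), and do_fio_run_impl spreads the load over
# $((32 / 8)) == 4 filesystems; threads_per_fs=5 would fail the last check.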

function populate_perf_filesystems
{
	typeset nfilesystems=${1:-1}

	export TESTFS=""
	for i in $(seq 1 $nfilesystems); do
		typeset dataset="$PERFPOOL/fs$i"
		create_dataset $dataset $PERF_FS_OPTS
		if [[ -z "$TESTFS" ]]; then
			TESTFS="$dataset"
		else
			TESTFS="$TESTFS $dataset"
		fi
	done
}

function get_nfilesystems
{
	typeset filesystems=( $TESTFS )
	echo ${#filesystems[@]}
}

function get_directory
{
	typeset filesystems=( $TESTFS )
	typeset directory=

	typeset idx=0
	while [[ $idx -lt "${#filesystems[@]}" ]]; do
		mountpoint=$(get_prop mountpoint "${filesystems[$idx]}")

		if [[ -n $directory ]]; then
			directory=$directory:$mountpoint
		else
			directory=$mountpoint
		fi

		((idx += 1))
	done

	echo $directory
}
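
# populate_perf_filesystems above creates $PERFPOOL/fs1 .. $PERFPOOL/fsN, so
# with two filesystems and default mountpoints this echoes something like
# "/$PERFPOOL/fs1:/$PERFPOOL/fs2", which is the colon-separated list format
# that fio accepts for its "directory" option.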

function get_min_arc_size
{
	typeset -l min_arc_size

	if is_freebsd; then
		min_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_min)
	elif is_illumos; then
		min_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_min.value.ui64);
		    exit(0);
		}')
	else
		min_arc_size=`awk '$1 == "c_min" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_min_arc_size failed"

	echo $min_arc_size
}

function get_max_arc_size
{
	typeset -l max_arc_size

	if is_freebsd; then
		max_arc_size=$(sysctl -n kstat.zfs.misc.arcstats.c_max)
	elif is_illumos; then
		max_arc_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `arc_stats.arcstat_c_max.value.ui64);
		    exit(0);
		}')
	else
		max_arc_size=`awk '$1 == "c_max" { print $3 }' \
		    /proc/spl/kstat/zfs/arcstats`
	fi

	[[ $? -eq 0 ]] || log_fail "get_max_arc_size failed"

	echo $max_arc_size
}

function get_max_dbuf_cache_size
{
	typeset -l max_dbuf_cache_size

	if is_illumos; then
		max_dbuf_cache_size=$(dtrace -qn 'BEGIN {
		    printf("%u\n", `dbuf_cache_max_bytes);
		    exit(0);
		}')
	else
		max_dbuf_cache_size=$(get_tunable DBUF_CACHE_MAX_BYTES)

		[[ $? -eq 0 ]] || log_fail "get_max_dbuf_cache_size failed"
	fi

	echo $max_dbuf_cache_size
}

# Create a file with some information about how this system is configured.
function get_system_config
{
	typeset config=$PERF_DATA_DIR/$1

	echo "{" >>$config
	if is_linux; then
		echo "  \"ncpus\": \"$(nproc --all)\"," >>$config
		echo "  \"physmem\": \"$(free -b | \
		    awk '$1 == "Mem:" { print $2 }')\"," >>$config
		echo "  \"c_max\": \"$(get_max_arc_size)\"," >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -sr)\"," >>$config
	else
		dtrace -qn 'BEGIN{
		    printf("  \"ncpus\": %d,\n", `ncpus);
		    printf("  \"physmem\": %u,\n", `physmem * `_pagesize);
		    printf("  \"c_max\": %u,\n", `arc_stats.arcstat_c_max.value.ui64);
		    printf("  \"kmem_flags\": \"0x%x\",", `kmem_flags);
		    exit(0)}' >>$config
		echo "  \"hostname\": \"$(uname -n)\"," >>$config
		echo "  \"kernel version\": \"$(uname -v)\"," >>$config
	fi
	if is_linux; then
		lsblk -dino NAME,SIZE | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    {disk = $1} {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		zfs_tunables="/sys/module/zfs/parameters"

		printf "  \"tunables\": {\n" >>$config
		for tunable in \
		    zfs_arc_max \
		    zfs_arc_meta_limit \
		    zfs_arc_sys_free \
		    zfs_dirty_data_max \
		    zfs_flags \
		    zfs_prefetch_disable \
		    zfs_txg_timeout \
		    zfs_vdev_aggregation_limit \
		    zfs_vdev_async_read_max_active \
		    zfs_vdev_async_write_max_active \
		    zfs_vdev_sync_read_max_active \
		    zfs_vdev_sync_write_max_active \
		    zio_slow_io_ms
		do
			if [ "$tunable" != "zfs_arc_max" ]
			then
				printf ",\n" >>$config
			fi
			printf "    \"$tunable\": \"$(<$zfs_tunables/$tunable)\"" \
			    >>$config
		done
		printf "\n  }\n" >>$config
	else
		iostat -En | awk 'BEGIN {
		    printf("  \"disks\": {\n"); first = 1}
		    /^c/ {disk = $1}
		    /^Size: [^0]/ {size = $2;
		    if (first != 1) {printf(",\n")} else {first = 0}
		    printf("    \"%s\": \"%s\"", disk, size)}
		    END {printf("\n  },\n")}' >>$config

		sed -n 's/^set \(.*\)[ ]=[ ]\(.*\)/\1=\2/p' /etc/system | \
		    awk -F= 'BEGIN {printf("  \"system\": {\n"); first = 1}
		    {if (first != 1) {printf(",\n")} else {first = 0};
		    printf("    \"%s\": %s", $1, $2)}
		    END {printf("\n  }\n")}' >>$config
	fi

	echo "}" >>$config
}
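
# For illustration only, on Linux the generated file looks roughly like:
#
#	{
#	  "ncpus": "16",
#	  "physmem": "67108864000",
#	  ...
#	  "disks": { "sda": "100G", ... },
#	  "tunables": { "zfs_arc_max": "0", ... }
#	}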

function num_jobs_by_cpu
{
	if is_linux; then
		typeset ncpu=$($NPROC --all)
	else
		typeset ncpu=$(psrinfo | $WC -l)
	fi
	typeset num_jobs=$ncpu

	[[ $ncpu -gt 8 ]] && num_jobs=$(echo "$ncpu * 3 / 4" | bc)

	echo $num_jobs
}
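
# For example, a machine with 8 or fewer CPUs gets one job per CPU, while
# a 16-CPU machine is capped at $((16 * 3 / 4)) == 12 jobs.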

#
# Returns a colon-delimited list of the LUNs that back the given pool.
# On illumos this looks like: ":sd3:sd4:sd1:sd2:"
#
function pool_to_lun_list
{
	typeset pool=$1
	typeset ctd ctds devname lun
	typeset lun_list=':'

	if is_illumos; then
		ctds=$(zpool list -v $pool |
		    awk '/c[0-9]*t[0-9a-fA-F]*d[0-9]*/ {print $1}')

		for ctd in $ctds; do
			# Get the device name as it appears in /etc/path_to_inst
			devname=$(readlink -f /dev/dsk/${ctd}s0 | sed -n \
			    's/\/devices\([^:]*\):.*/\1/p')
			# Add a string composed of the driver name and instance
			# number to the list for comparison with dev_statname.
			lun=$(sed 's/"//g' /etc/path_to_inst | grep \
			    $devname | awk '{print $3$2}')
			lun_list="$lun_list$lun:"
		done
	elif is_freebsd; then
		lun_list+=$(zpool list -HLv $pool | \
		    awk '/a?da[0-9]+|md[0-9]+|mfid[0-9]+|nda[0-9]+|nvd[0-9]+|vtbd[0-9]+/
		        { printf "%s:", $1 }')
	else
		ctds=$(zpool list -HLv $pool | \
		    awk '/sd[a-z]*|loop[0-9]*|dm-[0-9]*/ {print $1}')

		for ctd in $ctds; do
			lun_list="$lun_list$ctd:"
		done
	fi
	echo $lun_list
}

# Create a perf_data directory to hold performance statistics and
# configuration information.
export PERF_DATA_DIR=$(get_perf_output_dir)
[[ -f $PERF_DATA_DIR/config.json ]] || get_system_config config.json