From 3c7650491b9ac2d1c7610c9421aa1d2d2db8c74b Mon Sep 17 00:00:00 2001 From: Mauricio Faria de Oliveira Date: Fri, 8 Dec 2023 21:32:35 -0300 Subject: [PATCH 01/11] zed: fix typo in variable ZED_POWER_OFF_ENCLO*US*RE_SLOT_ON_FAULT Replace ENCLO_US_RE with ENCLO_SU_RE in the name of the variable. Note this changes the user-visible string in zed.rc, thus might break current users with the wrong string, but it's ~2 months since zfs-2.2.0 tag is out, thus should not be widespread yet. Mechanical change: $ grep -rl ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT cmd/zed/zed.d/zed.rc cmd/zed/zed.d/statechange-slot_off.sh $ sed -i 's/ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT/ ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT/g' \ cmd/zed/zed.d/zed.rc \ cmd/zed/zed.d/statechange-slot_off.sh $ grep -rl ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT $ Fixes 11fbcacf37d1a66c7a40bb8920c70ce9a87270ea ("zed: Add zedlet to power off slot when drive is faulted") Reviewed-by: Tony Hutter Reviewed-by: Brian Behlendorf Signed-off-by: Mauricio Faria de Oliveira Closes #15651 --- cmd/zed/zed.d/statechange-slot_off.sh | 6 +++--- cmd/zed/zed.d/zed.rc | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cmd/zed/zed.d/statechange-slot_off.sh b/cmd/zed/zed.d/statechange-slot_off.sh index 150012abe71a..06acce93b8aa 100755 --- a/cmd/zed/zed.d/statechange-slot_off.sh +++ b/cmd/zed/zed.d/statechange-slot_off.sh @@ -5,7 +5,7 @@ # # Bad SCSI disks can often "disappear and reappear" causing all sorts of chaos # as they flip between FAULTED and ONLINE. If -# ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT is set in zed.rc, and the disk gets +# ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT is set in zed.rc, and the disk gets # FAULTED, then power down the slot via sysfs: # # /sys/class/enclosure///power_status @@ -19,7 +19,7 @@ # Exit codes: # 0: slot successfully powered off # 1: enclosure not available -# 2: ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT disabled +# 2: ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT disabled # 3: vdev was not FAULTED # 4: The enclosure sysfs path passed from ZFS does not exist # 5: Enclosure slot didn't actually turn off after we told it to @@ -32,7 +32,7 @@ if [ ! -d /sys/class/enclosure ] ; then exit 1 fi -if [ "${ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT}" != "1" ] ; then +if [ "${ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT}" != "1" ] ; then exit 2 fi diff --git a/cmd/zed/zed.d/zed.rc b/cmd/zed/zed.d/zed.rc index acdfabc8ded2..bc269b155d76 100644 --- a/cmd/zed/zed.d/zed.rc +++ b/cmd/zed/zed.d/zed.rc @@ -146,7 +146,7 @@ ZED_SYSLOG_SUBCLASS_EXCLUDE="history_event" # Power off the drive's slot in the enclosure if it becomes FAULTED. This can # help silence misbehaving drives. This assumes your drive enclosure fully # supports slot power control via sysfs. -#ZED_POWER_OFF_ENCLOUSRE_SLOT_ON_FAULT=1 +#ZED_POWER_OFF_ENCLOSURE_SLOT_ON_FAULT=1 ## # Ntfy topic From 5ff43969e7bf64c21d91afbc121a5e2ea9b307d3 Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Thu, 7 Dec 2023 01:18:43 +0500 Subject: [PATCH 02/11] ZTS: block_cloning: Use numeric sort for get_same_blocks Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15614 --- .../tests/functional/block_cloning/block_cloning.kshlib | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib index 526bd54a2bf3..50f3a3d262c0 100644 --- a/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning.kshlib @@ -53,6 +53,6 @@ function get_same_blocks awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.a zdb $KEY -vvvvv $3 -O $4 | \ awk '/ L0 / { print l++ " " $3 " " $7 }' > $zdbout.b - echo $(sort $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') + echo $(sort -n $zdbout.a $zdbout.b | uniq -d | cut -f1 -d' ') } From 2ebb9a4811ce14dc624cd085d959d74bf2da8e4e Mon Sep 17 00:00:00 2001 From: Ameer Hamza Date: Fri, 1 Dec 2023 01:14:56 +0500 Subject: [PATCH 03/11] ZTS: Add test cases for block cloning replay Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Reviewed-by: Alexander Motin Signed-off-by: Ameer Hamza Closes #15614 --- tests/runfiles/linux.run | 3 +- tests/test-runner/bin/zts-report.py.in | 5 +- tests/zfs-tests/tests/Makefile.am | 2 + .../block_cloning/block_cloning_replay.ksh | 131 +++++++++++++++++ .../block_cloning_replay_encrypted.ksh | 133 ++++++++++++++++++ 5 files changed, 272 insertions(+), 2 deletions(-) create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh create mode 100755 tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh diff --git a/tests/runfiles/linux.run b/tests/runfiles/linux.run index fb78d96fb52d..17ba23352422 100644 --- a/tests/runfiles/linux.run +++ b/tests/runfiles/linux.run @@ -43,7 +43,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_disabled_ficlonerange', 'block_cloning_copyfilerange_cross_dataset', 'block_cloning_cross_enc_dataset', - 'block_cloning_copyfilerange_fallback_same_txg'] + 'block_cloning_copyfilerange_fallback_same_txg', + 'block_cloning_replay', 'block_cloning_replay_encrypted'] tags = ['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/tests/test-runner/bin/zts-report.py.in b/tests/test-runner/bin/zts-report.py.in index b188a101c257..3b5eeacb6bad 100755 --- a/tests/test-runner/bin/zts-report.py.in +++ b/tests/test-runner/bin/zts-report.py.in @@ -301,6 +301,10 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_fallback': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay': + ['SKIP', cfr_reason], + 'block_cloning/block_cloning_replay_encrypted': + ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_cross_dataset': ['SKIP', cfr_cross_reason], 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': @@ -309,7 +313,6 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_cross_reason], }) - # Not all Github actions runners have scsi_debug module, so we may skip # some tests which use it. if os.environ.get('CI') == 'true': diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index 2d899e58bdbb..e7a4f20d303f 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -452,6 +452,8 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_ficlonerange.ksh \ functional/block_cloning/block_cloning_ficlonerange_partial.ksh \ functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ + functional/block_cloning/block_cloning_replay.ksh \ + functional/block_cloning/block_cloning_replay_encrypted.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh new file mode 100755 index 000000000000..1fdf379ed2d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay.ksh @@ -0,0 +1,131 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for cloned files. This +# test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an empty file system (TESTFS) +# 2. Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +claim="The slogs are replayed correctly for cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. Create an empty file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create $TESTPOOL/$TESTFS + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must zpool import -f -d $VDIR $TESTPOOL + +# +# 7. Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2) +log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] + +log_pass $claim diff --git a/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh new file mode 100755 index 000000000000..f9f687c83e5b --- /dev/null +++ b/tests/zfs-tests/tests/functional/block_cloning/block_cloning_replay_encrypted.ksh @@ -0,0 +1,133 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Verify slogs are replayed correctly for encrypted cloned files. +# This test is ported from slog_replay tests for block cloning. +# +# STRATEGY: +# 1. Create an encrypted file system (TESTFS) +# 2. Create regular files and sync +# 3. Freeze TESTFS +# 4. Clone the file +# 5. Unmount filesystem +# +# 6. Remount encrypted TESTFS +# 7. Compare clone file with the original file +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +export VDIR=$TEST_BASE_DIR/disk-bclone +export VDEV="$VDIR/a $VDIR/b $VDIR/c" +export LDEV="$VDIR/e $VDIR/f" +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV +export PASSPHRASE="password" + +claim="The slogs are replayed correctly for encrypted cloned files." + +log_assert $claim + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $TESTDIR $VDIR $VDIR2 +} + +log_onexit cleanup + +# +# 1. Create an encrypted file system (TESTFS) +# +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must eval "echo $PASSPHRASE | zfs create -o encryption=on" \ + "-o keyformat=passphrase -o keylocation=prompt $TESTPOOL/$TESTFS" + +# +# 2. TX_WRITE: Create two files and sync txg +# +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 \ + oflag=sync bs=128k count=4 +log_must zfs set recordsize=16K $TESTPOOL/$TESTFS +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file2 \ + oflag=sync bs=16K count=2048 +sync_pool $TESTPOOL + +# +# 3. Checkpoint for ZIL Replay +# +log_must zpool freeze $TESTPOOL + +# +# 4. TX_CLONE_RANGE: Clone the file +# +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must clonefile -c /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +# +# 5. Unmount filesystem and export the pool +# +# At this stage TESTFS is frozen, the intent log contains a complete set +# of deltas to replay for clone files. +# +log_must zfs unmount /$TESTPOOL/$TESTFS + +log_note "Verify transactions to replay:" +log_must zdb -iv $TESTPOOL/$TESTFS + +log_must zpool export $TESTPOOL + +# +# 6. Remount TESTFS +# +# Import the pool to unfreeze it and claim log blocks. It has to be +# `zpool import -f` because we can't write a frozen pool's labels! +# +log_must eval "echo $PASSPHRASE | zpool import -l -f -d $VDIR $TESTPOOL" + +# +# 7. Compare clone file with the original file +# +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/clone1 +log_must have_same_content /$TESTPOOL/$TESTFS/file2 /$TESTPOOL/$TESTFS/clone2 + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 \ + $TESTPOOL/$TESTFS clone1 $PASSPHRASE) +log_must [ "$blocks" = "0 1 2 3" ] + +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file2 \ + $TESTPOOL/$TESTFS clone2 $PASSPHRASE) +log_must [ "$blocks" = "$(seq -s " " 0 2047)" ] + +log_pass $claim From e53e60c0bda6ee5ae9524361d8210ccaf5d5d45e Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 8 Dec 2023 19:43:39 -0500 Subject: [PATCH 04/11] DMU: Fix lock leak on dbuf_hold() error dmu_assign_arcbuf_by_dnode() should drop dn_struct_rwlock lock in case dbuf_hold() failed. I don't have reproduction for this, but it looks inconsistent with dmu_buf_hold_noread_by_dnode() and co. Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15644 --- module/zfs/dmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index 6cf7f3c3b12e..f5a5d0fc437f 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -1501,9 +1501,9 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, rw_enter(&dn->dn_struct_rwlock, RW_READER); blkid = dbuf_whichblock(dn, 0, offset); db = dbuf_hold(dn, blkid, FTAG); + rw_exit(&dn->dn_struct_rwlock); if (db == NULL) return (SET_ERROR(EIO)); - rw_exit(&dn->dn_struct_rwlock); /* * We can only assign if the offset is aligned and the arc buf is the From 8ad73bf4497e8c8d71ad3aa93459b70c6a2050c0 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Fri, 8 Dec 2023 17:31:31 -0800 Subject: [PATCH 05/11] ZTS: Disable io_uring test on CentOS 9 The io_uring test fails on CentOS 9 with the following fio error. Disable the test for the benefit of the CI until this can be fully investigated. This basic test passes as expected on newer kernels. Reviewed-by: Tony Hutter Signed-off-by: Brian Behlendorf Closes #15636 --- tests/zfs-tests/tests/functional/io/io_uring.ksh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tests/zfs-tests/tests/functional/io/io_uring.ksh b/tests/zfs-tests/tests/functional/io/io_uring.ksh index 47e439d0f4d5..2fa146556358 100755 --- a/tests/zfs-tests/tests/functional/io/io_uring.ksh +++ b/tests/zfs-tests/tests/functional/io/io_uring.ksh @@ -44,6 +44,13 @@ if ! $(grep -q "CONFIG_IO_URING=y" /boot/config-$(uname -r)); then log_unsupported "Requires io_uring support" fi +if [ -e /etc/os-release ] ; then + source /etc/os-release + if [ -n "$REDHAT_SUPPORT_PRODUCT_VERSION" ] && ((floor($REDHAT_SUPPORT_PRODUCT_VERSION) == 9)) ; then + log_unsupported "Disabled on CentOS 9, fails with 'Operation not permitted'" + fi +fi + fio --ioengine=io_uring --parse-only || log_unsupported "fio io_uring support required" function cleanup From 9f6ad4dcb6aea0b798cec6be034c70548aef12e9 Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Mon, 11 Dec 2023 09:40:32 -0800 Subject: [PATCH 06/11] ZTS: Add zpool_import_status.ksh to Makefile.am The zpool_import_status.ksh test case was not being run because it was not included in the Makefile.am. Reviewed-by: George Melikov Signed-off-by: Brian Behlendorf Closes #15655 --- tests/zfs-tests/tests/Makefile.am | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index e7a4f20d303f..a2ffd583faa0 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1115,6 +1115,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/cli_root/zpool_import/zpool_import_missing_002_pos.ksh \ functional/cli_root/zpool_import/zpool_import_missing_003_pos.ksh \ functional/cli_root/zpool_import/zpool_import_rename_001_pos.ksh \ + functional/cli_root/zpool_import/zpool_import_status.ksh \ functional/cli_root/zpool_initialize/cleanup.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_attach_detach_add_remove.ksh \ functional/cli_root/zpool_initialize/zpool_initialize_fault_export_import_online.ksh \ From b1748eaee0dd9331c3ab1c7e9d548203a4d70176 Mon Sep 17 00:00:00 2001 From: Tony Hutter Date: Mon, 11 Dec 2023 09:59:59 -0800 Subject: [PATCH 07/11] ZTS: Add dirty dnode stress test Add a test for the dirty dnode SEEK_HOLE/SEEK_DATA bug described in https://github.com/openzfs/zfs/issues/15526 The bug was fixed in https://github.com/openzfs/zfs/pull/15571 and was backported to 2.2.2 and 2.1.14. This test case is just to make sure it does not come back. seekflood.c originally written by Rob Norris. Reviewed-by: Graham Perrin Reviewed-by: Brian Behlendorf Reviewed-by: Rob Norris Signed-off-by: Tony Hutter Closes #15608 --- tests/runfiles/common.run | 2 +- tests/zfs-tests/Makefile.am | 3 + tests/zfs-tests/tests/Makefile.am | 1 + .../tests/functional/cp_files/.gitignore | 1 + .../tests/functional/cp_files/cp_stress.ksh | 73 +++++++ .../tests/functional/cp_files/seekflood.c | 180 ++++++++++++++++++ 6 files changed, 259 insertions(+), 1 deletion(-) create mode 100644 tests/zfs-tests/tests/functional/cp_files/.gitignore create mode 100755 tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh create mode 100644 tests/zfs-tests/tests/functional/cp_files/seekflood.c diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index dd44b63aaf3d..8c1f0ad0b14e 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -598,7 +598,7 @@ tests = ['compress_001_pos', 'compress_002_pos', 'compress_003_pos', tags = ['functional', 'compression'] [tests/functional/cp_files] -tests = ['cp_files_001_pos'] +tests = ['cp_files_001_pos', 'cp_stress'] tags = ['functional', 'cp_files'] [tests/functional/crtime] diff --git a/tests/zfs-tests/Makefile.am b/tests/zfs-tests/Makefile.am index f8166352489e..3dd1a6452728 100644 --- a/tests/zfs-tests/Makefile.am +++ b/tests/zfs-tests/Makefile.am @@ -13,6 +13,9 @@ scripts_zfs_tests_functional_hkdf_PROGRAMS = %D%/tests/functional/hkdf/hkdf_test %C%_tests_functional_hkdf_hkdf_test_LDADD = \ libzpool.la +scripts_zfs_tests_functional_cp_filesdir = $(datadir)/$(PACKAGE)/zfs-tests/tests/functional/cp_files +scripts_zfs_tests_functional_cp_files_PROGRAMS = %D%/tests/functional/cp_files/seekflood + if BUILD_LINUX scripts_zfs_tests_functional_tmpfiledir = $(datadir)/$(PACKAGE)/zfs-tests/tests/functional/tmpfile scripts_zfs_tests_functional_tmpfile_PROGRAMS = \ diff --git a/tests/zfs-tests/tests/Makefile.am b/tests/zfs-tests/tests/Makefile.am index a2ffd583faa0..3c9f09382424 100644 --- a/tests/zfs-tests/tests/Makefile.am +++ b/tests/zfs-tests/tests/Makefile.am @@ -1369,6 +1369,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/compression/setup.ksh \ functional/cp_files/cleanup.ksh \ functional/cp_files/cp_files_001_pos.ksh \ + functional/cp_files/cp_stress.ksh \ functional/cp_files/setup.ksh \ functional/crtime/cleanup.ksh \ functional/crtime/crtime_001_pos.ksh \ diff --git a/tests/zfs-tests/tests/functional/cp_files/.gitignore b/tests/zfs-tests/tests/functional/cp_files/.gitignore new file mode 100644 index 000000000000..d15225ac8429 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/.gitignore @@ -0,0 +1 @@ +seekflood diff --git a/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh b/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh new file mode 100755 index 000000000000..43bb8ab572d2 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/cp_stress.ksh @@ -0,0 +1,73 @@ +#! /bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END + +# +# Copyright (c) 2023 by Lawrence Livermore National Security, LLC. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# +# https://github.com/openzfs/zfs/issues/15526 identified a dirty dnode +# SEEK_HOLE/SEEK_DATA bug. https://github.com/openzfs/zfs/pull/15571 +# fixed the bug, and was backported to 2.1.14 and 2.2.2. +# +# This test is to ensure that the bug, as understood, will not recur. +# +# STRATEGY: +# +# 1. Run the 'seekflood' binary, for creation of files with timing +# characteristics that can trigger #15526. +# 2. A single run is not always a trigger, so run repeatedly. + +verify_runnable "global" + +function cleanup +{ + rm -rf /$TESTDIR/cp_stress +} + +log_assert "Run the 'seekflood' binary repeatedly to try to trigger #15526" + +log_onexit cleanup + +log_must mkdir /$TESTPOOL/cp_stress + +MYPWD="$PWD" +cd /$TESTPOOL/cp_stress +CPUS=$(get_num_cpus) + +if is_freebsd ; then + # 'seekflood' takes longer on FreeBSD and can timeout the test + RUNS=3 +else + RUNS=10 +fi + +for i in $(seq 1 $RUNS) ; do + # Each run takes around 12 seconds. + log_must $STF_SUITE/tests/functional/cp_files/seekflood 2000 $CPUS +done +cd "$MYPWD" + +log_pass "No corruption detected" diff --git a/tests/zfs-tests/tests/functional/cp_files/seekflood.c b/tests/zfs-tests/tests/functional/cp_files/seekflood.c new file mode 100644 index 000000000000..02c2c8e6eca5 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cp_files/seekflood.c @@ -0,0 +1,180 @@ +/* + * SPDX-License-Identifier: MIT + * + * Copyright (c) 2023, Rob Norris + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DATASIZE (4096) +char data[DATASIZE]; + +static int +_open_file(int n, int wr) +{ + char buf[256]; + int fd; + + snprintf(buf, sizeof (buf), "testdata_%d_%d", getpid(), n); + + if ((fd = open(buf, wr ? (O_WRONLY | O_CREAT) : O_RDONLY, + wr ? (S_IRUSR | S_IWUSR) : 0)) < 0) { + fprintf(stderr, "Error: open '%s' (%s): %s\n", + buf, wr ? "write" : "read", strerror(errno)); + exit(1); + } + + return (fd); +} + +static void +_write_file(int n, int fd) +{ + /* write a big ball of stuff */ + ssize_t nwr = write(fd, data, DATASIZE); + if (nwr < 0) { + fprintf(stderr, "Error: write '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(1); + } else if (nwr < DATASIZE) { + fprintf(stderr, "Error: write '%d_%d': short write\n", getpid(), + n); + exit(1); + } +} + +static int +_seek_file(int n, int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) { + fprintf(stderr, "Error: fstat '%d_%d': %s\n", getpid(), n, + strerror(errno)); + exit(1); + } + + /* + * A zero-sized file correctly has no data, so seeking the file is + * pointless. + */ + if (st.st_size == 0) + return (0); + + /* size is real, and we only write, so SEEK_DATA must find something */ + if (lseek(fd, 0, SEEK_DATA) < 0) { + if (errno == ENXIO) + return (1); + fprintf(stderr, "Error: lseek '%d_%d': %s\n", + getpid(), n, strerror(errno)); + exit(2); + } + + return (0); +} + +int +main(int argc, char **argv) +{ + int nfiles = 0; + int nthreads = 0; + + if (argc < 3 || (nfiles = atoi(argv[1])) == 0 || + (nthreads = atoi(argv[2])) == 0) { + printf("usage: seekflood \n"); + exit(1); + } + + memset(data, 0x5a, DATASIZE); + + /* fork off some flood threads */ + for (int i = 0; i < nthreads; i++) { + if (!fork()) { + /* thread main */ + + /* create zero file */ + int fd = _open_file(0, 1); + _write_file(0, fd); + close(fd); + + int count = 0; + + int h = 0, i, j, rfd, wfd; + for (i = 0; i < nfiles; i += 2, h++) { + j = i+1; + + /* seek h, write i */ + rfd = _open_file(h, 0); + wfd = _open_file(i, 1); + count += _seek_file(h, rfd); + _write_file(i, wfd); + close(rfd); + close(wfd); + + /* seek i, write j */ + rfd = _open_file(i, 0); + wfd = _open_file(j, 1); + count += _seek_file(i, rfd); + _write_file(j, wfd); + close(rfd); + close(wfd); + } + + /* return count of failed seeks to parent */ + exit(count < 256 ? count : 255); + } + } + + /* wait for threads, take their seek fail counts from exit code */ + int count = 0, crashed = 0; + for (int i = 0; i < nthreads; i++) { + int wstatus; + wait(&wstatus); + if (WIFEXITED(wstatus)) + count += WEXITSTATUS(wstatus); + else + crashed++; + } + + if (crashed) { + fprintf(stderr, "Error: child crashed; test failed\n"); + exit(1); + } + + if (count) { + fprintf(stderr, "Error: %d seek failures; test failed\n", + count); + exit(1); + } + + exit(0); +} From a9b937e066644c767e43aeeb46783e3139378b47 Mon Sep 17 00:00:00 2001 From: Chunwei Chen Date: Mon, 11 Dec 2023 14:42:06 -0800 Subject: [PATCH 08/11] For db_marker inherit the db pointer for AVL comparision. While evicting dbufs of a dnode, a marker node is added to the AVL. The marker node should be inserted in AVL tree ahead of the dbuf its trying to delete. The blkid and level is used to ensure this. However, this could go wrong there's another dbufs with the same blkid and level in DB_EVICTING state but not yet removed from AVL tree. dbuf_compare() could fail to give the right location or could cause confusion and trigger ASSERTs. To ensure that the marker is inserted before the deleting dbuf, use the pointer value of the original dbuf for comparision. Reviewed-by: Brian Behlendorf Co-authored-by: Sanjeev Bagewadi Signed-off-by: Chunwei Chen Closes #12482 Closes #15643 --- include/sys/dbuf.h | 1 + module/zfs/dnode.c | 8 ++++++++ module/zfs/dnode_sync.c | 9 ++++++++- 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index 1800a7e31da0..2ff0bc72b270 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -79,6 +79,7 @@ extern "C" { * dbuf_states_t (see comment on dn_dbufs in dnode.h). */ typedef enum dbuf_states { + DB_MARKER = -2, DB_SEARCH = -1, DB_UNCACHED, DB_FILL, diff --git a/module/zfs/dnode.c b/module/zfs/dnode.c index 7ae74ad1318d..ba28aa06a91f 100644 --- a/module/zfs/dnode.c +++ b/module/zfs/dnode.c @@ -99,6 +99,14 @@ dbuf_compare(const void *x1, const void *x2) if (likely(cmp)) return (cmp); + if (d1->db_state == DB_MARKER) { + ASSERT3S(d2->db_state, !=, DB_MARKER); + return (TREE_PCMP(d1->db_parent, d2)); + } else if (d2->db_state == DB_MARKER) { + ASSERT3S(d1->db_state, !=, DB_MARKER); + return (TREE_PCMP(d1, d2->db_parent)); + } + if (d1->db_state == DB_SEARCH) { ASSERT3S(d2->db_state, !=, DB_SEARCH); return (-1); diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index 8cffbdb9d20b..f67dad002319 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -482,7 +482,14 @@ dnode_evict_dbufs(dnode_t *dn) zfs_refcount_is_zero(&db->db_holds)) { db_marker->db_level = db->db_level; db_marker->db_blkid = db->db_blkid; - db_marker->db_state = DB_SEARCH; + /* + * Insert a MARKER node with the same level and blkid. + * And to resolve any ties in dbuf_compare() use the + * pointer of the dbuf that we are evicting. Pass the + * address in db_parent. + */ + db_marker->db_state = DB_MARKER; + db_marker->db_parent = (void *)((uintptr_t)db - 1); avl_insert_here(&dn->dn_dbufs, db_marker, db, AVL_BEFORE); From f4b97c1e009006bce43de5ccaf9326f572bf8b7a Mon Sep 17 00:00:00 2001 From: Brian Behlendorf Date: Tue, 12 Dec 2023 09:56:19 -0800 Subject: [PATCH 09/11] ZTS: Update raidz_expand_005_pos.ksh Align the raidz_expand_005_pos test with the raidz_expand_004_pos test and only verify no errors were reported. Allow scrub repair IO. Reviewed-by: Don Brady Signed-off-by: Brian Behlendorf Closes #15663 --- tests/zfs-tests/tests/functional/raidz/raidz_expand_005_pos.ksh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/zfs-tests/tests/functional/raidz/raidz_expand_005_pos.ksh b/tests/zfs-tests/tests/functional/raidz/raidz_expand_005_pos.ksh index a31a7d254bfe..56ee3e9be67c 100755 --- a/tests/zfs-tests/tests/functional/raidz/raidz_expand_005_pos.ksh +++ b/tests/zfs-tests/tests/functional/raidz/raidz_expand_005_pos.ksh @@ -113,7 +113,7 @@ function test_replace # log_must zpool scrub -w $pool log_must zpool status -v - log_must check_pool_status $pool "scan" "repaired 0B" + log_must check_pool_status $pool "scan" "with 0 errors" } log_must set_tunable32 EMBEDDED_SLOG_MIN_MS 99999 From 86063d90319ac8a785d81ba7e3f8037a7f5389f3 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:53:59 -0500 Subject: [PATCH 10/11] dbuf: Handle arcbuf assignment after block cloning In some cases dbuf_assign_arcbuf() may be called on a block that was recently cloned. If it happened in current TXG we must undo the block cloning first, since the only one dirty record per TXG can't and shouldn't mean both cloning and overwrite same time. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15653 --- module/zfs/dbuf.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index c5ccd4cd1e0c..a07fe1733586 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -2945,7 +2945,8 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) while (db->db_state == DB_READ || db->db_state == DB_FILL) cv_wait(&db->db_changed, &db->db_mtx); - ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED); + ASSERT(db->db_state == DB_CACHED || db->db_state == DB_UNCACHED || + db->db_state == DB_NOFILL); if (db->db_state == DB_CACHED && zfs_refcount_count(&db->db_holds) - 1 > db->db_dirtycnt) { @@ -2982,6 +2983,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) arc_buf_destroy(db->db_buf, db); } db->db_buf = NULL; + } else if (db->db_state == DB_NOFILL) { + /* + * We will be completely replacing the cloned block. In case + * it was cloned in this transaction group, let's undirty the + * pending clone and mark the block as uncached. This will be + * as if the clone was never done. + */ + VERIFY(!dbuf_undirty(db, tx)); + db->db_state = DB_UNCACHED; } ASSERT(db->db_buf == NULL); dbuf_set_data(db, buf); From 86e115e21e5bdeee73c88c5a1a73a29d9637380a Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 12 Dec 2023 15:59:24 -0500 Subject: [PATCH 11/11] dbuf: Set dr_data when unoverriding after clone Block cloning normally creates dirty record without dr_data. But if the block is read after cloning, it is moved into DB_CACHED state and receives the data buffer. If after that we call dbuf_unoverride() to convert the dirty record into normal write, we should give it the data buffer from dbuf and release one. Reviewed-by: Kay Pedersen Reviewed-by: Brian Behlendorf Signed-off-by: Alexander Motin Sponsored by: iXsystems, Inc. Closes #15654 Closes #15656 --- module/zfs/dbuf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index a07fe1733586..03c97941d6d3 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -1919,7 +1919,6 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) dmu_buf_impl_t *db = dr->dr_dbuf; blkptr_t *bp = &dr->dt.dl.dr_overridden_by; uint64_t txg = dr->dr_txg; - boolean_t release; ASSERT(MUTEX_HELD(&db->db_mtx)); /* @@ -1940,7 +1939,10 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) if (!BP_IS_HOLE(bp) && !dr->dt.dl.dr_nopwrite) zio_free(db->db_objset->os_spa, txg, bp); - release = !dr->dt.dl.dr_brtwrite; + if (dr->dt.dl.dr_brtwrite) { + ASSERT0P(dr->dt.dl.dr_data); + dr->dt.dl.dr_data = db->db_buf; + } dr->dt.dl.dr_override_state = DR_NOT_OVERRIDDEN; dr->dt.dl.dr_nopwrite = B_FALSE; dr->dt.dl.dr_brtwrite = B_FALSE; @@ -1954,7 +1956,7 @@ dbuf_unoverride(dbuf_dirty_record_t *dr) * the buf thawed to save the effort of freezing & * immediately re-thawing it. */ - if (release) + if (dr->dt.dl.dr_data) arc_release(dr->dt.dl.dr_data, db); }