diff --git a/sys/contrib/subrepo-openzfs/.gitrepo b/sys/contrib/subrepo-openzfs/.gitrepo index 306bffde0ec8..c047e03a09d8 100644 --- a/sys/contrib/subrepo-openzfs/.gitrepo +++ b/sys/contrib/subrepo-openzfs/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = https://github.com/CTSRD-CHERI/zfs.git branch = cheri-hybrid - commit = 256817ec444af1bf0846397835e955e8418a3b73 - parent = 0b9521aeecf670e0fda710dc5e8aaf1d98e60f11 + commit = eb74be3f7cad77ec845126937b1f7dd1df7a6ed9 + parent = cb14ece5a54b671b25e4656e23682b3dba902b89 method = merge cmdver = 0.4.3 diff --git a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/uio.h b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/uio.h index a2acdacf9eff..55f5fd23a325 100644 --- a/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/uio.h +++ b/sys/contrib/subrepo-openzfs/include/os/freebsd/spl/sys/uio.h @@ -63,7 +63,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { zfs_uio_resid(uio) -= size; zfs_uio_offset(uio) += size; diff --git a/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/uio.h b/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/uio.h index a4b600004c9f..5e6ea8d3c221 100644 --- a/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/uio.h +++ b/sys/contrib/subrepo-openzfs/include/os/linux/spl/sys/uio.h @@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { uio->uio_resid -= size; uio->uio_loffset += size; diff --git a/sys/contrib/subrepo-openzfs/include/sys/dbuf.h b/sys/contrib/subrepo-openzfs/include/sys/dbuf.h index 2ff0bc72b270..3808a04cba80 100644 --- a/sys/contrib/subrepo-openzfs/include/sys/dbuf.h +++ b/sys/contrib/subrepo-openzfs/include/sys/dbuf.h @@ -380,8 +380,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); -void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx); -void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx); +void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail); +boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed); void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx); dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid, diff --git a/sys/contrib/subrepo-openzfs/lib/libspl/include/sys/uio.h b/sys/contrib/subrepo-openzfs/lib/libspl/include/sys/uio.h index e9e21819d4f8..665bfc42301b 100644 --- a/sys/contrib/subrepo-openzfs/lib/libspl/include/sys/uio.h +++ b/sys/contrib/subrepo-openzfs/lib/libspl/include/sys/uio.h @@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) } static inline void -zfs_uio_advance(zfs_uio_t *uio, size_t size) +zfs_uio_advance(zfs_uio_t *uio, ssize_t size) { uio->uio_resid -= size; uio->uio_loffset += size; diff --git a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c index ee6fb2dc657b..48ea37cbad59 100644 --- a/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c +++ b/sys/contrib/subrepo-openzfs/module/os/freebsd/zfs/dmu_os.c @@ -107,7 +107,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_FALSE); else dmu_buf_will_dirty(db, tx); @@ -123,7 +123,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size, } if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + dmu_buf_fill_done(db, tx, B_FALSE); offset += tocpy; size -= tocpy; diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c b/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c index 03c97941d6d3..e9d5abca3324 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dbuf.c @@ -2751,7 +2751,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) } void -dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) +dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail) { dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; @@ -2769,8 +2769,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx) * Block cloning: We will be completely overwriting a block * cloned in this transaction group, so let's undirty the * pending clone and mark the block as uncached. This will be - * as if the clone was never done. + * as if the clone was never done. But if the fill can fail + * we should have a way to return back to the cloned data. */ + if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) { + mutex_exit(&db->db_mtx); + dmu_buf_will_dirty(db_fake, tx); + return; + } VERIFY(!dbuf_undirty(db, tx)); db->db_state = DB_UNCACHED; } @@ -2831,32 +2837,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx) dl->dr_overridden_by.blk_birth = dr->dr_txg; } -void -dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx) +boolean_t +dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed) { (void) tx; dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf; - dbuf_states_t old_state; mutex_enter(&db->db_mtx); DBUF_VERIFY(db); - old_state = db->db_state; - db->db_state = DB_CACHED; - if (old_state == DB_FILL) { + if (db->db_state == DB_FILL) { if (db->db_level == 0 && db->db_freed_in_flight) { ASSERT(db->db_blkid != DMU_BONUS_BLKID); /* we were freed while filling */ /* XXX dbuf_undirty? */ memset(db->db.db_data, 0, db->db.db_size); db->db_freed_in_flight = FALSE; + db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "fill done handling freed in flight"); + failed = B_FALSE; + } else if (failed) { + VERIFY(!dbuf_undirty(db, tx)); + db->db_buf = NULL; + dbuf_clear_data(db); + DTRACE_SET_STATE(db, "fill failed"); } else { + db->db_state = DB_CACHED; DTRACE_SET_STATE(db, "fill done"); } cv_broadcast(&db->db_changed); + } else { + db->db_state = DB_CACHED; + failed = B_FALSE; } mutex_exit(&db->db_mtx); + return (failed); } void @@ -3001,7 +3016,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) DTRACE_SET_STATE(db, "filling assigned arcbuf"); mutex_exit(&db->db_mtx); (void) dbuf_dirty(db, tx); - dmu_buf_fill_done(&db->db, tx); + dmu_buf_fill_done(&db->db, tx, B_FALSE); } void diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dmu.c b/sys/contrib/subrepo-openzfs/module/zfs/dmu.c index f5a5d0fc437f..d82211e6d4c7 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dmu.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dmu.c @@ -1134,14 +1134,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size, ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_FALSE); else dmu_buf_will_dirty(db, tx); (void) memcpy((char *)db->db_data + bufoff, buf, tocpy); if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + dmu_buf_fill_done(db, tx, B_FALSE); offset += tocpy; size -= tocpy; @@ -1349,27 +1349,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx) ASSERT(size > 0); - bufoff = zfs_uio_offset(uio) - db->db_offset; + offset_t off = zfs_uio_offset(uio); + bufoff = off - db->db_offset; tocpy = MIN(db->db_size - bufoff, size); ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size); if (tocpy == db->db_size) - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_TRUE); else dmu_buf_will_dirty(db, tx); - /* - * XXX zfs_uiomove could block forever (eg.nfs-backed - * pages). There needs to be a uiolockdown() function - * to lock the pages in memory, so that zfs_uiomove won't - * block. - */ err = zfs_uio_fault_move((char *)db->db_data + bufoff, tocpy, UIO_WRITE, uio); - if (tocpy == db->db_size) - dmu_buf_fill_done(db, tx); + if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) { + /* The fill was reverted. Undo any uio progress. */ + zfs_uio_advance(uio, off - zfs_uio_offset(uio)); + } if (err) break; diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dmu_recv.c b/sys/contrib/subrepo-openzfs/module/zfs/dmu_recv.c index 05ca91717c2f..54aa60259ea1 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dmu_recv.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dmu_recv.c @@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs, * size of the provided arc_buf_t. */ if (db_spill->db_size != drrs->drr_length) { - dmu_buf_will_fill(db_spill, tx); + dmu_buf_will_fill(db_spill, tx, B_FALSE); VERIFY0(dbuf_spill_set_blksz(db_spill, drrs->drr_length, tx)); } diff --git a/sys/contrib/subrepo-openzfs/module/zfs/dsl_bookmark.c b/sys/contrib/subrepo-openzfs/module/zfs/dsl_bookmark.c index 03d9420dbdb9..4faefecbadbb 100644 --- a/sys/contrib/subrepo-openzfs/module/zfs/dsl_bookmark.c +++ b/sys/contrib/subrepo-openzfs/module/zfs/dsl_bookmark.c @@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot, dmu_buf_t *db; VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus, DB_RF_MUST_SUCCEED, FTAG, &db)); - dmu_buf_will_fill(db, tx); + dmu_buf_will_fill(db, tx, B_FALSE); VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen, SPA_MINBLOCKSIZE), tx)); local_rl->rl_phys = db->db_data; diff --git a/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run b/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run index 17ba23352422..c7c17f271762 100644 --- a/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run +++ b/sys/contrib/subrepo-openzfs/tests/runfiles/linux.run @@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial', 'block_cloning_copyfilerange_cross_dataset', 'block_cloning_cross_enc_dataset', 'block_cloning_copyfilerange_fallback_same_txg', - 'block_cloning_replay', 'block_cloning_replay_encrypted'] + 'block_cloning_replay', 'block_cloning_replay_encrypted', + 'block_cloning_lwb_buffer_overflow'] tags = ['functional', 'block_cloning'] [tests/functional/chattr:Linux] diff --git a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in index 3b5eeacb6bad..708b7be91767 100755 --- a/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in +++ b/sys/contrib/subrepo-openzfs/tests/test-runner/bin/zts-report.py.in @@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'): ['SKIP', cfr_reason], 'block_cloning/block_cloning_replay_encrypted': ['SKIP', cfr_reason], + 'block_cloning/block_cloning_lwb_buffer_overflow': + ['SKIP', cfr_reason], 'block_cloning/block_cloning_copyfilerange_cross_dataset': ['SKIP', cfr_cross_reason], 'block_cloning/block_cloning_copyfilerange_fallback_same_txg': diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am index 06d3ff065e0e..0a95a4f9f952 100644 --- a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/Makefile.am @@ -526,6 +526,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/block_cloning/block_cloning_cross_enc_dataset.ksh \ functional/block_cloning/block_cloning_replay.ksh \ functional/block_cloning/block_cloning_replay_encrypted.ksh \ + functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \ functional/bootfs/bootfs_001_pos.ksh \ functional/bootfs/bootfs_002_neg.ksh \ functional/bootfs/bootfs_003_pos.ksh \ diff --git a/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh new file mode 100755 index 000000000000..0ae76b7e54a5 --- /dev/null +++ b/sys/contrib/subrepo-openzfs/tests/zfs-tests/tests/functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh @@ -0,0 +1,89 @@ +#!/bin/ksh -p +# +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or https://opensource.org/licenses/CDDL-1.0. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2023 by iXsystems, Inc. All rights reserved. +# + +. $STF_SUITE/include/libtest.shlib +. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib + +# +# DESCRIPTION: +# Test for LWB buffer overflow with multiple VDEVs ZIL when 128KB +# block write is split into two 68KB ones, trying to write maximum +# sizes 128KB TX_CLONE_RANGE record with 1022 block pointers into +# 68KB buffer. +# +# STRATEGY: +# 1. Create a pool with multiple VDEVs ZIL +# 2. Write maximum sizes TX_CLONE_RANGE record with 1022 block +# pointers into 68KB buffer +# 3. Sync TXG +# 4. Clone the file +# 5. Synchronize cached writes +# + +verify_runnable "global" + +if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then + log_unsupported "copy_file_range not available before Linux 4.5" +fi + +VDIR=$TEST_BASE_DIR/disk-bclone +VDEV="$VDIR/a $VDIR/b $VDIR/c" +LDEV="$VDIR/e $VDIR/f" + +function cleanup +{ + datasetexists $TESTPOOL && destroy_pool $TESTPOOL + rm -rf $VDIR +} + +log_onexit cleanup + +log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL" + +log_must rm -rf $VDIR +log_must mkdir -p $VDIR +log_must truncate -s $MINVDEVSIZE $VDEV $LDEV + +log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \ + log mirror $LDEV +log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS +# Each ZIL log entry can fit 130816 bytes for a block cloning operation, +# so it can store 1022 block pointers. When LWB optimization is enabled, +# an assert is hit when 128KB block write is split into two 68KB ones +# for 2 SLOG devices +log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \ + conv=fsync +sync_pool $TESTPOOL +log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +log_must sync + +sync_pool $TESTPOOL +log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2 +typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2) +log_must [ "$blocks" = "$(seq -s " " 0 1021)" ] + +log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL" +