Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Notable upstream pull request merges:
 #15665 9b1677f dmu: Allow buffer fills to fail

Obtained from:	OpenZFS
OpenZFS commit:	dbda451
  • Loading branch information
mmatuska authored and bsdjhb committed Mar 12, 2024
2 parents efce8cb + 188408d commit cb14ece
Show file tree
Hide file tree
Showing 15 changed files with 139 additions and 34 deletions.
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/include/os/freebsd/spl/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}

static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
zfs_uio_resid(uio) -= size;
zfs_uio_offset(uio) += size;
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/include/os/linux/spl/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ zfs_uio_setoffset(zfs_uio_t *uio, offset_t off)
}

static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;
Expand Down
4 changes: 2 additions & 2 deletions sys/contrib/openzfs/include/sys/dbuf.h
Original file line number Diff line number Diff line change
Expand Up @@ -380,8 +380,8 @@ dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level,
int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags);
void dmu_buf_will_clone(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx);
void dmu_buf_will_fill(dmu_buf_t *db, dmu_tx_t *tx, boolean_t canfail);
boolean_t dmu_buf_fill_done(dmu_buf_t *db, dmu_tx_t *tx, boolean_t failed);
void dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx);
dbuf_dirty_record_t *dbuf_dirty_lightweight(dnode_t *dn, uint64_t blkid,
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/lib/libspl/include/sys/uio.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len)
}

static inline void
zfs_uio_advance(zfs_uio_t *uio, size_t size)
zfs_uio_advance(zfs_uio_t *uio, ssize_t size)
{
uio->uio_resid -= size;
uio->uio_loffset += size;
Expand Down
4 changes: 2 additions & 2 deletions sys/contrib/openzfs/module/os/freebsd/zfs/dmu_os.c
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);

Expand All @@ -123,7 +123,7 @@ dmu_write_pages(objset_t *os, uint64_t object, uint64_t offset, uint64_t size,
}

if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
dmu_buf_fill_done(db, tx, B_FALSE);

offset += tocpy;
size -= tocpy;
Expand Down
33 changes: 24 additions & 9 deletions sys/contrib/openzfs/module/zfs/dbuf.c
Original file line number Diff line number Diff line change
Expand Up @@ -2751,7 +2751,7 @@ dmu_buf_will_not_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
}

void
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx, boolean_t canfail)
{
dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake;

Expand All @@ -2769,8 +2769,14 @@ dmu_buf_will_fill(dmu_buf_t *db_fake, dmu_tx_t *tx)
* Block cloning: We will be completely overwriting a block
* cloned in this transaction group, so let's undirty the
* pending clone and mark the block as uncached. This will be
* as if the clone was never done.
* as if the clone was never done. But if the fill can fail
* we should have a way to return back to the cloned data.
*/
if (canfail && dbuf_find_dirty_eq(db, tx->tx_txg) != NULL) {
mutex_exit(&db->db_mtx);
dmu_buf_will_dirty(db_fake, tx);
return;
}
VERIFY(!dbuf_undirty(db, tx));
db->db_state = DB_UNCACHED;
}
Expand Down Expand Up @@ -2831,32 +2837,41 @@ dbuf_override_impl(dmu_buf_impl_t *db, const blkptr_t *bp, dmu_tx_t *tx)
dl->dr_overridden_by.blk_birth = dr->dr_txg;
}

void
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx)
boolean_t
dmu_buf_fill_done(dmu_buf_t *dbuf, dmu_tx_t *tx, boolean_t failed)
{
(void) tx;
dmu_buf_impl_t *db = (dmu_buf_impl_t *)dbuf;
dbuf_states_t old_state;
mutex_enter(&db->db_mtx);
DBUF_VERIFY(db);

old_state = db->db_state;
db->db_state = DB_CACHED;
if (old_state == DB_FILL) {
if (db->db_state == DB_FILL) {
if (db->db_level == 0 && db->db_freed_in_flight) {
ASSERT(db->db_blkid != DMU_BONUS_BLKID);
/* we were freed while filling */
/* XXX dbuf_undirty? */
memset(db->db.db_data, 0, db->db.db_size);
db->db_freed_in_flight = FALSE;
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db,
"fill done handling freed in flight");
failed = B_FALSE;
} else if (failed) {
VERIFY(!dbuf_undirty(db, tx));
db->db_buf = NULL;
dbuf_clear_data(db);
DTRACE_SET_STATE(db, "fill failed");
} else {
db->db_state = DB_CACHED;
DTRACE_SET_STATE(db, "fill done");
}
cv_broadcast(&db->db_changed);
} else {
db->db_state = DB_CACHED;
failed = B_FALSE;
}
mutex_exit(&db->db_mtx);
return (failed);
}

void
Expand Down Expand Up @@ -3001,7 +3016,7 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx)
DTRACE_SET_STATE(db, "filling assigned arcbuf");
mutex_exit(&db->db_mtx);
(void) dbuf_dirty(db, tx);
dmu_buf_fill_done(&db->db, tx);
dmu_buf_fill_done(&db->db, tx, B_FALSE);
}

void
Expand Down
21 changes: 9 additions & 12 deletions sys/contrib/openzfs/module/zfs/dmu.c
Original file line number Diff line number Diff line change
Expand Up @@ -1134,14 +1134,14 @@ dmu_write_impl(dmu_buf_t **dbp, int numbufs, uint64_t offset, uint64_t size,
ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
else
dmu_buf_will_dirty(db, tx);

(void) memcpy((char *)db->db_data + bufoff, buf, tocpy);

if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
dmu_buf_fill_done(db, tx, B_FALSE);

offset += tocpy;
size -= tocpy;
Expand Down Expand Up @@ -1349,27 +1349,24 @@ dmu_write_uio_dnode(dnode_t *dn, zfs_uio_t *uio, uint64_t size, dmu_tx_t *tx)

ASSERT(size > 0);

bufoff = zfs_uio_offset(uio) - db->db_offset;
offset_t off = zfs_uio_offset(uio);
bufoff = off - db->db_offset;
tocpy = MIN(db->db_size - bufoff, size);

ASSERT(i == 0 || i == numbufs-1 || tocpy == db->db_size);

if (tocpy == db->db_size)
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_TRUE);
else
dmu_buf_will_dirty(db, tx);

/*
* XXX zfs_uiomove could block forever (eg.nfs-backed
* pages). There needs to be a uiolockdown() function
* to lock the pages in memory, so that zfs_uiomove won't
* block.
*/
err = zfs_uio_fault_move((char *)db->db_data + bufoff,
tocpy, UIO_WRITE, uio);

if (tocpy == db->db_size)
dmu_buf_fill_done(db, tx);
if (tocpy == db->db_size && dmu_buf_fill_done(db, tx, err)) {
/* The fill was reverted. Undo any uio progress. */
zfs_uio_advance(uio, off - zfs_uio_offset(uio));
}

if (err)
break;
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/module/zfs/dmu_recv.c
Original file line number Diff line number Diff line change
Expand Up @@ -2532,7 +2532,7 @@ receive_spill(struct receive_writer_arg *rwa, struct drr_spill *drrs,
* size of the provided arc_buf_t.
*/
if (db_spill->db_size != drrs->drr_length) {
dmu_buf_will_fill(db_spill, tx);
dmu_buf_will_fill(db_spill, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db_spill,
drrs->drr_length, tx));
}
Expand Down
2 changes: 1 addition & 1 deletion sys/contrib/openzfs/module/zfs/dsl_bookmark.c
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ dsl_bookmark_create_sync_impl_snap(const char *bookmark, const char *snapshot,
dmu_buf_t *db;
VERIFY0(dmu_spill_hold_by_bonus(local_rl->rl_bonus,
DB_RF_MUST_SUCCEED, FTAG, &db));
dmu_buf_will_fill(db, tx);
dmu_buf_will_fill(db, tx, B_FALSE);
VERIFY0(dbuf_spill_set_blksz(db, P2ROUNDUP(bonuslen,
SPA_MINBLOCKSIZE), tx));
local_rl->rl_phys = db->db_data;
Expand Down
3 changes: 2 additions & 1 deletion sys/contrib/openzfs/tests/runfiles/linux.run
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@ tests = ['block_cloning_copyfilerange', 'block_cloning_copyfilerange_partial',
'block_cloning_copyfilerange_cross_dataset',
'block_cloning_cross_enc_dataset',
'block_cloning_copyfilerange_fallback_same_txg',
'block_cloning_replay', 'block_cloning_replay_encrypted']
'block_cloning_replay', 'block_cloning_replay_encrypted',
'block_cloning_lwb_buffer_overflow']
tags = ['functional', 'block_cloning']

[tests/functional/chattr:Linux]
Expand Down
2 changes: 2 additions & 0 deletions sys/contrib/openzfs/tests/test-runner/bin/zts-report.py.in
Original file line number Diff line number Diff line change
Expand Up @@ -305,6 +305,8 @@ elif sys.platform.startswith('linux'):
['SKIP', cfr_reason],
'block_cloning/block_cloning_replay_encrypted':
['SKIP', cfr_reason],
'block_cloning/block_cloning_lwb_buffer_overflow':
['SKIP', cfr_reason],
'block_cloning/block_cloning_copyfilerange_cross_dataset':
['SKIP', cfr_cross_reason],
'block_cloning/block_cloning_copyfilerange_fallback_same_txg':
Expand Down
1 change: 1 addition & 0 deletions sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -454,6 +454,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
functional/block_cloning/block_cloning_cross_enc_dataset.ksh \
functional/block_cloning/block_cloning_replay.ksh \
functional/block_cloning/block_cloning_replay_encrypted.ksh \
functional/block_cloning/block_cloning_lwb_buffer_overflow.ksh \
functional/bootfs/bootfs_001_pos.ksh \
functional/bootfs/bootfs_002_neg.ksh \
functional/bootfs/bootfs_003_pos.ksh \
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or https://opensource.org/licenses/CDDL-1.0.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2023 by iXsystems, Inc. All rights reserved.
#

. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/block_cloning/block_cloning.kshlib

#
# DESCRIPTION:
# Test for an LWB buffer overflow with a multi-VDEV ZIL: when a 128KB
# block write is split into two 68KB ones, ZFS tries to write a
# maximum-size 128KB TX_CLONE_RANGE record with 1022 block pointers
# into a 68KB buffer.
#
# STRATEGY:
# 1. Create a pool with multiple VDEVs ZIL
# 2. Write a file of 1022 blocks (recordsize=32K), so that cloning it
# produces a maximum-size TX_CLONE_RANGE record with 1022 block pointers
# 3. Sync TXG
# 4. Clone the file
# 5. Synchronize cached writes
#

verify_runnable "global"

if [[ $(linux_version) -lt $(linux_version "4.5") ]]; then
log_unsupported "copy_file_range not available before Linux 4.5"
fi

VDIR=$TEST_BASE_DIR/disk-bclone
VDEV="$VDIR/a $VDIR/b $VDIR/c"
LDEV="$VDIR/e $VDIR/f"

# Exit handler: destroy the test pool if it exists, then remove the
# directory that holds the file-backed data and log vdevs.
function cleanup
{
datasetexists $TESTPOOL && destroy_pool $TESTPOOL
rm -rf $VDIR
}

log_onexit cleanup

log_assert "Test for LWB buffer overflow with multiple VDEVs ZIL"

log_must rm -rf $VDIR
log_must mkdir -p $VDIR
log_must truncate -s $MINVDEVSIZE $VDEV $LDEV

log_must zpool create -o feature@block_cloning=enabled $TESTPOOL $VDEV \
log mirror $LDEV
log_must zfs create -o recordsize=32K $TESTPOOL/$TESTFS
# Each ZIL log entry can fit 130816 bytes for a block cloning operation,
# so it can store 1022 block pointers. When LWB optimization is enabled,
# an assert is hit when 128KB block write is split into two 68KB ones
# for 2 SLOG devices
log_must dd if=/dev/urandom of=/$TESTPOOL/$TESTFS/file1 bs=32K count=1022 \
conv=fsync
sync_pool $TESTPOOL
log_must clonefile -c /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
log_must sync

sync_pool $TESTPOOL
log_must have_same_content /$TESTPOOL/$TESTFS/file1 /$TESTPOOL/$TESTFS/file2
typeset blocks=$(get_same_blocks $TESTPOOL/$TESTFS file1 $TESTPOOL/$TESTFS file2)
log_must [ "$blocks" = "$(seq -s " " 0 1021)" ]

log_pass "LWB buffer overflow is not triggered with multiple VDEVs ZIL"

4 changes: 2 additions & 2 deletions sys/modules/zfs/zfs_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -1113,7 +1113,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */

/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.2.99-268-FreeBSD_g86e115e21"
#define ZFS_META_ALIAS "zfs-2.2.99-270-FreeBSD_gdbda45160"

/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
Expand Down Expand Up @@ -1143,7 +1143,7 @@
#define ZFS_META_NAME "zfs"

/* Define the project release. */
#define ZFS_META_RELEASE "268-FreeBSD_g86e115e21"
#define ZFS_META_RELEASE "270-FreeBSD_gdbda45160"

/* Define the project version. */
#define ZFS_META_VERSION "2.2.99"
Expand Down
2 changes: 1 addition & 1 deletion sys/modules/zfs/zfs_gitrev.h
Original file line number Diff line number Diff line change
@@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.2.99-268-g86e115e21"
#define ZFS_META_GITREV "zfs-2.2.99-270-gdbda45160"

0 comments on commit cb14ece

Please sign in to comment.