Skip to content

Commit

Permalink
ublksrv_tgt: add support for UBLK_F_USER_RECOVERY_FAIL_IO
Browse files Browse the repository at this point in the history
A new recovery flag, UBLK_F_USER_RECOVERY_FAIL_IO, is being added to
ublk_drv at

https://lore.kernel.org/linux-block/[email protected]/

Support the creation of devices with this flag specified (via a new -e
flag for ublk add). Add a test to verify that all recovery flag
combinations have the expected behavior.

Signed-off-by: Uday Shankar <[email protected]>
  • Loading branch information
ps-ushankar authored and ming1 committed Oct 9, 2024
1 parent df96b46 commit 5059fef
Show file tree
Hide file tree
Showing 8 changed files with 194 additions and 8 deletions.
19 changes: 18 additions & 1 deletion include/ublk_cmd.h
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@
_IOR('u', 0x14, struct ublksrv_ctrl_cmd)

/*
* 64bit are enough now, and it should be easy to extend in case of
* 64bits are enough now, and it should be easy to extend in case of
* running out of feature flags
*/
#define UBLK_FEATURES_LEN 8
Expand Down Expand Up @@ -147,8 +147,18 @@
*/
#define UBLK_F_NEED_GET_DATA (1UL << 2)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is met with errors
* - I/O issued while there is no ublk server is queued
*/
#define UBLK_F_USER_RECOVERY (1UL << 3)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is reissued
* - I/O issued while there is no ublk server is queued
*/
#define UBLK_F_USER_RECOVERY_REISSUE (1UL << 4)

/*
Expand Down Expand Up @@ -184,11 +194,18 @@
*/
#define UBLK_F_ZONED (1ULL << 8)

/*
* - Block devices are recoverable if ublk server exits and restarts
* - Outstanding I/O when ublk server exits is met with errors
* - I/O issued while there is no ublk server is met with errors
*/
#define UBLK_F_USER_RECOVERY_FAIL_IO (1ULL << 9)

/* device state */
#define UBLK_S_DEV_DEAD 0
#define UBLK_S_DEV_LIVE 1
#define UBLK_S_DEV_QUIESCED 2
#define UBLK_S_DEV_FAIL_IO 3

/* shipped via sqe->cmd of io_uring command */
struct ublksrv_ctrl_cmd {
Expand Down
2 changes: 2 additions & 0 deletions lib/ublksrv_cmd.c
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,8 @@ static const char *ublksrv_dev_state_desc(struct ublksrv_ctrl_dev *dev)
return "LIVE";
case UBLK_S_DEV_QUIESCED:
return "QUIESCED";
case UBLK_S_DEV_FAIL_IO:
return "FAIL_IO";
default:
return "UNKNOWN";
};
Expand Down
1 change: 0 additions & 1 deletion nbd/tgt_nbd.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -808,7 +808,6 @@ static int nbd_setup_tgt(struct ublksrv_dev *dev, int type, bool recovery,

ublk_assert(jbuf);
ublk_assert(type == UBLKSRV_TGT_TYPE_NBD);
ublk_assert(!recovery || info->state == UBLK_S_DEV_QUIESCED);

ublksrv_json_read_target_str_info(jbuf, NBD_MAX_NAME, "host",
host_name);
Expand Down
1 change: 0 additions & 1 deletion qcow2/tgt_qcow2.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,6 @@ static int qcow2_recovery_tgt(struct ublksrv_dev *dev, int type)
int tgt_depth;

ublk_assert(jbuf);
ublk_assert(info->state == UBLK_S_DEV_QUIESCED);
ublk_assert(type == UBLKSRV_TGT_TYPE_QCOW2);

/* qcow2 doesn't support user copy yet */
Expand Down
163 changes: 163 additions & 0 deletions tests/generic/007
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
#!/bin/bash
# SPDX-License-Identifier: MIT or GPL-2.0-only

. common/fio_common

# Banner describing what this test covers.
echo -e "\ttest nosrv (state after ublk server is killed) and recovery behavior"
echo -e "\tfor all valid recovery options"
echo

# pid of the most recent background dd started by submit_io;
# stays 0 until the first I/O has been submitted
DD_PID=0

# Submit one 4k direct read against the device/file given as $1.
# The read runs in the background; its pid is stored in the global DD_PID
# so that check_io_status can inspect it afterwards.
submit_io()
{
	# quote the path so it always reaches dd as a single if= operand
	dd if="$1" of=/dev/null iflag=direct count=1 bs=4k 2>/dev/null &
	DD_PID=$!
}

# Report what happened to the I/O whose pid is in DD_PID, after giving it
# one second to finish. Return value:
# 0 - I/O succeeded
# 1 - I/O error
# 2 - I/O queued
check_io_status()
{
	sleep 1

	# a dd that is still alive after the grace period is most likely
	# blocked on a queued I/O
	ps -p $DD_PID > /dev/null 2>&1 && return 2

	# dd has exited: collapse its exit status to success or error
	wait $DD_PID && return 0
	return 1
}

# Delete the ublk device given as $1 through __remove_ublk_dev_return,
# pausing before and after the removal. Prints a message and returns 1
# when the helper reports a non-zero result.
del_dev()
{
	sleep 2
	RES=$(__remove_ublk_dev_return $1)
	[ $RES -ne 0 ] && {
		echo -e "\t\tdelete $1 failed"
		return 1
	}
	# collect any background children that are still outstanding
	wait
	sleep 3
}

# Run one nosrv/recovery scenario on a null-target ublk device, using the
# flag values currently held in RECOVERY, RECOVERY_REISSUE and
# RECOVERY_FAIL_IO. Steps: create the device, check that I/O works, kill
# the ublk server, check how new I/O is treated, try to recover the device,
# and check that I/O works again. Returns 1 at the first unexpected result
# and 0 otherwise.
ublk_run_recovery_test()
{
	local nosrv_queued nosrv_want nosrv_what

	export T_TYPE_PARAMS="-t null -r $RECOVERY -i $RECOVERY_REISSUE -e $RECOVERY_FAIL_IO"
	echo -e "\trunning with params: $T_TYPE_PARAMS"
	DEV=$(__create_ublk_dev)

	echo -e "\t\tcheck behavior before nosrv - expect no error"
	submit_io $DEV
	check_io_status
	RES=$?
	if [ $RES -ne 0 ]; then
		echo -e "\t\tI/O error while ublk server still up!"
		return 1
	fi

	# take the ublk server away
	pid1=$(__ublk_get_pid $DEV)
	kill -9 $pid1
	sleep 2

	echo -ne "\t\tcheck behavior during nosrv - "
	submit_io $DEV
	check_io_status
	RES=$?
	if [ $RECOVERY_FAIL_IO -eq 0 ] && [ $RECOVERY -ne 0 ]; then
		# recovery without FAIL_IO: I/O waits for a new server
		nosrv_queued=1
		nosrv_want=2
		nosrv_what="queued io"
		echo "expect I/O queued"
	else
		# either FAIL_IO was requested, or recovery is off and the
		# device should be gone
		nosrv_queued=0
		nosrv_want=1
		nosrv_what="io error"
		echo "expect I/O error"
	fi
	if [ $RES -ne $nosrv_want ]; then
		echo -e "\t\tincorrect nosrv behavior!"
		echo -e "\t\texpected $nosrv_what, got $RES"
		return 1
	fi

	# retry recovery once per second, for up to 10 attempts
	echo -e "\t\ttry to recover the device"
	for ((secs = 0; secs < 10; secs++)); do
		RES=$(__recover_ublk_dev $DEV)
		[ $RES -eq 0 ] && break
		sleep 1
	done

	if [ $RES -ne 0 ]; then
		echo -e "\t\tfailed to recover device!"
		[ $RECOVERY -ne 0 ] && return 1
		# without any recovery flag, failing to recover is the
		# expected outcome
		echo -e "\t\tforgiving expected recovery failure"
		del_dev $DEV
		echo
		return 0
	fi
	if [ $RECOVERY -eq 0 ]; then
		echo -e "\t\trecovery unexpectedly succeeded!"
		return 1
	fi

	# if I/O queued before, make sure it completes now
	if [ $nosrv_queued -ne 0 ]; then
		echo -e "\t\tchecking that I/O completed after recovery"
		check_io_status
		RES=$?
		if [ $RES -ne 0 ]; then
			echo -e "\t\tpreviously queued I/O did not succeed!"
			echo -e "\t\texpected success got $RES"
			return 1
		fi
	fi

	echo -e "\t\tcheck behavior after recovery - expect no error"
	submit_io $DEV
	check_io_status
	RES=$?
	if [ $RES -ne 0 ]; then
		echo -e "\t\tI/O error after recovery!"
		return 1
	fi

	# cleanup: kill the recovered server and remove the device
	pid2=$(__ublk_get_pid $DEV)
	kill -9 $pid2
	del_dev $DEV

	echo
}

# Exercise every valid recovery flag combination. Each entry lists the
# values for RECOVERY, RECOVERY_REISSUE and RECOVERY_FAIL_IO, in that order.
FAILED=0
for combo in "0 0 0" "1 0 0" "1 1 0" "1 0 1"; do
	read -r RECOVERY RECOVERY_REISSUE RECOVERY_FAIL_IO <<< "$combo"
	# keep going after a failure so that every combination is reported,
	# but remember it so it is not silently dropped
	ublk_run_recovery_test || FAILED=1
done

# last command of the script: its status is non-zero if any combination failed
[ $FAILED -eq 0 ]
1 change: 0 additions & 1 deletion tgt_loop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ static int loop_recovery_tgt(struct ublksrv_dev *dev, int type)
const char *jbuf = ublksrv_ctrl_get_recovery_jbuf(cdev);

ublk_assert(type == UBLKSRV_TGT_TYPE_LOOP);
ublk_assert(info->state == UBLK_S_DEV_QUIESCED);

return loop_setup_tgt(dev, type, true, jbuf);
}
Expand Down
1 change: 0 additions & 1 deletion tgt_null.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,6 @@ static int null_recovery_tgt(struct ublksrv_dev *dev, int type)
struct ublk_params p;

ublk_assert(jbuf);
ublk_assert(info->state == UBLK_S_DEV_QUIESCED);
ublk_assert(type == UBLKSRV_TGT_TYPE_NULL);

ret = ublksrv_json_read_params(&p, jbuf);
Expand Down
14 changes: 11 additions & 3 deletions ublksrv_tgt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -685,6 +685,7 @@ static int cmd_dev_add(int argc, char *argv[])
{ "uring_comp", 1, NULL, 'u' },
{ "need_get_data", 1, NULL, 'g' },
{ "user_recovery", 1, NULL, 'r'},
{ "user_recovery_fail_io", 1, NULL, 'e'},
{ "user_recovery_reissue", 1, NULL, 'i'},
{ "debug_mask", 1, NULL, 0},
{ "unprivileged", 0, NULL, 0},
Expand All @@ -698,6 +699,7 @@ static int cmd_dev_add(int argc, char *argv[])
int uring_comp = 0;
int need_get_data = 0;
int user_recovery = 0;
int user_recovery_fail_io = 0;
int user_recovery_reissue = 0;
int unprivileged = 0;
const char *dump_buf;
Expand All @@ -711,7 +713,7 @@ static int cmd_dev_add(int argc, char *argv[])

mkpath(data.run_dir);

while ((opt = getopt_long(argc, argv, "-:t:n:d:q:u:g:r:i:z",
while ((opt = getopt_long(argc, argv, "-:t:n:d:q:u:g:r:e:i:z",
longopts, &option_index)) != -1) {
switch (opt) {
case 'n':
Expand All @@ -738,6 +740,9 @@ static int cmd_dev_add(int argc, char *argv[])
case 'r':
user_recovery = strtol(optarg, NULL, 10);
break;
case 'e':
user_recovery_fail_io = strtol(optarg, NULL, 10);
break;
case 'i':
user_recovery_reissue = strtol(optarg, NULL, 10);
break;
Expand Down Expand Up @@ -765,6 +770,8 @@ static int cmd_dev_add(int argc, char *argv[])
data.flags |= UBLK_F_NEED_GET_DATA;
if (user_recovery)
data.flags |= UBLK_F_USER_RECOVERY;
if (user_recovery_fail_io)
data.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
if (user_recovery_reissue)
data.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
if (unprivileged)
Expand Down Expand Up @@ -871,8 +878,8 @@ static void cmd_dev_add_usage(const char *cmd)
printf("%s add -t %s\n", cmd, data.names);
printf("\t-n DEV_ID -q NR_HW_QUEUES -d QUEUE_DEPTH\n");
printf("\t-u URING_COMP -g NEED_GET_DATA -r USER_RECOVERY\n");
printf("\t-i USER_RECOVERY_REISSUE --debug_mask=0x{DBG_MASK}\n");
printf("\t--unprivileged\n\n");
printf("\t-i USER_RECOVERY_REISSUE -e USER_RECOVERY_FAIL_IO\n");
printf("\t--debug_mask=0x{DBG_MASK} --unprivileged\n\n");
printf("\ttarget specific command line:\n");
ublksrv_for_each_tgt_type(show_tgt_add_usage, NULL);
}
Expand Down Expand Up @@ -1047,6 +1054,7 @@ static int cmd_dev_get_features(int argc, char *argv[])
[const_ilog2(UBLK_F_CMD_IOCTL_ENCODE)] = "CMD_IOCTL_ENCODE",
[const_ilog2(UBLK_F_USER_COPY)] = "USER_COPY",
[const_ilog2(UBLK_F_ZONED)] = "ZONED",
[const_ilog2(UBLK_F_USER_RECOVERY_FAIL_IO)] = "RECOVERY_FAIL_IO",
};

ret = ublksrv_ctrl_get_features(dev, &features);
Expand Down

0 comments on commit 5059fef

Please sign in to comment.