Skip to content

Commit

Permalink
Merge pull request #250 from shefty/master
Browse files Browse the repository at this point in the history
fabtest: Fix unconnected bandwidth tests
  • Loading branch information
shefty committed May 4, 2015
2 parents f395121 + 0c5a799 commit 026888b
Show file tree
Hide file tree
Showing 5 changed files with 70 additions and 52 deletions.
3 changes: 2 additions & 1 deletion complex/fabtest.h
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,8 @@ int ft_send_msg();
int ft_send_dgram();
int ft_send_dgram_done();
int ft_recv_dgram();
int ft_recv_dgram_flood();
int ft_recv_dgram_flood(size_t *recv_cnt);
int ft_send_dgram_flood();
int ft_sendrecv_dgram();

int ft_run_test();
Expand Down
3 changes: 2 additions & 1 deletion complex/ft_config.c
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ static struct ft_set test_sets[] = {
.service = "2224",
.prov_name = "sockets",
.test_type = {
FT_TEST_LATENCY
FT_TEST_LATENCY,
FT_TEST_BANDWIDTH
},
.class_function = {
FT_FUNC_SEND,
Expand Down
2 changes: 2 additions & 0 deletions complex/ft_endpoint.c
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,8 @@ int ft_reset_ep(void)
return ret;
}

memset(ft_tx.buf, 0, ft_tx.msg_size);
memset(ft_rx.buf, 0, ft_rx.msg_size);
ret = ft_post_recv_bufs();
if (ret)
return ret;
Expand Down
59 changes: 25 additions & 34 deletions complex/ft_msg.c
Original file line number Diff line number Diff line change
Expand Up @@ -254,6 +254,21 @@ int ft_send_dgram(void)
return ret;
}

/*
 * Sender half of the datagram streaming test.
 *
 * Resets the transmit sequence number and clears the first byte of the
 * transmit buffer, then issues ft.xfer_iter - 1 sends through
 * ft_send_msg().  The remaining transfer of the iteration count is not
 * sent here.
 *
 * Returns 0 when every send succeeded (or none were needed), otherwise
 * the first non-zero value returned by ft_send_msg(); no further sends
 * are attempted after a failure.
 */
int ft_send_dgram_flood(void)
{
	int sent = 0;
	int status = 0;

	/* A zero first byte marks a message as ordinary flood traffic. */
	ft_tx.seqno = 0;
	*(uint8_t*) ft_tx.buf = 0;

	while (sent < ft.xfer_iter - 1) {
		status = ft_send_msg();
		if (status)
			return status;
		sent++;
	}

	return status;
}

int ft_recv_dgram(void)
{
struct timespec s, e;
Expand Down Expand Up @@ -290,47 +305,23 @@ int ft_recv_dgram(void)
return -FI_ETIMEDOUT;
}

int ft_recv_dgram_flood(void)
int ft_recv_dgram_flood(size_t *recv_cnt)
{
struct timespec s, e;
int credits, ret;
int64_t poll_time = 0;
int expected = ft.xfer_iter;
int got = 0;
int ret;
size_t cnt = 0;

do {
if (ft_rx.credits > (ft_rx.max_credits >> 1)) {
ret = ft_post_recv_bufs();
if (ret)
return ret;
}

credits = ft_rx.credits;

ret = ft_comp_rx();
ret = ft_post_recv_bufs();
if (ret)
return ret;

if (credits != ft_rx.credits) {
poll_time = 0;
got += ft_rx.credits - credits;
}

if (got >= expected)
return 0;

if (!poll_time)
clock_gettime(CLOCK_MONOTONIC, &s);

clock_gettime(CLOCK_MONOTONIC, &e);
poll_time = get_elapsed(&s, &e, MILLI);
break;

} while (poll_time < 1);
ret = ft_comp_rx();
cnt += ft_rx.credits;

if (expected != got)
fprintf(stderr, "Warn: lost %d dgrams\n", expected - got);
} while (!ret && (*(uint8_t *) ft_rx.buf != (uint8_t) ~0));

return -FI_ETIMEDOUT;
*recv_cnt = cnt;
return ret;
}

int ft_sendrecv_dgram(void)
Expand Down
55 changes: 39 additions & 16 deletions complex/ft_test.c
Original file line number Diff line number Diff line change
Expand Up @@ -299,29 +299,50 @@ static int ft_bw(void)
return ret;
}

while (ft_tx.credits < ft_tx.max_credits) {
ret = ft_comp_tx();
if (ret)
return ret;
}

return 0;
}

static int ft_bw_dgram(void)
/*
* The datagram streaming test sends datagrams with the initial byte
* of the message cleared until we're ready to end the test. The first
* byte is then set to 0xFF. On the receive side, we count the number
* of completions until that message is seen. Only the receiving side
* reports any performance data. The sender does not know how many
* packets were dropped in flight.
*
* Because we re-use the same buffer for all messages, the receiving
* side can notice that the first byte has changed and end the test
* before the completion associated with the last message has been
* written to the CQ. As a result, the number of messages that were
* counted as received may be slightly lower than the number of messages
* that were actually received.
*
* For a sufficiently large number of transfers, this falls into the
* noise, but it is visible if the number of iterations is small, such
* as when running the quick test. The fix for this would be either to use
* CQ data to exchange the end of test marker, or to allocate separate
* buffers for each receive operation.
*
* The message with the end of test marker is retried until the
* receiver acknowledges it. If the receiver ack message is lost, the
* bandwidth test will hang. However, this is the only message that the
* receiver sends, so there's a reasonably good chance of it being transmitted
* successfully.
*/
static int ft_bw_dgram(size_t *recv_cnt)
{
int ret, i;
int ret;

if (listen_sock < 0) {
for (i = 0; i < ft.xfer_iter; i++) {
ret = ft_send_dgram();
if (ret)
return ret;
}
*recv_cnt = 0;
ret = ft_send_dgram_flood();
if (ret)
return ret;

ft_tx.seqno = ~0;
ret = ft_sendrecv_dgram();
} else {
ret = ft_recv_dgram_flood();
ret = ft_recv_dgram_flood(recv_cnt);
if (ret)
return ret;

Expand All @@ -333,6 +354,7 @@ static int ft_bw_dgram(void)

static int ft_run_bandwidth(void)
{
size_t recv_cnt;
int ret, i;

for (i = 0; i < ft.size_cnt; i += ft.inc_step) {
Expand All @@ -342,19 +364,20 @@ static int ft_run_bandwidth(void)

ft.xfer_iter = test_info.test_flags & FT_FLAG_QUICKTEST ?
5 : size_to_count(ft_tx.msg_size);
recv_cnt = ft.xfer_iter;

ret = ft_sync_test(0);
if (ret)
return ret;

clock_gettime(CLOCK_MONOTONIC, &start);
ret = (test_info.ep_type == FI_EP_DGRAM) ?
ft_bw_dgram() : ft_bw();
ft_bw_dgram(&recv_cnt) : ft_bw();
clock_gettime(CLOCK_MONOTONIC, &end);
if (ret)
return ret;

show_perf("bw", ft_tx.msg_size, ft.xfer_iter, &start, &end, 1);
show_perf("bw", ft_tx.msg_size, recv_cnt, &start, &end, 1);
}

return 0;
Expand Down

0 comments on commit 026888b

Please sign in to comment.